Clover coverage report - Clover results for XOM 1.2d1
Coverage timestamp: Wed Feb 8 2006 08:31:33 EST
file stats: LOC: 979   Methods: 27
NCLOC: 571   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
XIncluder.java 91.9% 95.3% 96.3% 94.2%
coverage coverage
 1    /* Copyright 2002-2005 Elliotte Rusty Harold
 2   
 3    This library is free software; you can redistribute it and/or modify
 4    it under the terms of version 2.1 of the GNU Lesser General Public
 5    License as published by the Free Software Foundation.
 6   
 7    This library is distributed in the hope that it will be useful,
 8    but WITHOUT ANY WARRANTY; without even the implied warranty of
 9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10    GNU Lesser General Public License for more details.
 11   
 12    You should have received a copy of the GNU Lesser General Public
 13    License along with this library; if not, write to the
 14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 15    Boston, MA 02111-1307 USA
 16   
 17    You can contact Elliotte Rusty Harold by sending e-mail to
 18    elharo@metalab.unc.edu. Please include the word "XOM" in the
 19    subject line. The XOM home page is located at http://www.xom.nu/
 20    */
 21   
 22    package nu.xom.xinclude;
 23   
 24    import java.io.BufferedInputStream;
 25    import java.io.BufferedReader;
 26    import java.io.IOException;
 27    import java.io.InputStream;
 28    import java.io.InputStreamReader;
 29    import java.io.Reader;
 30    import java.io.UnsupportedEncodingException;
 31    import java.net.MalformedURLException;
 32    import java.net.URL;
 33    import java.net.URLConnection;
 34    import java.util.Locale;
 35    import java.util.ArrayList;
 36   
 37    import nu.xom.Attribute;
 38    import nu.xom.Builder;
 39    import nu.xom.DocType;
 40    import nu.xom.Document;
 41    import nu.xom.Element;
 42    import nu.xom.Elements;
 43    import nu.xom.MalformedURIException;
 44    import nu.xom.Node;
 45    import nu.xom.NodeFactory;
 46    import nu.xom.Nodes;
 47    import nu.xom.ParentNode;
 48    import nu.xom.ParsingException;
 49    import nu.xom.Text;
 50   
 51    /**
 52    * <p>
 53    * Implements XInclude resolution as specified in
 54    * <a href="http://www.w3.org/TR/2004/REC-xinclude-20041220/"
 55    * target="_top"><cite>XML Inclusions (XInclude) Version
 56    * 1.0</cite></a>. Fallbacks are supported.
 57    * The XPointer <code>element()</code> scheme and
 58    * shorthand XPointers are also supported. The XPointer
 59    * <code>xpointer()</code> scheme is not supported.
 60    * The <code>accept</code> and <code>accept-language</code>
 61    * attributes are supported.
 62    * </p>
 63    *
 64    * @author Elliotte Rusty Harold
 65    * @version 1.1b3
 66    *
 67    */
 68    public class XIncluder {
 69   
    // Value of the "java.version" system property, read once when the class
    // is initialized. Not referenced in the portion of the file visible here;
    // presumably consulted further down -- TODO confirm.
 70    private static String version = System.getProperty("java.version");
 71   
 72    // could rewrite this to handle only elements in documents
 73    // (no parentless elements) and then add code to handle Nodes
 74    // and parentless elements by sticking each one in a Document
 75   
 76    // prevent instantiation
 77  0 private XIncluder() {}
 78   
 79    /**
 80    * <p>
 81    * The namespace name of all XInclude elements.
 82    * </p>
 83    */
 84    public final static String XINCLUDE_NS
 85    = "http://www.w3.org/2001/XInclude";
 86   
 87    /**
 88    * <p>
 89    * Returns a copy of the document in which all
 90    * <code>xinclude:include</code> elements have been
 91    * replaced by their referenced content. The original
 92    * <code>Document</code> object is not modified.
 93    * Resolution is recursive; that is, include elements
 94    * in the included documents are themselves resolved.
 95    * The <code>Document</code> returned contains no
 96    * include elements.
 97    * </p>
 98    *
 99    * @param in the document in which include elements
 100    * should be resolved
 101    *
 102    * @return copy of the document in which
 103    * all <code>xinclude:include</code> elements
 104    * have been replaced by their referenced content
 105    *
 106    * @throws BadParseAttributeException if an <code>include</code>
 107    * element has a <code>parse</code> attribute with any value
 108    * other than <code>text</code> or <code>xml</code>
 109    * @throws InclusionLoopException if the document
 110    * contains an XInclude element that attempts to include
 111    * a document in which this element is directly or indirectly
 112    * included.
 113    * @throws IOException if an included document could not be loaded,
 114    * and no fallback was available
 115    * @throws NoIncludeLocationException if an <code>xinclude:include</code>
 116    * element does not have an <code>href</code> attribute
 117    * @throws ParsingException if an included XML document
 118    * was malformed
 119    * @throws UnsupportedEncodingException if an included document
 120    * used an encoding this parser does not support, and no
 121    * fallback was available
 122    * @throws XIncludeException if the document violates the
 123    * syntax rules of XInclude
 124    * @throws XMLException if resolving an include element would
 125    * result in a malformed document
 126    */
 127  125 public static Document resolve(Document in)
 128    throws BadParseAttributeException, InclusionLoopException,
 129    IOException, NoIncludeLocationException, ParsingException,
 130    UnsupportedEncodingException, XIncludeException {
 131   
 132  125 Builder builder = new Builder();
 133  125 return resolve(in, builder);
 134   
 135    }
 136   
 137    /**
 138    * <p>
 139    * Returns a copy of the document in which all
 140    * <code>xinclude:include</code> elements have been
 141    * replaced by their referenced content as loaded by the builder.
 142    * The original <code>Document</code> object is not modified.
 143    * Resolution is recursive; that is, include elements
 144    * in the included documents are themselves resolved.
 145    * The document returned contains no <code>include</code> elements.
 146    * </p>
 147    *
 148    * @param in the document in which include elements
 149    * should be resolved
 150    * @param builder the builder used to build the
 151    * nodes included from other documents
 152    *
 153    * @return copy of the document in which
 154    * all <code>xinclude:include</code> elements
 155    * have been replaced by their referenced content
 156    *
 157    * @throws BadParseAttributeException if an <code>include</code>
 158    * element has a <code>parse</code> attribute with any value
 159    * other than <code>text</code> or <code>xml</code>
 160    * @throws InclusionLoopException if the document
 161    * contains an XInclude element that attempts to include
 162    * a document in which this element is directly or indirectly
 163    * included.
 164    * @throws IOException if an included document could not be loaded,
 165    * and no fallback was available
 166    * @throws NoIncludeLocationException if an <code>xinclude:include</code>
 167    * element does not have an href attribute.
 168    * @throws ParsingException if an included XML document
 169    * was malformed
 170    * @throws UnsupportedEncodingException if an included document
 171    * used an encoding this parser does not support, and no
 172    * fallback was available
 173    * @throws XIncludeException if the document violates the
 174    * syntax rules of XInclude
 175    * @throws XMLException if resolving an include element would
 176    * result in a malformed document
 177    */
 178  134 public static Document resolve(Document in, Builder builder)
 179    throws BadParseAttributeException, InclusionLoopException,
 180    IOException, NoIncludeLocationException, ParsingException,
 181    UnsupportedEncodingException, XIncludeException {
 182   
 183  134 Document copy = new Document(in);
 184  134 resolveInPlace(copy, builder);
 185  87 return copy;
 186   
 187    }
 188   
 189    /**
 190    * <p>
 191    * Modifies a document by replacing all
 192    * <code>xinclude:include</code> elements
 193    * by their referenced content.
 194    * Resolution is recursive; that is, include elements
 195    * in the included documents are themselves resolved.
 196    * The resolved document contains no
 197    * <code>xinclude:include</code> elements.
 198    * </p>
 199    *
 200    * <p>
 201    * If the inclusion fails for any reason&mdash;XInclude syntax
 202    * error, missing resource with no fallback, etc.&mdash;the document
 203    * may be left in a partially resolved state.
 204    * </p>
 205    *
 206    * @param in the document in which include elements
 207    * should be resolved
 208    *
 209    * @throws BadParseAttributeException if an <code>include</code>
 210    * element has a <code>parse</code> attribute
 211    * with any value other than <code>text</code>
 212    * or <code>xml</code>
 213    * @throws InclusionLoopException if the document
 214    * contains an XInclude element that attempts to include a
 215    * document in which this element is directly or indirectly
 216    * included
 217    * @throws IOException if an included document could not be loaded,
 218    * and no fallback was available
 219    * @throws NoIncludeLocationException if an <code>xinclude:include</code>
 220    * element does not have an <code>href</code> attribute
 221    * @throws ParsingException if an included XML document
 222    * was malformed
 223    * @throws UnsupportedEncodingException if an included document
 224    * used an encoding this parser does not support, and no
 225    * fallback was available
 226    * @throws XIncludeException if the document violates the
 227    * syntax rules of XInclude
 228    * @throws XMLException if resolving an include element would
 229    * result in a malformed document
 230    */
 231  53 public static void resolveInPlace(Document in)
 232    throws BadParseAttributeException, InclusionLoopException,
 233    IOException, NoIncludeLocationException, ParsingException,
 234    UnsupportedEncodingException, XIncludeException {
 235  53 resolveInPlace(in, new Builder());
 236    }
 237   
 238    /**
 239    * <p>
 240    * Modifies a document by replacing all
 241    * <code>xinclude:include</code> elements with their referenced
 242    * content as loaded by the builder. Resolution is recursive;
 243    * that is, <code>include</code> elements in the included documents
 244    * are themselves resolved. The resolved document contains no
 245    * <code>xinclude:include</code> elements.
 246    * </p>
 247    *
 248    * <p>
 249    * If the inclusion fails for any reason &mdash; XInclude syntax
 250    * error, missing resource with no fallback, etc. &mdash; the
 251    * document may be left in a partially resolved state.
 252    * </p>
 253    *
 254    * @param in the document in which include elements
 255    * should be resolved
 256    * @param builder the builder used to build the
 257    * nodes included from other documents
 258    *
 259    * @throws BadParseAttributeException if an <code>include</code>
 260    * element has a <code>parse</code> attribute
 261    * with any value other than <code>text</code>
 262    * or <code>xml</code>
 263    * @throws InclusionLoopException if this element
 264    * contains an XInclude element that attempts to include a
 265    * document in which this element is directly or indirectly
 266    * included
 267    * @throws IOException if an included document could not be loaded,
 268    * and no fallback was available
 269    * @throws NoIncludeLocationException if an <code>xinclude:include</code>
 270    * element does not have an <code>href</code> attribute.
 271    * @throws ParsingException if an included XML document
 272    * was malformed
 273    * @throws UnsupportedEncodingException if an included document
 274    * used an encoding this parser does not support, and no
 275    * fallback was available
 276    * @throws XIncludeException if the document violates the
 277    * syntax rules of XInclude
 278    * @throws XMLException if resolving an include element would
 279    * result in a malformed document
 280    */
 281  187 public static void resolveInPlace(Document in, Builder builder)
 282    throws BadParseAttributeException, InclusionLoopException,
 283    IOException, NoIncludeLocationException, ParsingException,
 284    UnsupportedEncodingException, XIncludeException {
 285   
 286  187 ArrayList stack = new ArrayList();
 287  187 resolveInPlace(in, builder, stack);
 288   
 289    }
 290   
 291   
 292  323 private static void resolveInPlace(
 293    Document in, Builder builder, ArrayList baseURLs)
 294    throws IOException, ParsingException, XIncludeException {
 295   
 296  323 String base = in.getBaseURI();
 297    // workaround a bug in Sun VMs
 298  323 if (base != null && base.startsWith("file:///")) {
 299  125 base = "file:/" + base.substring(8);
 300    }
 301   
 302  323 baseURLs.add(base);
 303  323 Element root = in.getRootElement();
 304  323 resolve(root, builder, baseURLs);
 305  250 baseURLs.remove(baseURLs.size()-1);
 306   
 307    }
 308   
 309   
 310  1037 private static void resolve(
 311    Element element, Builder builder, ArrayList baseURLs)
 312    throws IOException, ParsingException, XIncludeException {
 313   
 314  1037 resolve(element, builder, baseURLs, null);
 315   
 316    }
 317   
 318   
 319  1044 private static void resolve(
 320    Element element, Builder builder, ArrayList baseURLs, Document originalDoc)
 321    throws IOException, ParsingException, XIncludeException {
 322   
 323  1044 if (isIncludeElement(element)) {
 324  262 verifyIncludeElement(element);
 325   
 326  241 String parse = element.getAttributeValue("parse");
 327  174 if (parse == null) parse = "xml";
 328  241 String xpointer = element.getAttributeValue("xpointer");
 329  241 String encoding = element.getAttributeValue("encoding");
 330  241 String href = element.getAttributeValue("href");
 331    // empty string href is same as no href attribute
 332  4 if ("".equals(href)) href = null;
 333   
 334  241 ParentNode parent = element.getParent();
 335  241 String base = element.getBaseURI();
 336  241 URL baseURL = null;
 337  241 try {
 338  241 baseURL = new URL(base);
 339    }
 340    catch (MalformedURLException ex) {
 341    // don't use base
 342    }
 343  241 URL url = null;
 344  241 try {
 345    // xml:base attributes added to maintain the
 346    // base URI should not have fragment IDs
 347   
 348  241 if (baseURL != null && href != null) {
 349  213 url = absolutize(baseURL, href);
 350    }
 351  28 else if (href != null) {
 352  7 try {
 353  7 testURISyntax(href);
 354  4 url = new URL(href);
 355    }
 356    catch (MalformedURIException ex) {
 357  3 if (baseURL == null) {
 358  3 throw new BadHrefAttributeException(
 359    "Could not resolve relative URI " + href
 360    + " because the xi:include element does"
 361    + " not have a base URI.", href);
 362    }
 363  0 throw new BadHrefAttributeException("Illegal IRI in href attribute", href);
 364    }
 365    }
 366   
 367  236 String accept = element.getAttributeValue("accept");
 368  236 checkHeader(accept);
 369  234 String acceptLanguage = element.getAttributeValue("accept-language");
 370  234 checkHeader(acceptLanguage);
 371   
 372  234 if (parse.equals("xml")) {
 373   
 374  184 String parentLanguage = "";
 375  184 if (parent instanceof Element) {
 376  181 parentLanguage = getXMLLangValue((Element) parent);
 377    }
 378   
 379  184 Nodes replacements;
 380  184 if (url != null) {
 381  163 replacements = downloadXMLDocument(url,
 382    xpointer, builder, baseURLs, accept, acceptLanguage, parentLanguage);
 383    // Add base URIs. Base URIs added by XInclusion require
 384    // the element to maintain the same base URI as it had
 385    // in the original document. Since its base URI in the
 386    // original document does not contain a fragment ID,
 387    // therefore its base URI after inclusion shouldn't,
 388    // and this special case is unnecessary. Base URI fixup
 389    // should not add the fragment ID.
 390  103 for (int i = 0; i < replacements.size(); i++) {
 391  115 Node child = replacements.get(i);
 392  115 if (child instanceof Element) {
 393  103 String noFragment = child.getBaseURI();
 394  103 if (noFragment.indexOf('#') >= 0) {
 395  1 noFragment = noFragment.substring(
 396    0, noFragment.indexOf('#'));
 397    }
 398  103 Element baseless = (Element) child;
 399   
 400    // parent is null here; need to get real parent
 401  103 String parentBase = parent.getBaseURI();
 402  103 if (parentBase != null && ! "".equals(parentBase)) {
 403  102 parentBase = getDirectoryBase(parentBase);
 404    }
 405   
 406  103 if (noFragment.startsWith(parentBase)) {
 407  73 noFragment = noFragment.substring(parentBase.length());
 408    }
 409  103 Attribute baseAttribute = new Attribute(
 410    "xml:base",
 411    "http://www.w3.org/XML/1998/namespace",
 412    noFragment
 413    );
 414  103 baseless.addAttribute(baseAttribute);
 415   
 416    }
 417    }
 418    }
 419    else {
 420  21 Document parentDoc = element.getDocument();
 421  21 if (parentDoc == null) {
 422  4 parentDoc = originalDoc;
 423    }
 424  21 Nodes originals = XPointer.query(parentDoc, xpointer);
 425  21 replacements = new Nodes();
 426  21 for (int i = 0; i < originals.size(); i++) {
 427  21 Node original = originals.get(i);
 428    // current implementation of XPointer never returns non-elements
 429  21 if (contains((Element) original, element)) {
 430  2 throw new InclusionLoopException(
 431    "Element tried to include itself"
 432    );
 433    }
 434  19 Node copy = original.copy();
 435  19 replacements.append(copy);
 436    }
 437  19 replacements = resolveXPointerSelection(
 438    replacements, builder, baseURLs, parentDoc);
 439   
 440    }
 441   
 442    // Will fail if we're replacing the root element with
 443    // a node list containing zero or multiple elements,
 444    // but that should fail. However, I may wish to
 445    // adjust the type of exception thrown. This is only
 446    // relevant if I add support for the xpointer scheme
 447    // since otherwise you can only point at one element
 448    // or document.
 449  122 if (parent instanceof Element) {
 450  119 int position = parent.indexOf(element);
 451  119 for (int i = 0; i < replacements.size(); i++) {
 452  129 Node child = replacements.get(i);
 453  129 parent.insertChild(child, position+i);
 454    }
 455  119 element.detach();
 456    }
 457    else { // root element needs special treatment
 458    // I am assuming here that it is not possible
 459    // for parent to be null. I think this is true
 460    // in the current version, but it could change
 461    // if I made it possible to directly resolve an
 462    // element or a Nodes.
 463  3 Document doc = (Document) parent;
 464  3 int i = 0;
 465    // prolog and root
 466  3 while (true) {
 467  4 Node child = replacements.get(i);
 468  4 i++;
 469  4 if (child instanceof Element) {
 470  3 doc.setRootElement((Element) child);
 471  3 break;
 472    }
 473    else {
 474  1 doc.insertChild(
 475    child, doc.indexOf(element)
 476    );
 477    }
 478   
 479    }
 480    // epilog
 481  3 Element root = doc.getRootElement();
 482  3 int position = doc.indexOf(root);
 483  3 for (int j=i; j < replacements.size(); j++) {
 484  1 doc.insertChild(
 485    replacements.get(j), position+1+j-i
 486    );
 487    }
 488    }
 489    }
 490  50 else if (parse.equals("text")) {
 491  47 Nodes replacements
 492    = downloadTextDocument(url, encoding, builder, accept, acceptLanguage);
 493  44 for (int j = 0; j < replacements.size(); j++) {
 494  44 Node replacement = replacements.get(j);
 495  44 if (replacement instanceof Attribute) {
 496  1 ((Element) parent).addAttribute((Attribute) replacement);
 497    }
 498    else {
 499  43 parent.insertChild(replacement, parent.indexOf(element));
 500    }
 501    }
 502  44 parent.removeChild(element);
 503    }
 504    else {
 505  3 throw new BadParseAttributeException(
 506    "Bad value for parse attribute: " + parse,
 507    element.getDocument().getBaseURI());
 508    }
 509   
 510    }
 511    catch (IOException ex) {
 512  24 processFallback(element, builder, baseURLs, parent, ex);
 513    }
 514    catch (XPointerSyntaxException ex) {
 515  13 processFallback(element, builder, baseURLs, parent, ex);
 516    }
 517    catch (XPointerResourceException ex) {
 518    // Process fallbacks; I'm not sure this is correct
 519    // behavior. Possibly this should include nothing. See
 520    // http://lists.w3.org/Archives/Public/www-xml-xinclude-comments/2003Aug/0000.html
 521    // Daniel Veillard thinks this is correct. See
 522    // http://lists.w3.org/Archives/Public/www-xml-xinclude-comments/2003Aug/0001.html
 523  9 processFallback(element, builder, baseURLs, parent, ex);
 524    }
 525   
 526    }
 527  782 else if (isFallbackElement(element)) {
 528  3 throw new MisplacedFallbackException(
 529    "Fallback element outside include element",
 530    element.getDocument().getBaseURI()
 531    );
 532    }
 533    else {
 534  779 Elements children = element.getChildElements();
 535  779 for (int i = 0; i < children.size(); i++) {
 536  697 resolve(children.get(i), builder, baseURLs);
 537    }
 538    }
 539   
 540    }
 541   
 542   
 543    // ???? Move this into URIUtil when it goes public
 544  102 private static String getDirectoryBase(String parentBase) {
 545  1 if (parentBase.endsWith("/")) return parentBase;
 546  101 int lastSlash = parentBase.lastIndexOf('/');
 547  101 return parentBase.substring(0, lastSlash+1);
 548    }
 549   
 550   
 551   
 552  262 private static void verifyIncludeElement(Element element)
 553    throws XIncludeException {
 554   
 555  262 testHref(element);
 556  259 testForFragmentIdentifier(element);
 557  254 verifyEncoding(element);
 558  251 testForForbiddenChildElements(element);
 559    }
 560   
 561   
 562  262 private static void testHref(Element include) throws NoIncludeLocationException {
 563   
 564  262 String href = include.getAttributeValue("href");
 565  262 String xpointer = include.getAttributeValue("xpointer");
 566  262 if (href == null && xpointer == null) {
 567  3 throw new NoIncludeLocationException(
 568    "Missing href attribute",
 569    include.getDocument().getBaseURI()
 570    );
 571    }
 572    }
 573   
 574   
 575  259 private static void testForFragmentIdentifier(Element include)
 576    throws BadHrefAttributeException {
 577   
 578  259 String href = include.getAttributeValue("href");
 579  259 if (href != null) {
 580  242 if (href.indexOf('#') > -1) {
 581  5 throw new BadHrefAttributeException(
 582    "fragment identifier in URI " + href, include.getBaseURI()
 583    );
 584    }
 585    }
 586   
 587    }
 588   
 589   
 590  254 private static void verifyEncoding(Element include)
 591    throws BadEncodingAttributeException {
 592   
 593  254 String encoding = include.getAttributeValue("encoding");
 594  248 if (encoding == null) return;
 595    // production 81 of XML spec
 596    // EncName :=[A-Za-z] ([A-Za-z0-9._] | '-')*
 597  6 char[] text = encoding.toCharArray();
 598  6 if (text.length == 0) {
 599  1 throw new BadEncodingAttributeException(
 600    "Empty encoding attribute", include.getBaseURI());
 601    }
 602  5 char c = text[0];
 603  5 if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
 604  1 throw new BadEncodingAttributeException(
 605    "Illegal value for encoding attribute: " + encoding, include.getBaseURI()
 606    );
 607    }
 608  4 for (int i = 1; i < text.length; i++) {
 609  23 c = text[i];
 610  23 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
 611    || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.') {
 612  22 continue;
 613    }
 614  1 throw new BadEncodingAttributeException(
 615    "Illegal value for encoding attribute: " + encoding, include.getBaseURI()
 616    );
 617    }
 618   
 619    }
 620   
 621   
 622    // hack because URIUtil isn't public
 623  213 private static URL absolutize(URL baseURL, String href)
 624    throws MalformedURLException, BadHrefAttributeException {
 625   
 626  213 Element parent = new Element("c");
 627  213 parent.setBaseURI(baseURL.toExternalForm());
 628  213 Element child = new Element("c");
 629  213 parent.appendChild(child);
 630  213 child.addAttribute(new Attribute(
 631    "xml:base", "http://www.w3.org/XML/1998/namespace", href));
 632  213 URL result = new URL(child.getBaseURI());
 633  213 if (!"".equals(href) && result.equals(baseURL)) {
 634  3 if (! baseURL.toExternalForm().endsWith(href)) {
 635  1 throw new BadHrefAttributeException(href
 636    + " is not a syntactically correct IRI");
 637    }
 638    }
 639  212 return result;
 640   
 641    }
 642   
 643   
 644  7 private static void testURISyntax(String href) {
 645  7 Element e = new Element("e");
 646  7 e.setNamespaceURI(href);
 647    }
 648   
 649   
 650  222 private static String getXMLLangValue(Element element) {
 651   
 652  222 while (true) {
 653  289 Attribute lang = element.getAttribute(
 654    "lang", "http://www.w3.org/XML/1998/namespace");
 655  19 if (lang != null) return lang.getValue();
 656  270 ParentNode parent = element.getParent();
 657  4 if (parent == null) return "";
 658  199 else if (parent instanceof Document) return "";
 659  67 else element = (Element) parent;
 660    }
 661   
 662    }
 663   
 664   
 665    // This assumes current implementation of XPointer that
 666    // always selects exactly one element or throws an exception.
 667  19 private static Nodes resolveXPointerSelection(Nodes in,
 668    Builder builder, ArrayList baseURLs, Document original)
 669    throws IOException, ParsingException, XIncludeException {
 670   
 671  19 Element preinclude = (Element) in.get(0);
 672  19 return resolveSilently(preinclude, builder, baseURLs, original);
 673   
 674    }
 675   
 676   
 677  21 private static boolean contains(ParentNode ancestor, Node descendant) {
 678   
 679  21 for (Node parent = descendant;
 680  81 parent != null;
 681    parent=parent.getParent()) {
 682  2 if (parent == ancestor) return true;
 683    }
 684   
 685  19 return false;
 686   
 687    }
 688   
 689   
 690  19 private static Nodes resolveSilently(
 691    Element element, Builder builder, ArrayList baseURLs, Document originalDoc)
 692    throws IOException, ParsingException, XIncludeException {
 693   
 694    // There is no possibility the element passed to this method
 695    // is an include or a fallback element
 696  19 if (isIncludeElement(element) || isFallbackElement(element) ) {
 697  0 throw new RuntimeException(
 698    "XOM BUG: include or fallback element passed to resolveSilently;"
 699    + " please report with a test case");
 700    }
 701   
 702  19 Elements children = element.getChildElements();
 703  19 for (int i = 0; i < children.size(); i++) {
 704  7 resolve(children.get(i), builder, baseURLs, originalDoc);
 705    }
 706  19 return new Nodes(element);
 707   
 708    }
 709   
 710   
 711  251 private static void testForForbiddenChildElements(Element element)
 712    throws XIncludeException {
 713   
 714  251 int fallbacks = 0;
 715  251 Elements children = element.getChildElements();
 716  251 int size = children.size();
 717  251 for (int i = 0; i < size; i++) {
 718  51 Element child = children.get(i);
 719  51 if (XINCLUDE_NS.equals(child.getNamespaceURI())) {
 720  50 if ("fallback".equals(child.getLocalName())) {
 721  44 fallbacks++;
 722  44 if (fallbacks > 1) {
 723  4 throw new XIncludeException("Multiple fallback elements",
 724    element.getDocument().getBaseURI());
 725    }
 726    }
 727    else {
 728  6 throw new XIncludeException(
 729    "Include element contains an include child",
 730    element.getDocument().getBaseURI());
 731    }
 732    }
 733    }
 734   
 735    }
 736   
 737   
 738  46 private static void processFallback(Element includeElement,
 739    Builder builder, ArrayList baseURLs, ParentNode parent, Exception ex)
 740    throws XIncludeException, IOException, ParsingException {
 741   
 742  46 Element fallback
 743    = includeElement.getFirstChildElement("fallback", XINCLUDE_NS);
 744  46 if (fallback == null) {
 745  4 if (ex instanceof IOException) throw (IOException) ex;
 746  16 XIncludeException ex2 = new XIncludeException(
 747    ex.getMessage(), includeElement.getDocument().getBaseURI());
 748  16 ex2.initCause(ex);
 749  16 throw ex2;
 750    }
 751   
 752  26 while (fallback.getChildCount() > 0) {
 753  33 Node child = fallback.getChild(0);
 754  33 if (child instanceof Element) {
 755  17 resolve((Element) child, builder, baseURLs);
 756    }
 757  28 child = fallback.getChild(0);
 758  28 child.detach();
 759  28 parent.insertChild(child, parent.indexOf(includeElement));
 760    }
 761  21 includeElement.detach();
 762   
 763    }
 764   
 765   
 766    // I could probably move the xpointer out of this method
 767  163 private static Nodes downloadXMLDocument(
 768    URL source, String xpointer, Builder builder, ArrayList baseURLs,
 769    String accept, String acceptLanguage, String parentLanguage)
 770    throws IOException, ParsingException, XIncludeException,
 771    XPointerSyntaxException, XPointerResourceException {
 772   
 773  163 String base = source.toExternalForm();
 774  163 if (xpointer == null && baseURLs.indexOf(base) != -1) {
 775  5 throw new InclusionLoopException(
 776    "Tried to include the already included document " + base +
 777    " from " + baseURLs.get(baseURLs.size()-1), (String) baseURLs.get(baseURLs.size()-1));
 778    }
 779   
 780  158 URLConnection uc = source.openConnection();
 781  158 setHeaders(uc, accept, acceptLanguage);
 782  158 InputStream in = new BufferedInputStream(uc.getInputStream());
 783  139 Document doc;
 784  139 try {
 785  139 doc = builder.build(in, source.toExternalForm());
 786    }
 787    finally {
 788  139 in.close();
 789    }
 790   
 791  136 resolveInPlace(doc, builder, baseURLs);
 792  125 Nodes included;
 793  125 if (xpointer != null && xpointer.length() != 0) {
 794  67 included = XPointer.query(doc, xpointer);
 795    // fill in lang attributes here
 796  45 for (int i = 0; i < included.size(); i++) {
 797  45 Node node = included.get(i);
 798    // Current implementation can only select elements
 799  45 Element top = (Element) node;
 800  45 Attribute lang = top.getAttribute("lang",
 801    "http://www.w3.org/XML/1998/namespace");
 802  45 if (lang == null) {
 803  41 String childLanguage = getXMLLangValue(top);
 804  41 if (!parentLanguage.equals(childLanguage)) {
 805  9 top.addAttribute(new Attribute("xml:lang",
 806    "http://www.w3.org/XML/1998/namespace",
 807    childLanguage));
 808    }
 809    }
 810    }
 811    }
 812    else {
 813  58 included = new Nodes();
 814  58 for (int i = 0; i < doc.getChildCount(); i++) {
 815  88 Node child = doc.getChild(i);
 816  88 if (!(child instanceof DocType)) {
 817  70 included.append(child);
 818    }
 819    }
 820    }
 821    // so we can detach the old root if necessary
 822  103 doc.setRootElement(new Element("f"));
 823  103 for (int i = 0; i < included.size(); i++) {
 824  115 Node node = included.get(i);
 825    // Take account of xml:base attribute, which we normally
 826    // don't do when detaching
 827  115 String noFragment = node.getBaseURI();
 828  115 if (noFragment.indexOf('#') >= 0) {
 829  1 noFragment = noFragment.substring(0, noFragment.indexOf('#'));
 830    }
 831  115 node.detach();
 832  115 if (node instanceof Element) {
 833  103 ((Element) node).setBaseURI(noFragment);
 834    }
 835    }
 836   
 837  103 return included;
 838   
 839    }
 840   
 841   
 842    /**
 843    * <p>
 844    * This utility method reads a document at a specified URL
 845    * and returns the contents of that document as a <code>Text</code>.
 846    * It's used to include files with <code>parse="text"</code>.
 847    * </p>
 848    *
 849    * @param source <code>URL</code> of the document to download
 850    * @param encoding encoding of the document; e.g. UTF-8,
 851    * ISO-8859-1, etc.
 852    * @param builder the <code>Builder</code> used to build the
 853    * nodes included from other documents
 854    *
 855    * @return the document retrieved from the source <code>URL</code>
 856    *
 857    * @throws IOException if the remote document cannot
 858    * be read due to an I/O error
 859    */
 860  47 private static Nodes downloadTextDocument(
 861    URL source, String encoding, Builder builder,
 862    String accept, String language)
 863    throws IOException, XIncludeException {
 864   
 865  47 if (encoding == null || encoding.length() == 0) {
 866  46 encoding = "UTF-8";
 867    }
 868   
 869  47 URLConnection uc = source.openConnection();
 870  47 setHeaders(uc, accept, language);
 871   
 872  47 String encodingFromHeader = uc.getContentEncoding();
 873  47 String contentType = uc.getContentType();
 874  47 int contentLength = uc.getContentLength();
 875  7 if (contentLength < 0) contentLength = 1024;
 876  47 InputStream in = new BufferedInputStream(uc.getInputStream());
 877  44 try {
 878  0 if (encodingFromHeader != null) encoding = encodingFromHeader;
 879    else {
 880  44 if (contentType != null) {
 881  44 contentType = contentType.toLowerCase(Locale.ENGLISH);
 882  44 if (contentType.equals("text/xml")
 883    || contentType.equals("application/xml")
 884    || (contentType.startsWith("text/")
 885    && contentType.endsWith("+xml") )
 886    || (contentType.startsWith("application/")
 887    && contentType.endsWith("+xml"))) {
 888  24 encoding
 889    = EncodingHeuristics.readEncodingFromStream(in);
 890    }
 891    }
 892    }
 893    // workaround for pre-1.3 VMs that don't recognize UTF-16
 894  44 if (version.startsWith("1.2") || version.startsWith("1.1")) {
 895  0 if (encoding.equalsIgnoreCase("UTF-16")) {
 896    // is it big-endian or little-endian?
 897  0 in.mark(2);
 898  0 int first = in.read();
 899  0 if (first == 0xFF) encoding = "UnicodeLittle";
 900  0 else encoding="UnicodeBig";
 901  0 in.reset();
 902    }
 903  0 else if (encoding.equalsIgnoreCase("UnicodeBigUnmarked")) {
 904  0 encoding = "UnicodeBig";
 905    }
 906  0 else if (encoding.equalsIgnoreCase("UnicodeLittleUnmarked")) {
 907  0 encoding = "UnicodeLittle";
 908    }
 909    }
 910  44 Reader reader = new BufferedReader(
 911    new InputStreamReader(in, encoding)
 912    );
 913  44 StringBuffer sb = new StringBuffer(contentLength);
 914  44 for (int c = reader.read(); c != -1; c = reader.read()) {
 915  1548 sb.append((char) c);
 916    }
 917   
 918  44 NodeFactory factory = builder.getNodeFactory();
 919  44 if (factory != null) {
 920  5 return factory.makeText(sb.toString());
 921    }
 922  39 else return new Nodes(new Text(sb.toString()));
 923    }
 924    finally {
 925  44 in.close();
 926    }
 927   
 928    }
 929   
 930   
 931  205 private static void setHeaders(URLConnection uc, String accept,
 932    String language) throws BadHTTPHeaderException {
 933   
 934  205 if (accept != null) {
 935  2 checkHeader(accept);
 936  2 uc.setRequestProperty("accept", accept);
 937    }
 938  205 if (language != null) {
 939  2 checkHeader(language);
 940  2 uc.setRequestProperty("accept-language", language);
 941    }
 942   
 943    }
 944   
 945   
 946  474 private static void checkHeader(String header)
 947    throws BadHTTPHeaderException {
 948   
 949  464 if (header == null) return;
 950  10 int length = header.length();
 951  10 for (int i = 0; i < length; i++) {
 952  66 char c = header.charAt(i);
 953  66 if (c < 0x20 || c > 0x7E) {
 954  2 throw new BadHTTPHeaderException(
 955    "Header contains illegal character 0x"
 956    + Integer.toHexString(c).toUpperCase());
 957    }
 958    }
 959   
 960    }
 961   
 962   
 963  1063 private static boolean isIncludeElement(Element element) {
 964   
 965  1063 return element.getLocalName().equals("include")
 966    && element.getNamespaceURI().equals(XINCLUDE_NS);
 967   
 968    }
 969   
 970   
 971  801 private static boolean isFallbackElement(Element element) {
 972   
 973  801 return element.getLocalName().equals("fallback")
 974    && element.getNamespaceURI().equals(XINCLUDE_NS);
 975   
 976    }
 977   
 978   
 979    }