Clover coverage report - Clover results for XOM 1.2d1
Coverage timestamp: Wed Feb 8 2006 08:31:33 EST
file stats: LOC: 1,290   Methods: 29
NCLOC: 760   Classes: 3
 
 Source file Conditionals Statements Methods TOTAL
Builder.java 87.5% 93.4% 100% 93%
coverage coverage
 1    /* Copyright 2002-2005 Elliotte Rusty Harold
 2   
 3    This library is free software; you can redistribute it and/or modify
 4    it under the terms of version 2.1 of the GNU Lesser General Public
 5    License as published by the Free Software Foundation.
 6   
 7    This library is distributed in the hope that it will be useful,
 8    but WITHOUT ANY WARRANTY; without even the implied warranty of
 9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10    GNU Lesser General Public License for more details.
 11   
 12    You should have received a copy of the GNU Lesser General Public
 13    License along with this library; if not, write to the
 14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 15    Boston, MA 02111-1307 USA
 16   
 17    You can contact Elliotte Rusty Harold by sending e-mail to
 18    elharo@metalab.unc.edu. Please include the word "XOM" in the
 19    subject line. The XOM home page is located at http://www.xom.nu/
 20    */
 21   
 22    package nu.xom;
 23   
 24    import java.io.CharConversionException;
 25    import java.io.File;
 26    import java.io.FileInputStream;
 27    import java.io.IOException;
 28    import java.io.InputStream;
 29    import java.io.Reader;
 30    import java.io.StringReader;
 31    import java.io.UTFDataFormatException;
 32    import java.net.MalformedURLException;
 33    import java.net.URL;
 34   
 35    import org.xml.sax.ErrorHandler;
 36    import org.xml.sax.InputSource;
 37    import org.xml.sax.SAXException;
 38    import org.xml.sax.SAXNotRecognizedException;
 39    import org.xml.sax.SAXNotSupportedException;
 40    import org.xml.sax.SAXParseException;
 41    import org.xml.sax.XMLFilter;
 42    import org.xml.sax.XMLReader;
 43    import org.xml.sax.helpers.XMLReaderFactory;
 44   
 45    import org.apache.xerces.impl.Version;
 46   
 47    /**
 48    * <p>
 49    * This class is responsible for creating XOM <code>Document</code>
 50    * objects from a URL, file, string, or input stream by reading
 51    * an XML document. A SAX parser is used to read the
 52    * document and report any well-formedness errors.
 53    * </p>
 54    *
 55    * @author Elliotte Rusty Harold
 56    * @version 1.2b1
 57    *
 58    */
 59    public class Builder {
 60   
 61   
 62    private XMLReader parser;
 63    private NodeFactory factory;
 64   
 65    private static double xercesVersion = 2.6;
 66   
 67    static {
 68   
 69  19 try {
 70  19 String versionString = Version.getVersion();
 71  19 versionString = versionString.substring(9, 12);
 72  19 xercesVersion = Double.valueOf(versionString).doubleValue();
 73    }
 74    catch (Exception ex) {
 75    // The version string format changed so presumably it's
 76    // 2.6 or later
 77    }
 78    catch (Error err) {
 79    // Xerces not installed, so none of this matters
 80    }
 81   
 82    }
 83   
 84   
 85    /**
 86    * <p>
 87    * Creates a <code>Builder</code> that uses the default node
 88    * factory and chooses among any available SAX2 parsers.
 89    * In order of preference, it looks for:
 90    * </p>
 91    *
 92    * <ol>
 93    * <li>Xerces 2.x (a.k.a. IBM XML parser for Java)</li>
 94    * <li>GNU &AElig;lfred</li>
 95    * <li>Crimson</li>
 96    * <li>Piccolo</li>
 97    * <li>Oracle</li>
 98    * <li>XP</li>
 99    * <li>Saxon's &AElig;lfred</li>
 100    * <li>dom4j's &AElig;lfred</li>
 101    * <li>The platform default specified by the
 102    * <code>org.xml.sax.driver</code> system property</li>
 103    * </ol>
 104    *
 105    * <p>
 106    * Parsers must implicitly or explicitly support the
 107    * http://xml.org/sax/features/external-general-entities
 108    * and
 109    * http://xml.org/sax/features/external-parameter-entities
 110    * features XOM requires. Parsers that don't are rejected
 111    * automatically.
 112    * </p>
 113    *
 114    * @throws XMLException if no satisfactory parser is
 115    * installed in the local class path
 116    */
 117  912 public Builder() {
 118  912 this(false);
 119    }
 120   
 121   
 122    /**
 123    * <p>
 124    * Creates a <code>Builder</code> based on an optionally validating
 125    * parser. If the <code>validate</code> argument
 126    * is true, then a validity error while
 127    * parsing will cause a fatal error; that is,
 128    * it will throw a <code>ValidityException</code>.
 129    * </p>
 130    *
 131    * @param validate true if the parser should
 132    * validate the document while parsing
 133    *
 134    * @throws XMLException if no satisfactory parser
 135    * is installed in the local class path
 136    */
 137  1062 public Builder(boolean validate) {
 138  1062 this(findParser(validate), validate, null);
 139    }
 140   
 141   
 142    /**
 143    * <p>
 144    * Creates a <code>Builder</code> based on an optionally
 145    * validating parser that builds node objects with the supplied
 146    * factory. If the <code>validate</code> argument is true, then
 147    * a validity error while parsing will cause a fatal error; that
 148    * is, it will throw a <code>ValidityException</code>.
 149    * </p>
 150    *
 151    * @param validate true if the parser should
 152    * validate the document while parsing
 153    * @param factory the <code>NodeFactory</code> that creates
 154    * the node objects for this <code>Builder</code>
 155    *
 156    * @throws XMLException if no satisfactory parser
 157    * is installed in the local class path
 158    */
 159  1 public Builder(boolean validate, NodeFactory factory) {
 160  1 this(findParser(validate), validate, factory);
 161    }
 162   
 163   
 164    // These are stored in the order of preference.
 165    private static String[] parsers = {
 166    "nu.xom.XML1_0Parser",
 167    "nu.xom.JDK15XML1_0Parser",
 168    "org.apache.xerces.parsers.SAXParser",
 169    "com.sun.org.apache.xerces.internal.parsers.SAXParser",
 170    "gnu.xml.aelfred2.XmlReader",
 171    "org.apache.crimson.parser.XMLReaderImpl",
 172    "com.bluecast.xml.Piccolo",
 173    "oracle.xml.parser.v2.SAXParser",
 174    "com.jclark.xml.sax.SAX2Driver",
 175    "net.sf.saxon.aelfred.SAXDriver",
 176    "com.icl.saxon.aelfred.SAXDriver",
 177    "org.dom4j.io.aelfred2.SAXDriver",
 178    "org.dom4j.io.aelfred.SAXDriver"
 179    };
 180   
 181   
 182  1123 static XMLReader findParser(boolean validate) {
 183   
 184    // first look for Xerces; we only trust Xerces if
 185    // we set it up; and we need to configure it specially
 186    // so we can't load it with the XMLReaderFactory
 187  1123 XMLReader parser;
 188  1123 try {
 189  1123 parser = new XML1_0Parser();
 190  1123 setupParser(parser, validate);
 191  1123 return parser;
 192    }
 193    catch (SAXException ex) {
 194    // look for next one
 195    }
 196    catch (NoClassDefFoundError err) {
 197    // Xerces is not available; look for next one
 198    }
 199   
 200  0 try {
 201  0 parser = (XMLReader) Class.forName(
 202    "nu.xom.JDK15XML1_0Parser").newInstance();
 203  0 setupParser(parser, validate);
 204  0 return parser;
 205    }
 206    catch (SAXException ex) {
 207    // look for next one
 208    }
 209    catch (InstantiationException ex) {
 210    // look for next one
 211    }
 212    catch (ClassNotFoundException ex) {
 213    // look for next one
 214    }
 215    catch (IllegalAccessException ex) {
 216    // look for next one
 217    }
 218    catch (NoClassDefFoundError err) {
 219    // Xerces is not available; look for next one
 220    }
 221   
 222    // XMLReaderFactory.createXMLReader never returns
 223    // null. If it can't locate the parser, it throws
 224    // a SAXException.
 225  0 for (int i = 2; i < parsers.length; i++) {
 226  0 try {
 227  0 parser = XMLReaderFactory.createXMLReader(parsers[i]);
 228  0 setupParser(parser, validate);
 229  0 return parser;
 230    }
 231    catch (SAXException ex) {
 232    // try the next one
 233    }
 234    catch (NoClassDefFoundError err) {
 235    // try the next one
 236    }
 237    }
 238   
 239  0 try { // default
 240  0 parser = XMLReaderFactory.createXMLReader();
 241  0 setupParser(parser, validate);
 242  0 return parser;
 243    }
 244    catch (SAXException ex) {
 245  0 throw new XMLException(
 246    "Could not find a suitable SAX2 parser", ex);
 247    }
 248   
 249    }
 250   
 251   
 252  2289 private static void setupParser(XMLReader parser, boolean validate)
 253    throws SAXNotRecognizedException, SAXNotSupportedException {
 254   
 255  2289 XMLReader baseParser = parser;
 256  2289 while (baseParser instanceof XMLFilter) {
 257  25 XMLReader parent = ((XMLFilter) baseParser).getParent();
 258  5 if (parent == null) break;
 259  20 baseParser = parent;
 260    }
 261   
 262  2289 String parserName = baseParser.getClass().getName();
 263  2289 parser.setFeature(
 264    "http://xml.org/sax/features/namespace-prefixes", true);
 265  2289 parser.setFeature(
 266    "http://xml.org/sax/features/namespaces", true);
 267  2289 if (!validate) {
 268  1995 if (parserName.equals( // Crimson workaround
 269    "org.apache.crimson.parser.XMLReaderImpl")) {
 270  14 parser.setErrorHandler(
 271    new NamespaceWellformednessRequired()
 272    );
 273    }
 274    else {
 275  1981 parser.setFeature(
 276    "http://xml.org/sax/features/external-general-entities",
 277    true
 278    );
 279  1980 parser.setFeature(
 280    "http://xml.org/sax/features/external-parameter-entities",
 281    true
 282    );
 283    }
 284    }
 285    else {
 286  294 parser.setFeature(
 287    "http://xml.org/sax/features/validation", true);
 288  293 parser.setErrorHandler(new ValidityRequired());
 289    }
 290   
 291  2287 try {
 292  2287 parser.setFeature(
 293    "http://xml.org/sax/features/string-interning", true);
 294    }
 295    catch (SAXException ex) {
 296    // This parser does not support string interning.
 297    // We can live without that.
 298    }
 299   
 300    // A couple of Xerces specific properties
 301  2287 if (parserName.equals("nu.xom.XML1_0Parser")
 302    || parserName.equals("nu.xom.JDK15XML1_0Parser")
 303    || parserName.equals("org.apache.xerces.parsers.SAXParser")
 304    || parserName.equals("com.sun.org.apache.xerces.internal.parsers.SAXParser")) {
 305  2262 try {
 306  2262 parser.setFeature(
 307    "http://apache.org/xml/features/allow-java-encodings", true);
 308    }
 309    catch (SAXException ex) {
 310    // Possibly an earlier version of Xerces; no big deal.
 311    // We can live without this feature.
 312    }
 313    // See http://nagoya.apache.org/bugzilla/show_bug.cgi?id=23768
 314    // if you care to know why this line breaks unit tests on
 315    // versions of Xerces prior to 2.6.1
 316  2262 try {
 317  2262 parser.setFeature(
 318    "http://apache.org/xml/features/standard-uri-conformant",
 319    true);
 320    }
 321    catch (SAXException ex) {
 322    // Possibly an earlier version of Xerces, or a
 323    // or a non-Xerces parser; no big deal.
 324    // We can live without this.
 325    }
 326    }
 327   
 328    }
 329   
 330   
 331    /**
 332    * <p>
 333    * Creates a <code>Builder</code> that uses
 334    * the specified SAX <code>XMLReader</code>.
 335    * Custom SAX features and properties such as
 336    * schema validation can be set on this <code>XMLReader</code>
 337    * before passing it to this method.
 338    * </p>
 339    *
 340    * @param parser the SAX2 <code>XMLReader</code> that
 341    * parses the document
 342    *
 343    * @throws XMLException if <code>parser</code> does not support the
 344    * features XOM requires
 345    */
 346  37 public Builder(XMLReader parser) {
 347  37 this(parser, false);
 348    }
 349   
 350   
 351    /**
 352    * <p>
 353    * Creates a <code>Builder</code> that uses
 354    * the specified <code>NodeFactory</code> to create
 355    * node objects.
 356    * </p>
 357    *
 358    * @param factory the <code>NodeFactory</code> that creates
 359    * the node objects for this <code>Builder</code>
 360    *
 361    * @throws XMLException if no satisfactory parser is
 362    * installed in the local class path
 363    */
 364  58 public Builder(NodeFactory factory) {
 365  58 this(findParser(false), false, factory);
 366    }
 367   
 368   
 369    /**
 370    * <p>
 371    * Creates a optionally validating <code>Builder</code> based
 372    * on the specified parser object. Custom SAX features and
 373    * properties such as schema validation can be set on this
 374    * <code>XMLReader</code> before passing it to this method.
 375    * </p>
 376    *
 377    * <p>
 378    * If the validate argument is true, then a validity error
 379    * while parsing will cause a fatal error; that is, it
 380    * will throw a <code>ParsingException</code>
 381    * </p>
 382    *
 383    * @param parser the SAX2 <code>XMLReader</code> that parses
 384    * the document
 385    * @param validate true if the parser should validate
 386    * the document while parsing
 387    *
 388    */
 389  43 public Builder(XMLReader parser, boolean validate) {
 390  43 this(parser, validate, null);
 391    }
 392   
 393   
 394    /**
 395    * <p>
 396    * Creates an optionally validating <code>Builder</code> that reads
 397    * data from the specified parser object and constructs new nodes
 398    * using the specified factory object. Custom SAX features and
 399    * properties such as schema validation can be set on this
 400    * <code>XMLReader</code> before passing it to this method.
 401    * </p>
 402    *
 403    * <p>
 404    * If the <code>validate</code> argument is true, then a validity
 405    * error while parsing will throw a <code>ParsingException</code>.
 406    * </p>
 407    *
 408    * @param parser the SAX2 <code>XMLReader</code> that parses
 409    * the document
 410    * @param validate true if the parser should validate the
 411    * document while parsing
 412    * @param factory the <code>NodeFactory</code>
 413    * this builder uses to create objects in the tree
 414    *
 415    * @throws XMLException if <code>parser</code> does not support
 416    * the features XOM requires
 417    *
 418    */
 419  1166 public Builder(
 420    XMLReader parser, boolean validate, NodeFactory factory) {
 421   
 422  1166 try {
 423  1166 setupParser(parser, validate);
 424    }
 425    catch (SAXException ex) {
 426  2 if (validate) {
 427  1 throw new XMLException(parser.getClass().getName()
 428    + " does not support validation.", ex);
 429    }
 430    else {
 431  1 throw new XMLException(parser.getClass().getName()
 432    + " does not support the entity resolution"
 433    + " features XOM requires.", ex);
 434    }
 435    }
 436   
 437    // setup the handlers
 438  1164 this.parser = parser;
 439  1164 this.factory = factory;
 440  1164 setHandlers();
 441   
 442    }
 443   
 444   
 445  1106 private static boolean knownGoodParser(XMLReader parser) {
 446   
 447  1106 String parserName = parser.getClass().getName();
 448   
 449    // In general, a filter may violate the constraints of XML 1.0.
 450    // However, I specifically trust Norm Walsh not to do that, so
 451    // if his filters are being used we look at the parent instead.
 452  1106 if (parserName.equals("org.apache.xml.resolver.tools.ResolvingXMLFilter")) {
 453  1 XMLFilter filter = (XMLFilter) parser;
 454  1 parserName = filter.getParent().getClass().getName();
 455    }
 456   
 457    // These parsers are known to not make all the checks
 458    // they're supposed to. :-(
 459  1 if (parserName.equals("gnu.xml.aelfred2.XmlReader")) return false;
 460  1 if (parserName.equals("net.sf.saxon.aelfred.SAXDriver")) return false;
 461  1 if (parserName.equals("com.icl.saxon.aelfred.SAXDriver")) return false;
 462   
 463  1103 if (parserName.equals("org.apache.xerces.parsers.SAXParser")
 464    && xercesVersion >= 2.4) {
 465  2 return false;
 466    }
 467   
 468  1101 for (int i = 0; i < parsers.length; i++) {
 469  1079 if (parserName.equals(parsers[i])) return true;
 470    }
 471  22 return false;
 472   
 473    }
 474   
 475   
 476  1164 private void setHandlers() {
 477   
 478  1164 XOMHandler handler;
 479  1164 if ((factory == null
 480    || factory.getClass().getName().equals("nu.xom.NodeFactory"))
 481    && knownGoodParser(parser)) {
 482    // If no factory is supplied by user, don't
 483    // return one
 484  1079 NodeFactory tempFactory = factory;
 485  1078 if (tempFactory == null) tempFactory = new NodeFactory();
 486  1079 handler = new NonVerifyingHandler(tempFactory);
 487    }
 488    else {
 489  27 if (factory == null) factory = new NodeFactory();
 490  85 handler = new XOMHandler(factory);
 491    }
 492  1164 parser.setContentHandler(handler);
 493  1164 parser.setDTDHandler(handler);
 494   
 495  1164 try {
 496  1164 parser.setProperty(
 497    "http://xml.org/sax/properties/lexical-handler",
 498    handler);
 499    }
 500    catch (SAXException ex) {
 501    // This parser does not support lexical events.
 502    // We can live without them, though it does mean
 503    // there won't be any comments or a DOCTYPE declaration
 504    // in the tree.
 505    }
 506   
 507  1164 try {
 508  1164 parser.setProperty(
 509    "http://xml.org/sax/properties/declaration-handler",
 510    handler);
 511    // Due to Crimson bugs in misidentifying the internal and
 512    // external DTD subsets, we only build the internal DTD
 513    // subset if there is no external DTD subset.
 514  1159 if (parser.getClass().getName().equals(
 515    "org.apache.crimson.parser.XMLReaderImpl")) {
 516  15 handler.usingCrimson = true;
 517    }
 518    }
 519    catch (SAXException ex) {
 520    // This parser does not support declaration events.
 521    // We can live without them, though it does mean
 522    // they won't be any internal DTD subset.
 523    }
 524   
 525    }
 526   
 527   
 528    /**
 529    * <p>
 530    * Parses the document at the specified URL.
 531    * </p>
 532    *
 533    * <p>
 534    * Note that relative URLs generally do not work here, as
 535    * there's no base to resolve them against. This includes
 536    * relative URLs that point into the file system, though this
 537    * is somewhat platform dependent. Furthermore, <code>file</code>
 538    * URLs often only work when they adhere exactly to RFC 2396
 539    * syntax. URLs that work in Internet Explorer often fail when
 540    * used in Java. If you're reading XML from a file, more reliable
 541    * results are obtained by using the <code>build</code> method
 542    * that takes a <code>java.io.File</code> object as an argument.
 543    * </p>
 544    *
 545    * @param systemID the URL (generally absolute)
 546    * from which the document is read.
 547    * The URL's scheme must be one supported by the Java VM.
 548    *
 549    * @return the parsed <code>Document</code>
 550    *
 551    * @throws ValidityException if a validity error is detected. This
 552    * is only thrown if the builder has been instructed to validate.
 553    * @throws ParsingException if a well-formedness error is detected
 554    * @throws IOException if an I/O error such as a broken socket
 555    * prevents the document from being fully read
 556    */
 557  1889 public Document build(String systemID)
 558    throws ParsingException, ValidityException, IOException {
 559   
 560  1889 systemID = canonicalizeURL(systemID);
 561  1889 InputSource source = new InputSource(systemID);
 562  1889 return build(source);
 563   
 564    }
 565   
 566   
 567    /**
 568    * <p>
 569    * Reads the document from an input stream.
 570    * </p>
 571    *
 572    * @param in the input stream from which the document is read
 573    *
 574    * @return the parsed <code>Document</code>
 575    *
 576    * @throws ValidityException if a validity error is detected;
 577    * only thrown if the builder has been instructed to validate
 578    * @throws ParsingException if a well-formedness error is detected
 579    * @throws IOException if an I/O error such as a broken
 580    * socket prevents the document from being fully read.
 581    */
 582  93 public Document build(InputStream in)
 583    throws ParsingException, ValidityException, IOException {
 584   
 585  93 InputSource source = new InputSource(in);
 586  93 return build(source);
 587   
 588    }
 589   
 590   
 591    /**
 592    * <p>
 593    * Reads the document from an input stream while specifying
 594    * a base URI (which need not be the stream's actual URI).
 595    * </p>
 596    *
 597    * @param in the input stream from which the document is read
 598    * @param baseURI the base URI for this document
 599    *
 600    * @return the parsed <code>Document</code>
 601    *
 602    * @throws ValidityException if a validity error is detected;
 603    * only thrown if the builder has been instructed to validate
 604    * @throws ParsingException if a well-formedness error is detected
 605    * @throws IOException if an I/O error such as a broken
 606    * socket prevents the document from being fully read
 607    */
 608  2866 public Document build(InputStream in, String baseURI)
 609    throws ParsingException, ValidityException, IOException {
 610   
 611  2866 baseURI = canonicalizeURL(baseURI);
 612  2866 InputSource source = new InputSource(in);
 613  2866 source.setSystemId(baseURI);
 614  2866 return build(source);
 615   
 616    }
 617   
 618   
 619    // Nasty hack to make sure we get the right form
 620    // of file URLs on Windows
 621    private static String fileURLPrefix = "file://";
 622   
 623    static {
 624  19 String os = System.getProperty("os.name", "Unix");
 625    // I could do System.setProperty("os.name" "Windows") to test
 626    // this, but I'd need to use a fresh ClassLoader to rerun the
 627    // static initializer block.
 628  19 if (os.indexOf("Windows") >= 0) {
 629  0 fileURLPrefix = "file:/";
 630    }
 631    }
 632   
 633   
 634    /**
 635    * <p>
 636    * Reads the document from a file.
 637    * The base URI of the document is set to the
 638    * location of the file.
 639    * </p>
 640    *
 641    * @param in the file from which the document is read
 642    *
 643    * @return the parsed <code>Document</code>
 644    *
 645    * @throws ValidityException if a validity error is detected. This
 646    * is only thrown if the builder has been instructed to validate.
 647    * @throws ParsingException if a well-formedness error is detected
 648    * @throws IOException if an I/O error such as a bad disk
 649    * prevents the file from being read
 650    */
 651  2213 public Document build(File in)
 652    throws ParsingException, ValidityException, IOException {
 653   
 654  2213 InputStream fin = new FileInputStream(in);
 655    // Java's toURL method doesn't properly escape file
 656    // names so we have to do it manually
 657  2144 String absolute = in.getAbsolutePath();
 658  2144 StringBuffer url = new StringBuffer(fileURLPrefix);
 659  2144 int length = absolute.length();
 660  2144 char separatorChar = File.separatorChar;
 661  2144 for (int i = 0; i < length; i++) {
 662  210618 char c = absolute.charAt(i);
 663  20928 if (c == separatorChar) url.append('/');
 664    else {
 665  189690 switch(c) {
 666  1 case ' ':
 667  1 url.append("%20");
 668  1 break;
 669  1 case '!':
 670  1 url.append(c);
 671  1 break;
 672  2 case '"':
 673  2 url.append("%22");
 674  2 break;
 675  1 case '#':
 676  1 url.append("%23");
 677  1 break;
 678  1 case '$':
 679  1 url.append(c);
 680  1 break;
 681  1 case '%':
 682  1 url.append("%25");
 683  1 break;
 684  1 case '&':
 685    // ampersand does not need to be encoded in
 686    // path part of URL
 687  1 url.append('&');
 688  1 break;
 689  2 case '\'':
 690  2 url.append(c);
 691  2 break;
 692  1 case '(':
 693  1 url.append(c);
 694  1 break;
 695  1 case ')':
 696  1 url.append(c);
 697  1 break;
 698  1 case '*':
 699  1 url.append(c);
 700  1 break;
 701  1 case '+':
 702  1 url.append("%2B");
 703  1 break;
 704  1 case ',':
 705  1 url.append(c);
 706  1 break;
 707  3954 case '-':
 708  3954 url.append(c);
 709  3954 break;
 710  2163 case '.':
 711  2163 url.append(c);
 712  2163 break;
 713  0 case '/':
 714  0 url.append("%2F");
 715  0 break;
 716  495 case '0':
 717  495 url.append(c);
 718  495 break;
 719  590 case '1':
 720  590 url.append(c);
 721  590 break;
 722  354 case '2':
 723  354 url.append(c);
 724  354 break;
 725  323 case '3':
 726  323 url.append(c);
 727  323 break;
 728  277 case '4':
 729  277 url.append(c);
 730  277 break;
 731  186 case '5':
 732  186 url.append(c);
 733  186 break;
 734  154 case '6':
 735  154 url.append(c);
 736  154 break;
 737  253 case '7':
 738  253 url.append(c);
 739  253 break;
 740  369 case '8':
 741  369 url.append(c);
 742  369 break;
 743  326 case '9':
 744  326 url.append(c);
 745  326 break;
 746  1 case ':':
 747  1 url.append(c);
 748  1 break;
 749  1 case ';':
 750  1 url.append(c);
 751  1 break;
 752  1 case '<':
 753  1 url.append("%3C");
 754  1 break;
 755  1 case '=':
 756  1 url.append(c);
 757  1 break;
 758  1 case '>':
 759  1 url.append("%3E");
 760  1 break;
 761  1 case '?':
 762  1 url.append("%3F");
 763  1 break;
 764  1 case '@':
 765  1 url.append("%40");
 766  1 break;
 767  371 case 'A':
 768  371 url.append(c);
 769  371 break;
 770  221 case 'B':
 771  221 url.append(c);
 772  221 break;
 773  1999 case 'C':
 774  1999 url.append(c);
 775  1999 break;
 776  36 case 'D':
 777  36 url.append(c);
 778  36 break;
 779  2555 case 'E':
 780  2555 url.append(c);
 781  2555 break;
 782  1848 case 'F':
 783  1848 url.append(c);
 784  1848 break;
 785  1 case 'G':
 786  1 url.append(c);
 787  1 break;
 788  19 case 'H':
 789  19 url.append(c);
 790  19 break;
 791  188 case 'I':
 792  188 url.append(c);
 793  188 break;
 794  1 case 'J':
 795  1 url.append(c);
 796  1 break;
 797  105 case 'K':
 798  105 url.append(c);
 799  105 break;
 800  71 case 'L':
 801  71 url.append(c);
 802  71 break;
 803  3577 case 'M':
 804  3577 url.append(c);
 805  3577 break;
 806  74 case 'N':
 807  74 url.append(c);
 808  74 break;
 809  2504 case 'O':
 810  2504 url.append(c);
 811  2504 break;
 812  2213 case 'P':
 813  2213 url.append(c);
 814  2213 break;
 815  1 case 'Q':
 816  1 url.append(c);
 817  1 break;
 818  414 case 'R':
 819  414 url.append(c);
 820  414 break;
 821  5162 case 'S':
 822  5162 url.append(c);
 823  5162 break;
 824  7421 case 'T':
 825  7421 url.append(c);
 826  7421 break;
 827  2495 case 'U':
 828  2495 url.append(c);
 829  2495 break;
 830  264 case 'V':
 831  264 url.append(c);
 832  264 break;
 833  27 case 'W':
 834  27 url.append(c);
 835  27 break;
 836  2584 case 'X':
 837  2584 url.append(c);
 838  2584 break;
 839  4 case 'Y':
 840  4 url.append(c);
 841  4 break;
 842  1 case 'Z':
 843  1 url.append(c);
 844  1 break;
 845  1 case '[':
 846  1 url.append("%5B");
 847  1 break;
 848  1 case '\\':
 849  1 url.append("%5C");
 850  1 break;
 851  1 case ']':
 852  1 url.append("%5D");
 853  1 break;
 854  1 case '^':
 855  1 url.append("%5E");
 856  1 break;
 857  4407 case '_':
 858  4407 url.append(c);
 859  4407 break;
 860  1 case '`':
 861  1 url.append("%60");
 862  1 break;
 863  12974 case 'a':
 864  12974 url.append(c);
 865  12974 break;
 866  1147 case 'b':
 867  1147 url.append(c);
 868  1147 break;
 869  4811 case 'c':
 870  4811 url.append(c);
 871  4811 break;
 872  2951 case 'd':
 873  2951 url.append(c);
 874  2951 break;
 875  17287 case 'e':
 876  17287 url.append(c);
 877  17287 break;
 878  2093 case 'f':
 879  2093 url.append(c);
 880  2093 break;
 881  119 case 'g':
 882  119 url.append(c);
 883  119 break;
 884  2316 case 'h':
 885  2316 url.append(c);
 886  2316 break;
 887  5382 case 'i':
 888  5382 url.append(c);
 889  5382 break;
 890  2144 case 'j':
 891  2144 url.append(c);
 892  2144 break;
 893  328 case 'k':
 894  328 url.append(c);
 895  328 break;
 896  7860 case 'l':
 897  7860 url.append(c);
 898  7860 break;
 899  3962 case 'm':
 900  3962 url.append(c);
 901  3962 break;
 902  5596 case 'n':
 903  5596 url.append(c);
 904  5596 break;
 905  11427 case 'o':
 906  11427 url.append(c);
 907  11427 break;
 908  965 case 'p':
 909  965 url.append(c);
 910  965 break;
 911  6 case 'q':
 912  6 url.append(c);
 913  6 break;
 914  10408 case 'r':
 915  10408 url.append(c);
 916  10408 break;
 917  22543 case 's':
 918  22543 url.append(c);
 919  22543 break;
 920  18709 case 't':
 921  18709 url.append(c);
 922  18709 break;
 923  3282 case 'u':
 924  3282 url.append(c);
 925  3282 break;
 926  191 case 'v':
 927  191 url.append(c);
 928  191 break;
 929  103 case 'w':
 930  103 url.append(c);
 931  103 break;
 932  4754 case 'x':
 933  4754 url.append(c);
 934  4754 break;
 935  282 case 'y':
 936  282 url.append(c);
 937  282 break;
 938  9 case 'z':
 939  9 url.append(c);
 940  9 break;
 941  1 case '{':
 942  1 url.append("%7B");
 943  1 break;
 944  1 case '|':
 945  1 url.append("%7C");
 946  1 break;
 947  1 case '}':
 948  1 url.append("%7D");
 949  1 break;
 950  1 case '~':
 951  1 url.append(c);
 952  1 break;
 953  3 default:
 954  3 if (c < 0xD800 || c > 0xDFFF) {
 955  3 url.append(URIUtil.percentEscape(c));
 956    }
 957  0 else if (c <= 0xDBFF) {
 958    // high surrogate; therefore we need to
 959    // grab the next half before encoding
 960  0 i++;
 961  0 try {
 962  0 char low = absolute.charAt(i);
 963  0 String character = String.valueOf(c)+String.valueOf(low);
 964  0 byte[] data = character.getBytes("UTF8");
 965    // Always exactly 4 bytes, unless the encoder is buggy
 966  0 for (int j=0; j < 4; j++) {
 967  0 url.append('%');
 968  0 String hex = Integer.toHexString(data[j]).toUpperCase();
 969  0 url.append(hex.substring(hex.length()-2));
 970    }
 971    }
 972    catch (IndexOutOfBoundsException ex) {
 973    // file name contains a high half and not a low half
 974  0 url = new StringBuffer(0);
 975  0 break;
 976    }
 977    }
 978    else {
 979    // low half not preceded by high half
 980    // Can't create a base URI
 981  0 url = new StringBuffer(0);
 982  0 break;
 983    }
 984    }
 985    }
 986    }
 987   
 988  2144 String base = url.toString();
 989  2144 try {
 990  2144 Document doc = build(fin, base);
 991  1753 return doc;
 992    }
 993    finally {
 994  2144 fin.close();
 995    }
 996   
 997    }
 998   
 999   
 1000    /**
 1001    * <p>
 1002    * Reads the document from a reader.
 1003    * </p>
 1004    *
 1005    * @param in the reader from which the document is read
 1006    *
 1007    * @return the parsed <code>Document</code>
 1008    *
 1009    * @throws ValidityException if a validity error is detected. This
 1010    * is only thrown if the builder has been instructed to validate.
 1011    * @throws ParsingException if a well-formedness error is detected
 1012    * @throws IOException if an I/O error such as a bad disk
 1013    * prevents the document from being fully read
 1014    */
 1015  35 public Document build(Reader in)
 1016    throws ParsingException, ValidityException, IOException {
 1017   
 1018  35 InputSource source = new InputSource(in);
 1019  35 return build(source);
 1020   
 1021    }
 1022   
 1023   
 1024    /**
 1025    * <p>
 1026    * Reads the document from a character stream while
 1027    * specifying a base URI.
 1028    * </p>
 1029    *
 1030    * @param in the reader from which the document
 1031    * is read
 1032    * @param baseURI the base URI for this document
 1033    *
 1034    * @return the parsed <code>Document</code>
 1035    *
 1036    * @throws ValidityException if a validity error is detected. This
 1037    * is only thrown if the builder has been instructed to
 1038    * validate.
 1039    * @throws ParsingException if a well-formedness error is detected
 1040    * @throws IOException if an I/O error such as a bad disk
 1041    * prevents the document from being completely read
 1042    */
 1043  17387 public Document build(Reader in, String baseURI)
 1044    throws ParsingException, ValidityException, IOException {
 1045   
 1046  17387 baseURI = canonicalizeURL(baseURI);
 1047  17387 InputSource source = new InputSource(in);
 1048  17387 source.setSystemId(baseURI);
 1049  17387 return build(source);
 1050   
 1051    }
 1052   
 1053   
 1054    /**
 1055    * <p>
 1056    * Reads the document from the contents of a string.
 1057    * </p>
 1058    *
 1059    * @param document the string that contains
 1060    * the XML document.
 1061    * @param baseURI the base URI for this document
 1062    *
 1063    * @return the parsed <code>Document</code>
 1064    *
 1065    * @throws ValidityException if a validity error is detected. This
 1066    * is only thrown if the builder has been instructed to
 1067    * validate.
 1068    * @throws ParsingException if a well-formedness error is detected
 1069    * @throws IOException if an I/O error such as a bad disk
 1070    * prevents the document's external DTD subset from being read
 1071    */
 1072  17383 public Document build(String document, String baseURI)
 1073    throws ParsingException, ValidityException, IOException {
 1074   
 1075  17383 Reader reader = new StringReader(document);
 1076  17383 return build(reader, baseURI);
 1077   
 1078    }
 1079   
 1080    // needed to work around a bug in Xerces and Crimson
 1081    // for URLs with no trailing slashes (no path part)
 1082    // such as http://www.cafeconleche.org
 1083  22142 private String canonicalizeURL(String uri) {
 1084   
 1085  22142 try {
 1086  22142 URL u = new URL(uri);
 1087  4833 String path = u.getFile();
 1088  4833 if (path == null || path.length() == 0
 1089    // We handle here the case where we have a URL such as
 1090    // http://www.cafeaulait.org with no trailing slash.
 1091    // Java's URL class assigns the path "/" to this case
 1092    // but does not change the URL.
 1093    || ("/".equals(path) && !(uri.endsWith("/")))) {
 1094  29 uri += '/';
 1095    }
 1096  4833 return uri;
 1097    }
 1098    catch (MalformedURLException ex) {
 1099  17309 return uri;
 1100    }
 1101    }
 1102   
 1103   
 1104    /**
 1105    * <p>
 1106    * Reads the document from a SAX <code>InputSource</code>.
 1107    * </p>
 1108    *
 1109    * @param in the input source from
 1110    * which the document is read.
 1111    *
 1112    * @return the parsed <code>Document</code>
 1113    *
 1114    * @throws ValidityException if a validity error is detected. This
 1115    * is only thrown if the builder has been instructed to
 1116    * validate.
 1117    * @throws ParsingException if a well-formedness error is detected
 1118    * @throws IOException if an I/O error such as a bad disk
 1119    * prevents the document from being read
 1120    */
 1121  22270 private Document build(InputSource in)
 1122    throws ParsingException, ValidityException, IOException {
 1123   
 1124  22270 XOMHandler handler = (XOMHandler) parser.getContentHandler();
 1125  22270 Document result = null;
 1126  22270 try {
 1127  22270 parser.parse(in);
 1128  21833 result = handler.getDocument();
 1129   
 1130    }
 1131    catch (SAXParseException ex) {
 1132  165 ParsingException pex = new ParsingException(
 1133    ex.getMessage(),
 1134    ex.getSystemId(),
 1135    ex.getLineNumber(),
 1136    ex.getColumnNumber(),
 1137    ex);
 1138  165 throw pex;
 1139    }
 1140    catch (SAXException ex) {
 1141  1 ParsingException pex
 1142    = new ParsingException(ex.getMessage(), in.getSystemId(), ex);
 1143  1 throw pex;
 1144    }
 1145    catch (XMLException ex) {
 1146  257 throw new ParsingException(ex.getMessage(), ex);
 1147    }
 1148    catch (RuntimeException ex) {
 1149    // Work-around for non-conformant parsers, especially Piccolo
 1150  7 ParsingException pex
 1151    = new ParsingException(ex.getMessage(), in.getSystemId(), ex);
 1152  7 throw pex;
 1153    }
 1154    catch (UTFDataFormatException ex) {
 1155    // Work-around for non-conformant parsers, especially Xerces
 1156    // http://nagoya.apache.org/bugzilla/show_bug.cgi?id=27583
 1157  1 ParsingException pex
 1158    = new ParsingException(ex.getMessage(), in.getSystemId(), ex);
 1159  1 throw pex;
 1160    }
 1161    catch (CharConversionException ex) {
 1162    // Work-around for non-conformant parsers, especially Xerces
 1163    // http://nagoya.apache.org/bugzilla/show_bug.cgi?id=27583
 1164  1 ParsingException pex
 1165    = new ParsingException(ex.getMessage(), in.getSystemId(), ex);
 1166  1 throw pex;
 1167    }
 1168    catch (IOException ex) {
 1169    // Work-around for Xerces; I don't want to just catch
 1170    // org.apache.xerces.util.URI.MalformedURIException
 1171    // because that would introduce a dependence on Xerces
 1172  5 if (ex.getClass().getName().equals(
 1173    "org.apache.xerces.util.URI$MalformedURIException")) {
 1174  1 throw new ParsingException(ex.getMessage(), in.getSystemId(), ex);
 1175    }
 1176    else {
 1177  4 throw ex;
 1178    }
 1179    }
 1180    finally {
 1181  22270 handler.freeMemory();
 1182    }
 1183   
 1184  21833 if (result == null) {
 1185  1 ParsingException ex = new ParsingException(
 1186    "Parser did not build document",
 1187    in.getSystemId(), -1, -1
 1188    );
 1189  1 throw ex;
 1190    }
 1191   
 1192  21832 if ("".equals(result.getBaseURI())) {
 1193  17395 result.setBaseURI(in.getSystemId());
 1194    }
 1195   
 1196  21832 ErrorHandler errorHandler = parser.getErrorHandler();
 1197  21832 if (errorHandler instanceof ValidityRequired) {
 1198  19 ValidityRequired validityHandler
 1199    = (ValidityRequired) errorHandler;
 1200  19 if (!validityHandler.isValid()) {
 1201  9 ValidityException vex = validityHandler.vexception;
 1202  9 vex.setDocument(result);
 1203  9 validityHandler.reset();
 1204  9 throw vex;
 1205    }
 1206    }
 1207  21823 return result;
 1208   
 1209    }
 1210   
 1211   
 1212    private static class ValidityRequired implements ErrorHandler {
 1213   
 1214    ValidityException vexception = null;
 1215   
 1216  9 void reset() {
 1217  9 vexception = null;
 1218    }
 1219   
 1220  1 public void warning(SAXParseException exception) {
 1221    // ignore warnings
 1222    }
 1223   
 1224  83 public void error(SAXParseException exception) {
 1225   
 1226  83 if (vexception == null) {
 1227  12 vexception = new ValidityException(
 1228    exception.getMessage(),
 1229    exception.getSystemId(),
 1230    exception.getLineNumber(),
 1231    exception.getColumnNumber(),
 1232    exception);
 1233    }
 1234  83 vexception.addError(exception);
 1235    }
 1236   
 1237  2 public void fatalError(SAXParseException exception)
 1238    throws SAXParseException {
 1239  2 throw exception;
 1240    }
 1241   
 1242  19 boolean isValid() {
 1243  19 return vexception == null;
 1244    }
 1245   
 1246    }
 1247   
 1248   
 1249    // Because Crimson doesn't report namespace errors as fatal
 1250    private static class NamespaceWellformednessRequired
 1251    implements ErrorHandler {
 1252   
 1253  1 public void warning(SAXParseException exception) {
 1254    // ignore warnings
 1255    }
 1256   
 1257  4 public void error(SAXParseException exception)
 1258    throws SAXParseException {
 1259   
 1260  4 if (exception.getMessage().equals("Illegal Namespace prefix: \"xml\".")) {
 1261  2 return;
 1262    }
 1263   
 1264  2 throw exception;
 1265   
 1266    }
 1267   
 1268  1 public void fatalError(SAXParseException exception)
 1269    throws SAXParseException {
 1270  1 throw exception;
 1271    }
 1272   
 1273    }
 1274   
 1275   
 1276    // I added this because XIncluder needed it.
 1277    /**
 1278    * <p>
 1279    * Returns this builder's <code>NodeFactory</code>. It may return
 1280    * null if a factory was not supplied when the builder was created.
 1281    * </p>
 1282    *
 1283    * @return the node factory that was specified in the constructor
 1284    */
 1285  47 public NodeFactory getNodeFactory() {
 1286  47 return factory;
 1287    }
 1288   
 1289   
 1290    }