Clover coverage report - Clover results for XOM 1.2d1
Coverage timestamp: Wed Feb 8 2006 08:31:33 EST
file stats: LOC: 1,307   Methods: 40
NCLOC: 437   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
Serializer.java 95.9% 97.2% 100% 97.1%
coverage coverage
 1    /* Copyright 2002-2006 Elliotte Rusty Harold
 2   
 3    This library is free software; you can redistribute it and/or modify
 4    it under the terms of version 2.1 of the GNU Lesser General Public
 5    License as published by the Free Software Foundation.
 6   
 7    This library is distributed in the hope that it will be useful,
 8    but WITHOUT ANY WARRANTY; without even the implied warranty of
 9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10    GNU Lesser General Public License for more details.
 11   
 12    You should have received a copy of the GNU Lesser General Public
 13    License along with this library; if not, write to the
 14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 15    Boston, MA 02111-1307 USA
 16   
 17    You can contact Elliotte Rusty Harold by sending e-mail to
 18    elharo@metalab.unc.edu. Please include the word "XOM" in the
 19    subject line. The XOM home page is located at http://www.xom.nu/
 20    */
 21   
 22    package nu.xom;
 23   
 24    import java.io.IOException;
 25    import java.io.OutputStream;
 26    import java.io.OutputStreamWriter;
 27    import java.io.UnsupportedEncodingException;
 28    import java.io.Writer;
 29    import java.util.Locale;
 30    import org.xml.sax.helpers.NamespaceSupport;
 31   
 32    /**
 33    * <p>
 34    * Outputs a <code>Document</code> object in a specific encoding using
 35    * various options for controlling white space, normalization,
 36    * indenting, line breaking, and base URIs. However, in general these
 37    * options do affect the document's infoset. In particular, if you set
 38    * either the maximum line length or the indent size to a positive
 39    * value, then the serializer will not respect input white space. It
 40    * may trim leading and trailing space, condense runs of white
 41    * space to a single space, convert carriage returns and linefeeds
 42    * to spaces, add extra space where none was present before,
 43    * and otherwise muck with the document's white space.
 44    * The defaults, however, preserve all significant white space
 45    * including ignorable white space and boundary white space.
 46    * </p>
 47    *
 48    * @author Elliotte Rusty Harold
 49    * @version 1.2d1
 50    *
 51    */
 52    public class Serializer {
 53   
 54    private TextWriter escaper;
 55    private boolean preserveBaseURI = false;
 56    // ???? reset when exception is thrown?
 57    private NamespaceSupport namespaces = new NamespaceSupport();
 58   
 59   
 60    /**
 61    * <p>
 62    * Create a new serializer that uses the UTF-8 encoding.
 63    * </p>
 64    *
 65    * @param out the output stream to write the document on
 66    *
 67    * @throws NullPointerException if <code>out</code> is null
 68    */
 69  19061 public Serializer(OutputStream out) {
 70   
 71  19061 try {
 72  19061 this.setOutputStream(out, "UTF-8");
 73    }
 74    catch (UnsupportedEncodingException ex) {
 75  0 throw new RuntimeException(
 76    "The VM is broken. It does not understand UTF-8.");
 77    }
 78   
 79    }
 80   
 81   
 82    /**
 83    * <p>
 84    * Create a new serializer that uses the specified encoding.
 85    * The encoding must be recognized by the Java virtual machine. If
 86    * you attempt to use an encoding that the local Java virtual
 87    * machine does not support, the constructor will throw an
 88    * <code>UnsupportedEncodingException</code>.
 89    * Currently the following encodings are recognized by XOM:
 90    * </p>
 91    *
 92    * <ul>
 93    * <li>UTF-8</li>
 94    * <li>UTF-16</li>
 95    * <li>UTF-16BE</li>
 96    * <li>UTF-16LE</li>
 97    * <li>ISO-10646-UCS-2</li>
 98    * <li>ISO-8859-1</li>
 99    * <li>ISO-8859-2</li>
 100    * <li>ISO-8859-3</li>
 101    * <li>ISO-8859-4</li>
 102    * <li>ISO-8859-5</li>
 103    * <li>ISO-8859-6</li>
 104    * <li>ISO-8859-7</li>
 105    * <li>ISO-8859-8</li>
 106    * <li>ISO-8859-9</li>
 107    * <li>ISO-8859-10</li>
 108    * <li>ISO-8859-11 (a.k.a. TIS-620)</li>
 109    * <li>ISO-8859-13</li>
 110    * <li>ISO-8859-14</li>
 111    * <li>ISO-8859-15</li>
 112    * <li>ISO-8859-16</li>
 113    * <li>IBM037 (a.k.a. CP037, EBCDIC-CP-US, EBCDIC-CP-CA,
 114    * EBCDIC-CP-WA, EBCDIC-CP-NL, and CSIBM037)</li>
 115    * <li>GB18030</li>
 116    * </ul>
 117    *
 118    * <p>
 119    * You can use encodings not in this list if the virtual
 120    * machine supports them. However, they may be
 121    * significantly slower than the encodings in this list.
 122    * </p>
 123    *
 124    * <p>
 125    * I've noticed Java has significant bugs in its handling of some
 126    * of these encodings. In some cases such as 0x80 in Big5, XOM
 127    * will escape a character that should not need to be escaped
 128    * because Java can't output that character in the specified
 129    * encoding, even though the output character set does contain it.
 130    * :-(
 131    * </p>
 132    *
 133    * @param out the output stream to write the document on
 134    * @param encoding the character encoding for the serialization
 135    *
 136    * @throws NullPointerException if <code>out</code>
 137    * or <code>encoding</code> is null
 138    * @throws UnsupportedEncodingException if the VM does not
 139    * support the requested encoding
 140    *
 141    */
 142  135 public Serializer(OutputStream out, String encoding)
 143    throws UnsupportedEncodingException {
 144   
 145  135 if (encoding == null) {
 146  1 throw new NullPointerException("Null encoding");
 147    }
 148  134 this.setOutputStream(out, encoding);
 149   
 150    }
 151   
 152   
 153    /**
 154    * <p>
 155    * Flushes the previous output stream and
 156    * redirects further output to the new output stream.
 157    * </p>
 158    *
 159    *
 160    * @param out the output stream to write the document on
 161    *
 162    * @throws NullPointerException if <code>out</code> is null
 163    * @throws IOException if the previous output stream
 164    * encounters an I/O error when flushed
 165    *
 166    */
 167  35 public void setOutputStream(OutputStream out)
 168    throws IOException {
 169   
 170    // flush any data onto the old output stream
 171  35 this.flush();
 172  35 int maxLength = getMaxLength();
 173  35 int indent = this.getIndent();
 174  35 String lineSeparator = getLineSeparator();
 175  35 boolean nfc = getUnicodeNormalizationFormC();
 176  35 String encoding = escaper.getEncoding();
 177  35 boolean lineSeparatorSet = escaper.lineSeparatorSet;
 178  35 setOutputStream(out, encoding);
 179  35 setIndent(indent);
 180  35 setMaxLength(maxLength);
 181  35 setUnicodeNormalizationFormC(nfc);
 182  1 if (lineSeparatorSet) setLineSeparator(lineSeparator);
 183   
 184    }
 185   
 186   
 187  19230 private void setOutputStream(OutputStream out, String encoding)
 188    throws UnsupportedEncodingException {
 189   
 190  19230 if (out == null) {
 191  2 throw new NullPointerException("Null OutputStream");
 192    }
 193  19228 Writer writer;
 194  19228 String encodingUpperCase = encoding.toUpperCase(Locale.ENGLISH);
 195  19228 if (encodingUpperCase.equals("UTF-8")) {
 196  19155 writer = new OutputStreamWriter(out, "UTF-8");
 197    }
 198  73 else if (encodingUpperCase.equals("UTF-16")
 199    || encodingUpperCase.equals("ISO-10646-UCS-2")) {
 200    // For compatibility with Java 1.2 and earlier
 201  4 writer = new OutputStreamWriter(out, "UnicodeBig");
 202    }
 203    // Java's Cp037 encoding is broken, so we have to
 204    // provide our own.
 205  69 else if (encodingUpperCase.equals("IBM037")
 206    || encodingUpperCase.equals("CP037")
 207    || encodingUpperCase.equals("EBCDIC-CP-US")
 208    || encodingUpperCase.equals("EBCDIC-CP-CA")
 209    || encodingUpperCase.equals("EBCDIC-CP-WA")
 210    || encodingUpperCase.equals("EBCDIC-CP-NL")
 211    || encodingUpperCase.equals("CSIBM037")) {
 212  1 writer = new EBCDICWriter(out);
 213    }
 214  68 else if (encodingUpperCase.equals("ISO-8859-11")
 215    || encodingUpperCase.equals("TIS-620")) {
 216    // Java doesn't recognize the name ISO-8859-11 and
 217    // Java 1.3 and earlier don't recognize TIS-620
 218  1 writer = new OutputStreamWriter(out, "TIS620");
 219    }
 220  67 else writer = new OutputStreamWriter(out, encoding);
 221   
 222  19228 writer = new UnsynchronizedBufferedWriter(writer);
 223  19228 this.escaper = TextWriterFactory.getTextWriter(writer, encoding);
 224   
 225    }
 226   
 227   
 228    /**
 229    * <p>
 230    * Serializes a document onto the output
 231    * stream using the current options.
 232    * </p>
 233    *
 234    * @param doc the <code>Document</code> to serialize
 235    *
 236    * @throws IOException if the underlying output stream
 237    * encounters an I/O error
 238    * @throws NullPointerException if <code>doc</code> is null
 239    * @throws UnavailableCharacterException if the document contains
 240    * an unescapable character (e.g. in an element name) that is
 241    * not available in the current encoding
 242    */
 243  17949 public void write(Document doc) throws IOException {
 244   
 245  17949 escaper.reset();
 246  17949 namespaces.reset();
 247  17949 namespaces.declarePrefix("", "");
 248    // The OutputStreamWriter automatically inserts
 249    // the byte order mark if necessary.
 250  17949 writeXMLDeclaration();
 251  17949 int childCount = doc.getChildCount();
 252  17948 for (int i = 0; i < childCount; i++) {
 253  18837 writeChild(doc.getChild(i));
 254   
 255    // Might want to remove this line break in a
 256    // non-XML serializer where it's not guaranteed to be
 257    // OK to add extra line breaks in the prolog
 258  18835 escaper.breakLine();
 259    }
 260  17946 escaper.flush();
 261   
 262    }
 263   
 264   
 265    /**
 266    * <p>
 267    * Writes the XML declaration onto the output stream,
 268    * followed by a line break.
 269    * </p>
 270    *
 271    * @throws IOException if the underlying output stream
 272    * encounters an I/O error
 273    */
 274  17949 protected void writeXMLDeclaration() throws IOException {
 275   
 276  17949 escaper.writeUncheckedMarkup("<?xml version=\"1.0\" encoding=\"");
 277  17949 escaper.writeMarkup(escaper.getEncoding());
 278  17949 escaper.writeUncheckedMarkup("\"?>");
 279  17949 escaper.breakLine();
 280   
 281    }
 282   
 283   
 284    /**
 285    * <p>
 286    * Serializes an element onto the output stream using the current
 287    * options. The result is guaranteed to be well-formed.
 288    * </p>
 289    *
 290    * <p>
 291    * If the element is empty, this method invokes
 292    * <code>writeEmptyElementTag</code>. If the element is not
 293    * empty, then:
 294    * </p>
 295    *
 296    * <ol>
 297    * <li>It calls <code>writeStartTag</code>.</li>
 298    * <li>It passes each of the element's children to
 299    * <code>writeChild</code> in order.</li>
 300    * <li>It calls <code>writeEndTag</code>.</li>
 301    * </ol>
 302    *
 303    * <p>
 304    * It may break lines or add white space if the serializer has
 305    * been configured to indent or use a maximum line length.
 306    * </p>
 307    *
 308    * @param element the <code>Element</code> to serialize
 309    *
 310    * @throws IOException if the underlying output stream
 311    * encounters an I/O error
 312    * @throws UnavailableCharacterException if the element name
 313    * contains a character that is not available in the
 314    * current encoding
 315    */
 316  1688798 protected void write(Element element) throws IOException {
 317   
 318    // workaround for case where only children are empty text nodes
 319  1688798 boolean hasRealChildren = false;
 320  1688798 int childCount = element.getChildCount();
 321  1688798 for (int i = 0; i < childCount; i++) {
 322  1687770 Node child = element.getChild(i);
 323  1687770 if (child.isText()) {
 324  1685488 Text t = (Text) child;
 325  4 if (t.isEmpty()) continue;
 326    }
 327  1687766 hasRealChildren = true;
 328  1687766 break;
 329    }
 330   
 331  1688798 if (hasRealChildren) {
 332  1687766 boolean wasPreservingWhiteSpace = escaper.isPreserveSpace();
 333  1687766 writeStartTag(element);
 334   
 335    // children
 336  1687766 for (int i = 0; i < childCount; i++) {
 337  3370013 Node child = element.getChild(i);
 338    // need to work around a very tricky case here where
 339    // denormalized characters cross boundaries of
 340    // consecutive text nodes
 341  3370013 if (escaper.getNFC() && child.isText()) {
 342  17152 Text t = (Text) child;
 343  17152 while (i < childCount-1) { // not the last node
 344  1 Node next = element.getChild(i+1);
 345  1 if (next.isText()) {
 346  1 t = new Text(t.getValue() + next.getValue());
 347  1 i++;
 348    }
 349  0 else break;
 350    }
 351  17152 writeChild(t);
 352    }
 353    else {
 354  3352861 writeChild(child);
 355    }
 356    }
 357  1687766 writeEndTag(element);
 358   
 359    // restore parent value
 360  1687766 escaper.setPreserveSpace(wasPreservingWhiteSpace);
 361    }
 362    else {
 363  1032 writeEmptyElementTag(element);
 364    }
 365   
 366    }
 367   
 368   
 369  37 private boolean hasNonTextChildren(Element element) {
 370   
 371  37 int childCount = element.getChildCount();
 372  37 for (int i = 0; i < childCount; i++) {
 373  25 if (! element.getChild(i).isText()) return true;
 374    }
 375  12 return false;
 376   
 377    }
 378   
 379   
 380    // writeEndTag should not normally throw UnavailableCharacterException
 381    // because that would already have been thrown for the
 382    // corresponding start-tag.
 383    /**
 384    * <p>
 385    * Writes the end-tag for an element in the form
 386    * <code>&lt;/<i>name</i>&gt;</code>.
 387    * </p>
 388    *
 389    * @param element the element whose end-tag is written
 390    *
 391    * @throws IOException if the underlying output stream
 392    * encounters an I/O error
 393    */
 394  1687772 protected void writeEndTag(Element element) throws IOException {
 395   
 396  1687772 escaper.decrementIndent();
 397  1687772 if (escaper.getIndent() > 0 && !escaper.isPreserveSpace()) {
 398  37 if (hasNonTextChildren(element)) {
 399  25 escaper.breakLine();
 400    }
 401    }
 402  1687772 escaper.write('<');
 403  1687772 escaper.write('/');
 404  1687772 escaper.writeMarkup(element.getQualifiedName());
 405  1687772 escaper.write('>');
 406  1687772 namespaces.popContext();
 407   
 408    }
 409   
 410   
 411    /**
 412    *
 413    * <p>
 414    * Writes the start-tag for the element including
 415    * all its namespace declarations and attributes.
 416    * </p>
 417    *
 418    * <p>
 419    * The <code>writeAttributes</code> method is called to write
 420    * all the non-namespace-declaration attributes.
 421    * The <code>writeNamespaceDeclarations</code> method
 422    * is called to write all the namespace declaration attributes.
 423    * </p>
 424    *
 425    * @param element the element whose start-tag is written
 426    *
 427    * @throws IOException if the underlying output stream
 428    * encounters an I/O error
 429    * @throws UnavailableCharacterException if the name of the element
 430    * or the name of any of its attributes contains a character
 431    * that is not available in the current encoding
 432    */
 433  1687772 protected void writeStartTag(Element element) throws IOException {
 434   
 435  1687772 writeTagBeginning(element);
 436  1687772 escaper.write('>');
 437  1687772 escaper.incrementIndent();
 438  1687772 String xmlSpaceValue = element.getAttributeValue(
 439    "space", "http://www.w3.org/XML/1998/namespace");
 440  1687772 if (xmlSpaceValue != null) {
 441  204 if ("preserve".equals(xmlSpaceValue)){
 442  201 escaper.setPreserveSpace(true);
 443    }
 444  3 else if ("default".equals(xmlSpaceValue)){
 445  2 escaper.setPreserveSpace(false);
 446    }
 447    }
 448   
 449    }
 450   
 451   
 452    /**
 453    *
 454    * <p>
 455    * Writes an empty-element tag for the element
 456    * including all its namespace declarations and attributes.
 457    * </p>
 458    *
 459    * <p>
 460    * The <code>writeAttributes</code> method is called to write
 461    * all the non-namespace-declaration attributes.
 462    * The <code>writeNamespaceDeclarations</code> method
 463    * is called to write all the namespace declaration attributes.
 464    * </p>
 465    *
 466    * <p>
 467    * If subclasses don't wish empty-element tags to be used,
 468    * they can override this method to simply invoke
 469    * <code>writeStartTag</code> followed by
 470    * <code>writeEndTag</code>.
 471    * </p>
 472    *
 473    * @param element the element whose empty-element tag is written
 474    *
 475    * @throws IOException if the underlying output stream
 476    * encounters an I/O error
 477    * @throws UnavailableCharacterException if the name of the element or the name of
 478    * any of its attributes contains a character that is not
 479    * available in the current encoding
 480    */
 481  1032 protected void writeEmptyElementTag(Element element)
 482    throws IOException {
 483  1032 writeTagBeginning(element);
 484  1031 escaper.write('/');
 485  1031 escaper.write('>');
 486  1031 namespaces.popContext();
 487    }
 488   
 489   
 490    // This just extracts the commonality between writeStartTag
 491    // and writeEmptyElementTag
 492  1688804 private void writeTagBeginning(Element element)
 493    throws IOException {
 494   
 495  1688804 namespaces.pushContext();
 496   
 497  1688804 if (escaper.isIndenting()
 498    && !escaper.isPreserveSpace()
 499    && !escaper.justBroke()) {
 500  24 escaper.breakLine();
 501    }
 502  1688804 escaper.write('<');
 503  1688804 escaper.writeMarkup(element.getQualifiedName());
 504  1688803 writeAttributes(element);
 505  1688803 writeNamespaceDeclarations(element);
 506   
 507    }
 508   
 509   
 510    /**
 511    * <p>
 512    * Writes all the attributes of the specified
 513    * element onto the output stream, one at a time, separated
 514    * by white space. If preserveBaseURI is true, and it is
 515    * necessary to add an <code>xml:base</code> attribute
 516    * to the element in order to preserve the base URI, then
 517    * that attribute is also written here.
 518    * Each individual attribute is written by invoking
 519    * <code>write(Attribute)</code>.
 520    * </p>
 521    *
 522    * @param element the <code>Element</code> whose attributes are
 523    * written
 524    * @throws IOException if the underlying output stream
 525    * encounters an I/O error
 526    * @throws UnavailableCharacterException if the name of any of
 527    * the element's attributes contains a character that is not
 528    * available in the current encoding
 529    */
 530  1688803 protected void writeAttributes(Element element)
 531    throws IOException {
 532   
 533    // check to see if we need an xml:base attribute
 534  1688803 if (preserveBaseURI) {
 535  8 ParentNode parent = element.getParent();
 536  8 if (element.getAttribute("base",
 537    "http://www.w3.org/XML/1998/namespace") == null) {
 538  6 String baseValue = element.getBaseURI();
 539  6 if (parent == null
 540    || parent.isDocument()
 541    || !element.getBaseURI()
 542    .equals(parent.getBaseURI())) {
 543   
 544  4 escaper.write(' ');
 545  4 Attribute baseAttribute = new Attribute(
 546    "xml:base",
 547    "http://www.w3.org/XML/1998/namespace",
 548    baseValue);
 549  4 write(baseAttribute);
 550    }
 551    }
 552    }
 553   
 554  1688803 int attributeCount = element.getAttributeCount();
 555  1688803 for (int i = 0; i < attributeCount; i++) {
 556  1836749 Attribute attribute = element.getAttribute(i);
 557  1836749 escaper.write(' ');
 558  1836749 write(attribute);
 559    }
 560    }
 561   
 562   
 563    /**
 564    * <p>
 565    * Writes all the namespace declaration
 566    * attributes of the specified element onto the output stream,
 567    * one at a time, separated by white space. Each individual
 568    * declaration is written by invoking
 569    * <code>writeNamespaceDeclaration</code>.
 570    * </p>
 571    *
 572    * @param element the <code>Element</code> whose namespace
 573    * declarations are written
 574    * @throws IOException if the underlying output stream
 575    * encounters an I/O error
 576    * @throws UnavailableCharacterException if any of the element's
 577    * namespace prefixes contains a character that is not
 578    * available in the current encoding
 579    */
 580  1688803 protected void writeNamespaceDeclarations(Element element)
 581    throws IOException {
 582   
 583  1688803 String prefix = element.getNamespacePrefix();
 584  1688803 if (!("xml".equals(prefix))) {
 585  1688803 writeNamespaceDeclarationIfNecessary(prefix, element.getNamespaceURI());
 586    }
 587   
 588    // write attribute namespaces
 589  1688803 int attCount = element.getAttributeCount();
 590  1688803 for (int i = 0; i < attCount; i++) {
 591  1836749 Attribute att = element.getAttribute(i);
 592  1836749 String attPrefix = att.getNamespacePrefix();
 593  1836749 if (attPrefix.length() != 0 && !("xml".equals(attPrefix))) {
 594  3 writeNamespaceDeclarationIfNecessary(attPrefix, att.getNamespaceURI());
 595    }
 596    }
 597   
 598    // write additional namespaces
 599  1688803 Namespaces namespaces = element.namespaces;
 600  1688803 if (namespaces == null) return;
 601  0 int namespaceCount = namespaces.size();
 602  0 for (int i = 0; i < namespaceCount; i++) {
 603  0 String additionalPrefix = namespaces.getPrefix(i);
 604  0 String uri = namespaces.getURI(additionalPrefix);
 605  0 writeNamespaceDeclarationIfNecessary(additionalPrefix, uri);
 606    }
 607   
 608    }
 609   
 610   
 611  1688806 private void writeNamespaceDeclarationIfNecessary(String prefix, String uri)
 612    throws IOException {
 613   
 614  1688806 String currentValue = namespaces.getURI(prefix);
 615    // NamespaceSupport returns null for no namespace, not the
 616    // empty string like XOM does
 617  1688806 if (currentValue == null && "".equals(uri)) {
 618  1688727 return;
 619    }
 620  79 else if (uri.equals(currentValue)) {
 621  27 return;
 622    }
 623   
 624  52 escaper.write(' ');
 625  52 writeNamespaceDeclaration(prefix, uri);
 626   
 627    }
 628   
 629   
 630    /**
 631    * <p>
 632    * Writes a namespace declaration in the form
 633    * <code>xmlns:<i>prefix</i>="<i>uri</i>"</code> or
 634    * <code>xmlns="<i>uri</i>"</code>. It does not write
 635    * the spaces on either side of the namespace declaration.
 636    * These are written by <code>writeNamespaceDeclarations</code>.
 637    * </p>
 638    *
 639    * @param prefix the namespace prefix; the empty string for the
 640    * default namespace
 641    * @param uri the namespace URI
 642    *
 643    * @throws IOException if the underlying output stream
 644    * encounters an I/O error
 645    * @throws UnavailableCharacterException if the namespace prefix contains a
 646    * character that is not available in the current encoding
 647    */
 648  190 protected void writeNamespaceDeclaration(String prefix, String uri)
 649    throws IOException {
 650   
 651  190 namespaces.declarePrefix(prefix, uri);
 652  190 if ("".equals(prefix)) {
 653  97 escaper.writeUncheckedMarkup("xmlns");
 654    }
 655    else {
 656  93 escaper.writeUncheckedMarkup("xmlns:");
 657  93 escaper.writeMarkup(prefix);
 658    }
 659  190 escaper.write('=');
 660  190 escaper.write('"');
 661  190 escaper.writePCDATA(uri);
 662  190 escaper.write('"');
 663   
 664    }
 665   
 666   
 667    /**
 668    * <p>
 669    * Writes an attribute in the form
 670    * <code><i>name</i>="<i>value</i>"</code>.
 671    * Characters in the attribute value are escaped as necessary.
 672    * </p>
 673    *
 674    * @param attribute the <code>Attribute</code> to write
 675    *
 676    * @throws IOException if the underlying output stream
 677    * encounters an I/O error
 678    * @throws UnavailableCharacterException if the attribute name contains a character
 679    * that is not available in the current encoding
 680    *
 681    */
 682  1836753 protected void write(Attribute attribute) throws IOException {
 683  1836753 escaper.writeMarkup(attribute.getQualifiedName());
 684  1836753 escaper.write('=');
 685  1836753 escaper.write('"');
 686  1836753 escaper.writeAttributeValue(attribute.getValue());
 687  1836753 escaper.write('"');
 688    }
 689   
 690   
 691    /**
 692    * <p>
 693    * Writes a comment onto the output stream using the current
 694    * options. Since character and entity references are not resolved
 695    * in comments, comments can only be serialized when all
 696    * characters they contain are available in the current
 697    * encoding.
 698    * </p>
 699    *
 700    * @param comment the <code>Comment</code> to serialize
 701    *
 702    * @throws IOException if the underlying output stream
 703    * encounters an I/O error
 704    * @throws UnavailableCharacterException if the comment contains a
 705    * character that is not available in the current encoding
 706    */
 707  2828 protected void write(Comment comment) throws IOException {
 708  7 if (escaper.isIndenting()) escaper.breakLine();
 709  2828 escaper.writeMarkup("<!--");
 710  2828 escaper.writeMarkup(comment.getValue());
 711  2828 escaper.writeMarkup("-->");
 712    }
 713   
 714   
 715    /**
 716    * <p>
 717    * Writes a processing instruction
 718    * onto the output stream using the current options.
 719    * Since character and entity references are not resolved
 720    * in processing instructions, processing instructions
 721    * can only be serialized when all
 722    * characters they contain are available in the current
 723    * encoding.
 724    * </p>
 725    *
 726    * @param instruction the <code>ProcessingInstruction</code>
 727    * to serialize
 728    *
 729    * @throws IOException if the underlying output stream
 730    * encounters an I/O error
 731    * @throws UnavailableCharacterException if the instruction contains a
 732    * character that is not available in the current encoding
 733    */
 734  190 protected void write(ProcessingInstruction instruction)
 735    throws IOException {
 736   
 737  1 if (escaper.isIndenting()) escaper.breakLine();
 738  190 escaper.writeUncheckedMarkup("<?");
 739  190 escaper.writeMarkup(instruction.getTarget());
 740  190 String value = instruction.getValue();
 741    // for canonical XML, only output a space after the target
 742    // if there is a value
 743  190 if (!"".equals(value)) {
 744  141 escaper.write(' ');
 745  141 escaper.writeMarkup(value);
 746    }
 747  190 escaper.writeMarkup("?>");
 748   
 749    }
 750   
 751    /**
 752    * <p>
 753    * Writes a <code>Text</code> object
 754    * onto the output stream using the current options.
 755    * Reserved characters such as &lt;, &gt; and "
 756    * are escaped using the standard entity references
 757    * such as <code>&amp;lt;</code>, <code>&amp;gt;</code>,
 758    * and <code>&amp;quot;</code>.
 759    * </p>
 760    *
 761    * <p>
 762    * Characters which cannot be encoded in the current character set
 763    * (for example, &Omega; in ISO-8859-1) are encoded using
 764    * character references.
 765    * </p>
 766    *
 767    * @param text the <code>Text</code> to serialize
 768    *
 769    * @throws IOException if the underlying output stream
 770    * encounters an I/O error
 771    */
 772  1698452 protected void write(Text text) throws IOException {
 773   
 774    // XXX Is there a shortcut that takes advantage of the
 775    // data being stored in UTF-8 here? perhaps even if only
 776    // when serializing to UTF-8?
 777  1698452 String value = text.getValue();
 778  1698452 if (text.isCDATASection()
 779    && value.indexOf("]]>") == -1) {
 780  91 if (!(escaper instanceof UnicodeWriter)) {
 781  2 int length = value.length();
 782  2 for (int i = 0; i < length; i++) {
 783  5 if (escaper.needsEscaping(value.charAt(i))) {
 784    // can't use CDATA section
 785  1 escaper.writePCDATA(value);
 786  1 return;
 787    }
 788    }
 789    }
 790  90 escaper.writeUncheckedMarkup("<![CDATA[");
 791  90 escaper.writeMarkup(value);
 792  90 escaper.writeUncheckedMarkup("]]>");
 793    }
 794    // is this boundary whitespace we can ignore?
 795  1698361 else if (isBoundaryWhitespace(text, value)) {
 796  8 return; // without writing node
 797    }
 798    else {
 799  1698353 escaper.writePCDATA(value);
 800    }
 801   
 802    }
 803   
 804   
 805  1698361 private boolean isBoundaryWhitespace(Text text, String value) {
 806   
 807  1698340 if (getIndent() <= 0) return false;
 808   
 809  21 ParentNode parent = text.getParent();
 810  21 if (parent == null) {
 811  2 return "".equals(value.trim());
 812    }
 813   
 814    // ???? cutting next line only breaks a few tests; and what it does
 815    // break might be better off if the breakage is accepted as correct behavior
 816  19 int childCount = parent.getChildCount();
 817  9 if (childCount == 1) return false;
 818  1 if (! "".equals(value.trim())) return false;
 819   
 820    // ???? This is a huge Hotspot. maybe 12% of serialization time
 821    // when indenting. Is there any way to eliminate this?
 822    // We only actually need to test a couple of positions, 0 and
 823    // parent.getChildCount()-1
 824    // Instead of getting position we could get those two elements and compare
 825    // to the text. But you still need the previous and next
 826  9 int position = parent.indexOf(text);
 827   
 828  9 Node previous = null;
 829  9 Node next = null;
 830   
 831  5 if (position != 0) previous = parent.getChild(position-1);
 832  9 if (position != childCount-1) {
 833  5 next = parent.getChild(position+1);
 834    }
 835  9 if (previous == null || !previous.isText()) {
 836  8 if (next == null || !next.isText()) {
 837  7 return true;
 838    }
 839    }
 840   
 841  2 return false;
 842   
 843    }
 844   
 845   
 846    /**
 847    * <p>
 848    * Writes a <code>DocType</code> object
 849    * onto the output stream using the current options.
 850    * </p>
 851    *
 852    * @param doctype the document type declaration to serialize
 853    *
 854    * @throws IOException if the underlying
 855    * output stream encounters an I/O error
 856    * @throws UnavailableCharacterException if the document type
 857    * declaration contains a character that is not available
 858    * in the current encoding
 859    */
 860  552 protected void write(DocType doctype) throws IOException {
 861   
 862  552 escaper.writeUncheckedMarkup("<!DOCTYPE ");
 863  552 escaper.writeMarkup(doctype.getRootElementName());
 864  552 if (doctype.getPublicID() != null) {
 865  8 escaper.writeMarkup(" PUBLIC \"" + doctype.getPublicID()
 866    + "\" \"" + doctype.getSystemID() + "\"");
 867    }
 868  544 else if (doctype.getSystemID() != null) {
 869  146 escaper.writeMarkup(
 870    " SYSTEM \"" + doctype.getSystemID() + "\"");
 871    }
 872   
 873  552 String internalDTDSubset = doctype.getInternalDTDSubset();
 874  552 if (!internalDTDSubset.equals("")) {
 875  443 escaper.writeUncheckedMarkup(" [");
 876  443 escaper.breakLine();
 877  443 escaper.setInDocType(true);
 878  443 escaper.writeMarkup(internalDTDSubset);
 879  442 escaper.setInDocType(false);
 880  442 escaper.write(']');
 881    }
 882   
 883  551 escaper.write('>');
 884   
 885    }
 886   
 887   
 888    /**
 889    * <p>
 890    * Writes a child node onto the output stream using the
 891    * current options. It is invoked when walking the tree to
 892    * serialize the entire document. It is not called, and indeed
 893    * should not be called, for either the <code>Document</code>
 894    * node or for attributes.
 895    * </p>
 896    *
 897    * @param node the <code>Node</code> to serialize
 898    *
 899    * @throws IOException if the underlying output stream
 900    * encounters an I/O error
 901    * @throws XMLException if an <code>Attribute</code>, a
 902    * <code>Document</code>, or a <code>Namespace</code>
 903    * is passed to this method
 904    */
 905  3444194 protected void writeChild(Node node) throws IOException {
 906   
 907  3444194 if (node.isElement()) {
 908  1691443 write((Element) node);
 909    }
 910  1752750 else if (node.isText()) {
 911  1748086 write((Text) node);
 912    }
 913  4664 else if (node.isComment()) {
 914  2836 write((Comment) node);
 915    }
 916  1828 else if (node.isProcessingInstruction()) {
 917  191 write((ProcessingInstruction) node);
 918    }
 919  1637 else if (node.isDocType()) {
 920  1635 write((DocType) node);
 921    }
 922    else {
 923  2 throw new XMLException("Cannot write a " +
 924    node.getClass().getName() +
 925    " from the writeChild() method");
 926    }
 927   
 928    }
 929   
 930   
 931    /** <p>
 932    * Writes a string onto the underlying output stream.
 933    * Non-ASCII characters that are not available in the
 934    * current character set are encoded with numeric character
 935    * references. The three reserved characters &lt;, &gt;, and &amp;
 936    * are escaped using the standard entity references
 937    * <code>&amp;lt;</code>, <code>&amp;gt;</code>,
 938    * and <code>&amp;amp;</code>.
 939    * Double and single quotes are not escaped.
 940    * </p>
 941    *
 942    * @param text the parsed character data to serialize
 943    *
 944    * @throws IOException if the underlying output stream
 945    * encounters an I/O error
 946    */
 947  1 protected final void writeEscaped(String text) throws IOException {
 948  1 escaper.writePCDATA(text);
 949    }
 950   
 951    /** <p>
 952    * Writes a string onto the underlying output stream.
 953    * Non-ASCII characters that are not available in the
 954    * current character set are escaped using hexadecimal numeric
 955    * character references. Carriage returns, line feeds, and tabs
 956    * are also escaped using hexadecimal numeric character
 957    * references in order to ensure their preservation on a round
 958    * trip. The four reserved characters &lt;, &gt;, &amp;,
 959    * and &quot; are escaped using the standard entity references
 960    * <code>&amp;lt;</code>, <code>&amp;gt;</code>,
 961    * <code>&amp;amp;</code>, and <code>&amp;quot;</code>.
 962    * The single quote is not escaped.
 963    * </p>
 964    *
 965    * @param value the attribute value to serialize
 966    *
 967    * @throws IOException if the underlying output stream
 968    * encounters an I/O error
 969    */
 970  4 protected final void writeAttributeValue(String value)
 971    throws IOException {
 972  4 escaper.writeAttributeValue(value);
 973    }
 974   
 975   
 976    /** <p>
 977    * Writes a string onto the underlying output stream
 978    * without escaping any characters.
 979    * Non-ASCII characters that are not available in the
 980    * current character set cause an <code>IOException</code>.
 981    * </p>
 982    *
 983    * @param text the <code>String</code> to serialize
 984    *
 985    * @throws IOException if the underlying output stream
 986    * encounters an I/O error or <code>text</code> contains
 987    * characters not available in the current character set
 988    */
 989  319873 protected final void writeRaw(String text) throws IOException {
 990  319873 escaper.writeMarkup(text);
 991    }
 992   
 993   
 994    /** <p>
 995    * Writes the current line break string
 996    * onto the underlying output stream and indents
 997    * as specified by the current level and the indent property.
 998    * </p>
 999    *
 1000    * @throws IOException if the underlying output stream
 1001    * encounters an I/O error
 1002    */
 1003  562 protected final void breakLine() throws IOException {
 1004  562 escaper.breakLine();
 1005    }
 1006   
 1007   
 1008    /**
 1009    * <p>
 1010    * Flushes the data onto the output stream.
 1011    * It is not enough to flush the output stream.
 1012    * You must flush the serializer object itself because it
 1013    * uses some internal buffering.
 1014    * The serializer will flush the underlying output stream.
 1015    * </p>
 1016    *
 1017    * @throws IOException if the underlying
 1018    * output stream encounters an I/O error
 1019    */
 1020  19757 public void flush() throws IOException {
 1021  19757 escaper.flush();
 1022    }
 1023   
 1024   
 1025    /**
 1026    * <p>
 1027    * Returns the number of spaces this serializer indents.
 1028    * </p>
 1029    *
 1030    * @return the number of spaces this serializer indents
 1031    * each successive level beyond the previous one
 1032    */
 1033  1698400 public int getIndent() {
 1034  1698400 return escaper.getIndent();
 1035    }
 1036   
 1037   
 1038    /**
 1039    * <p>
 1040    * Sets the number of additional spaces to add to each successive
 1041    * level in the hierarchy. Use 0 for no extra indenting. The
 1042    * maximum indentation is limited to approximately half the
 1043    * maximum line length. The serializer will not indent further
 1044    * than that no matter how many levels deep the hierarchy is.
 1045    * </p>
 1046    *
 1047    * <p>
 1048    * When this variable is set to a value greater than 0,
 1049    * the serializer does not preserve white space. Spaces,
 1050    * tabs, carriage returns, and line feeds can all be
 1051    * interchanged at the serializer's discretion, and additional
 1052    * white space may be added before and after tags.
 1053    * Carriage returns, line feeds, and tabs will not be
 1054    * escaped with numeric character references.
 1055    * </p>
 1056    *
 1057    * <p>
 1058    * Inside elements with an <code>xml:space="preserve"</code>
 1059    * attribute, white space is preserved and no indenting
 1060    * takes place, regardless of the setting of the indent
 1061    * property, unless, of course, an
 1062    * <code>xml:space="default"</code> attribute overrides the
 1063    * <code>xml:space="preserve"</code> attribute.
 1064    * </p>
 1065    *
 1066    * <p>
 1067    * The default value for indent is 0; that is, the default is
 1068    * not to add or subtract any white space from the source
 1069    * document.
 1070    * </p>
 1071    *
 1072    * @param indent the number of spaces to indent
 1073    * each successive level of the hierarchy
 1074    *
 1075    * @throws IllegalArgumentException if indent is less than zero
 1076    *
 1077    */
 1078  68 public void setIndent(int indent) {
 1079  68 if (indent < 0) {
 1080  1 throw new IllegalArgumentException(
 1081    "Indent cannot be negative"
 1082    );
 1083    }
 1084  67 escaper.setIndent(indent);
 1085    }
 1086   
 1087   
 1088    /**
 1089    * <p>
 1090    * Returns the string used as a line separator.
 1091    * This is always <code>"\n"</code>, <code>"\r"</code>,
 1092    * or <code>"\r\n"</code>.
 1093    * </p>
 1094    *
 1095    * @return the line separator
 1096    */
 1097  38 public String getLineSeparator() {
 1098  38 return escaper.getLineSeparator();
 1099    }
 1100   
 1101   
 1102    /**
 1103    * <p>
 1104    * Sets the line separator. This can only be one of the
 1105    * three strings <code>"\n"</code>, <code>"\r"</code>,
 1106    * or <code>"\r\n"</code>. All other values are forbidden.
 1107    * If this method is invoked, then
 1108    * line separators in the character data will be changed to this
 1109    * string. Line separators in attribute values will be changed
 1110    * to the hexadecimal numeric character references corresponding
 1111    * to this string.
 1112    * </p>
 1113    *
 1114    * <p>
 1115    * The default line separator is <code>"\r\n"</code>. However,
 1116    * line separators in character data and attribute values are not
 1117    * changed to this string, unless this method is called first.
 1118    * </p>
 1119    *
 1120    * @param lineSeparator the line separator to set
 1121    *
 1122    * @throws IllegalArgumentException if you attempt to use any line
 1123    * separator other than <code>"\n"</code>, <code>"\r"</code>,
 1124    * or <code>"\r\n"</code>.
 1125    *
 1126    */
 1127  2548 public void setLineSeparator(String lineSeparator) {
 1128  2548 escaper.setLineSeparator(lineSeparator);
 1129    }
 1130   
 1131   
 1132    /**
 1133    * <p>
 1134    * Returns the preferred maximum line length.
 1135    * </p>
 1136    *
 1137    * @return the preferred maximum line length.
 1138    */
 1139  40 public int getMaxLength() {
 1140  40 return escaper.getMaxLength();
 1141    }
 1142   
 1143   
 1144    /**
 1145    * <p>
 1146    * Sets the suggested maximum line length for this serializer.
 1147    * Setting this to 0 indicates that no automatic wrapping is to be
 1148    * performed. When a line approaches this length, the serializer
 1149    * begins looking for opportunities to break the line. Generally
 1150    * it will break on any ASCII white space character (tab, carriage
 1151    * return, linefeed, and space). In some circumstances the
 1152    * serializer may not be able to break the line before the maximum
 1153    * length is reached. For instance, if an element name is longer
 1154    * than the maximum line length the only way to correctly
 1155    * serialize it is to exceed the maximum line length. In this case,
 1156    * the serializer will exceed the maximum line length.
 1157    * </p>
 1158    *
 1159    * <p>
 1160    * The default value for maximum line length is 0, which is
 1161    * interpreted as no maximum line length.
 1162    * Setting this to a negative value just sets it to 0.
 1163    * </p>
 1164    *
 1165    * <p>
 1166    * When this variable is set to a value greater than 0,
 1167    * the serializer does not preserve white space. Spaces,
 1168    * tabs, carriage returns, and line feeds can all be
 1169    * interchanged at the serializer's discretion.
 1170    * Carriage returns, line feeds, and tabs will not be
 1171    * escaped with numeric character references.
 1172    * </p>
 1173    *
 1174    * <p>
 1175    * Inside elements with an <code>xml:space="preserve"</code>
 1176    * attribute, the maximum line length is not enforced,
 1177    * regardless of the setting of this property, unless,
 1178    * of course, an <code>xml:space="default"</code> attribute
 1179    * overrides the <code>xml:space="preserve"</code> attribute.
 1180    * </p>
 1181    *
 1182    * @param maxLength the preferred maximum line length
 1183    */
 1184  52 public void setMaxLength(int maxLength) {
 1185  52 escaper.setMaxLength(maxLength);
 1186    }
 1187   
 1188   
 1189    /**
 1190    * <p>
 1191    * Returns true if this serializer preserves the original
 1192    * base URIs by inserting extra <code>xml:base</code> attributes.
 1193    * </p>
 1194    *
 1195    * @return true if this <code>Serializer</code> inserts
 1196    * extra <code>xml:base</code> attributes to attempt to
 1197    * preserve base URI information from the document.
 1198    */
 1199  3 public boolean getPreserveBaseURI() {
 1200  3 return preserveBaseURI;
 1201    }
 1202   
 1203   
 1204    /**
 1205    * <p>
 1206    * Determines whether this serializer inserts
 1207    * extra <code>xml:base</code> attributes to attempt to
 1208    * preserve base URI information from the document.
 1209    * The default is false; do not preserve base URI information.
 1210    * <code>xml:base</code> attributes that have been explicitly
 1211    * added to an element are always output. This property only
 1212    * determines whether or not extra <code>xml:base</code>
 1213    * attributes are added.
 1214    * </p>
 1215    *
 1216    * @param preserve true if <code>xml:base</code>
 1217    * attributes should be added as necessary
 1218    * to preserve base URI information
 1219    */
 1220  6 public void setPreserveBaseURI(boolean preserve) {
 1221  6 this.preserveBaseURI = preserve;
 1222    }
 1223   
 1224   
 1225    /**
 1226    * <p>
 1227    * Returns the name of the character encoding used by
 1228    * this serializer.
 1229    * </p>
 1230    *
 1231    * @return the encoding used for the output document
 1232    */
 1233  1 public String getEncoding() {
 1234  1 return escaper.getEncoding();
 1235    }
 1236   
 1237   
 1238    /**
 1239    * <p>
 1240    * If true, this property indicates serialization will
 1241    * perform Unicode normalization on all data using normalization
 1242    * form C (NFC). Performing Unicode normalization may change the
 1243    * document's infoset. The default is false; do not normalize.
 1244    * This version is based on Unicode 4.0.
 1245    * </p>
 1246    *
 1247    * <p>
 1248    * This feature has not yet been benchmarked or optimized.
 1249    * It may result in substantially slower code.
 1250    * </p>
 1251    *
 1252    * <p>
 1253    * If all your data is in the first 256 code points of Unicode
 1254    * (i.e. the ISO-8859-1, Latin-1 character set), then it's
 1255    * already in normalization form C and normalizing won't change
 1256    * anything.
 1257    * </p>
 1258    *
 1259    * @param normalize true if normalization is performed;
 1260    * false if it isn't
 1261    */
 1262  17190 public void setUnicodeNormalizationFormC(boolean normalize) {
 1263  17190 escaper.setNFC(normalize);
 1264    }
 1265   
 1266   
 1267    /**
 1268    * <p>
 1269    * Indicates whether serialization will
 1270    * perform Unicode normalization on all data using normalization
 1271    * form C (NFC). The default is false; do not normalize.
 1272    * </p>
 1273    *
 1274    * @return true if this serializer performs Unicode
 1275    * normalization; false if it doesn't
 1276    */
 1277  37 public boolean getUnicodeNormalizationFormC() {
 1278  37 return escaper.getNFC();
 1279    }
 1280   
 1281   
 1282    /**
 1283    * <p>
 1284    * Returns the current column number of the output stream. This
 1285    * method is useful for subclasses that implement their own pretty
 1286    * printing strategies by inserting white space and line breaks
 1287    * at appropriate points.
 1288    * </p>
 1289    *
 1290    * <p>
 1291    * Columns are counted based on Unicode characters, not Java
 1292    * chars. A surrogate pair counts as one character in this
 1293    * context, not two. However, a character followed by a
 1294    * combining character (e.g. e followed by combining accent
 1295    * acute) counts as two characters. This latter choice
 1296    * (treating combining characters like regular characters)
 1297    * is under review, and may change in the future if it's not
 1298    * too big a performance hit.
 1299    * </p>
 1300    *
 1301    * @return the current column number
 1302    */
 1303  4 protected final int getColumnNumber() {
 1304  4 return escaper.getColumnNumber();
 1305    }
 1306   
 1307    }