Clover coverage report - Clover results for XOM 1.2d1
Coverage timestamp: Wed Feb 8 2006 08:31:33 EST
file stats: LOC: 1,081   Methods: 30
NCLOC: 665   Classes: 3
 
 Source file Conditionals Statements Methods TOTAL
Canonicalizer.java 100% 100% 100% 100%
coverage
 1    /* Copyright 2002-2005 Elliotte Rusty Harold
 2   
 3    This library is free software; you can redistribute it and/or modify
 4    it under the terms of version 2.1 of the GNU Lesser General Public
 5    License as published by the Free Software Foundation.
 6   
 7    This library is distributed in the hope that it will be useful,
 8    but WITHOUT ANY WARRANTY; without even the implied warranty of
 9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10    GNU Lesser General Public License for more details.
 11   
 12    You should have received a copy of the GNU Lesser General Public
 13    License along with this library; if not, write to the
 14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 15    Boston, MA 02111-1307 USA
 16   
 17    You can contact Elliotte Rusty Harold by sending e-mail to
 18    elharo@metalab.unc.edu. Please include the word "XOM" in the
 19    subject line. The XOM home page is located at http://www.xom.nu/
 20    */
 21   
 22    package nu.xom.canonical;
 23   
 24    import java.io.IOException;
 25    import java.io.OutputStream;
 26    import java.util.ArrayList;
 27    import java.util.Arrays;
 28    import java.util.Comparator;
 29    import java.util.Iterator;
 30    import java.util.List;
 31    import java.util.Map;
 32    import java.util.SortedMap;
 33    import java.util.StringTokenizer;
 34    import java.util.TreeMap;
 35    import java.util.Map.Entry;
 36   
 37    import org.xml.sax.helpers.NamespaceSupport;
 38   
 39    import nu.xom.Attribute;
 40    import nu.xom.Comment;
 41    import nu.xom.DocType;
 42    import nu.xom.Document;
 43    import nu.xom.Element;
 44    import nu.xom.Namespace;
 45    import nu.xom.Node;
 46    import nu.xom.Nodes;
 47    import nu.xom.ParentNode;
 48    import nu.xom.ProcessingInstruction;
 49    import nu.xom.Serializer;
 50    import nu.xom.Text;
 51    import nu.xom.XPathContext;
 52   
 53    /**
 54    * <p>
 55    * Writes XML in the format specified by <a target="_top"
 56    * href="http://www.w3.org/TR/2001/REC-xml-c14n-20010315">Canonical
 57    * XML Version 1.0</a> or <a target="_top"
 58    * href="http://www.w3.org/TR/2002/REC-xml-exc-c14n-20020718/">Exclusive
 59    * XML Canonicalization Version 1.0</a>.
 60    * </p>
 61    *
 62    * @author Elliotte Rusty Harold
 63    * @version 1.1b4
 64    *
 65    */
 66    public class Canonicalizer {
 67   
 68    private boolean withComments;
 69    private boolean exclusive = false;
 70    private CanonicalXMLSerializer serializer;
 71    private List inclusiveNamespacePrefixes = new ArrayList();
 72   
 73    private static Comparator comparator = new AttributeComparator();
 74   
 75   
 76    public final static String CANONICAL_XML =
 77    "http://www.w3.org/TR/2001/REC-xml-c14n-20010315";
 78    public final static String CANONICAL_XML_WITH_COMMENTS =
 79    "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments";
 80    public final static String EXCLUSIVE_XML_CANONICALIZATION =
 81    "http://www.w3.org/2001/10/xml-exc-c14n#";
 82    public final static String EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS =
 83    "http://www.w3.org/2001/10/xml-exc-c14n#WithComments";
 84   
 85   
 86    private static class AttributeComparator implements Comparator {
 87   
 88  9310 public int compare(Object o1, Object o2) {
 89  9310 Attribute a1 = (Attribute) o1;
 90  9310 Attribute a2 = (Attribute) o2;
 91   
 92  9310 String namespace1 = a1.getNamespaceURI();
 93  9310 String namespace2 = a2.getNamespaceURI();
 94  9310 if (namespace1.equals(namespace2)) {
 95  9260 return a1.getLocalName().compareTo(a2.getLocalName());
 96    }
 97  50 else if (namespace1.equals("")) {
 98  6 return -1;
 99    }
 100  44 else if (namespace2.equals("")) {
 101  32 return 1;
 102    }
 103    else { // compare namespace URIs
 104  12 return namespace1.compareTo(namespace2);
 105    }
 106   
 107    }
 108   
 109    }
 110   
 111   
 112    /**
 113    * <p>
 114    * Creates a <code>Canonicalizer</code> that outputs a
 115    * canonical XML document with comments.
 116    * </p>
 117    *
 118    * @param out the output stream the document
 119    * is written onto
 120    */
 121  614 public Canonicalizer(OutputStream out) {
 122  614 this(out, true, false);
 123    }
 124   
 125   
 126    /**
 127    * <p>
 128    * Creates a <code>Canonicalizer</code> that outputs a
 129    * canonical XML document with or without comments.
 130    * </p>
 131    *
 132    * @param out the output stream the document
 133    * is written onto
 134    * @param withComments true if comments should be included
 135    * in the output, false otherwise
 136    */
 137  25 public Canonicalizer(
 138    OutputStream out, boolean withComments) {
 139  25 this(out, withComments, false);
 140    }
 141   
 142   
 143    /**
 144    * <p>
 145    * Creates a <code>Canonicalizer</code> that outputs a
 146    * canonical XML document with or without comments,
 147    * using either the original or the exclusive canonicalization
 148    * algorithm.
 149    * </p>
 150    *
 151    * @param out the output stream the document
 152    * is written onto
 153    * @param withComments true if comments should be included
 154    * in the output, false otherwise
 155    * @param exclusive true if exclusive XML canonicalization
 156    * should be performed, false if regular XML canonicalization
 157    * should be performed
 158    */
 159  639 private Canonicalizer(
 160    OutputStream out, boolean withComments, boolean exclusive) {
 161   
 162  639 this.serializer = new CanonicalXMLSerializer(out);
 163  639 serializer.setLineSeparator("\n");
 164  639 this.withComments = withComments;
 165  639 this.exclusive = exclusive;
 166   
 167    }
 168   
 169   
 170    /**
 171    * <p>
 172    * Creates a <code>Canonicalizer</code> that outputs a
 173    * canonical XML document using the specified algorithm.
 174    * Currently, four algorithms are defined and supported:
 175    * </p>
 176    *
 177    * <ul>
 178    * <li>Canonical XML without comments:
 179    * <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315</code></li>
 180    * <li>Canonical XML with comments:
 181    * <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments</code></li>
 182    * <li>Exclusive XML canonicalization without comments:
 183    * <code>http://www.w3.org/2001/10/xml-exc-c14n#</code></li>
 184    * <li>Exclusive XML canonicalization with comments:
 185    * <code>http://www.w3.org/2001/10/xml-exc-c14n#WithComments</code></li>
 186    * </ul>
 187    *
 188    * @param out the output stream the document
 189    * is written onto
 190    * @param algorithm the URI for the canonicalization algorithm
 191    *
 192    * @throws CanonicalizationException if the algorithm is
 193    * not recognized
 194    * @throws NullPointerException if the algorithm is null
 195    *
 196    */
 197  625 public Canonicalizer(
 198    OutputStream out, String algorithm) {
 199   
 200  625 if (algorithm == null) {
 201  1 throw new NullPointerException("Null algorithm");
 202    }
 203  624 this.serializer = new CanonicalXMLSerializer(out);
 204  624 serializer.setLineSeparator("\n");
 205  624 if (algorithm.equals(CANONICAL_XML)) {
 206  12 this.withComments = false;
 207  12 this.exclusive = false;
 208    }
 209  612 else if (algorithm.equals(CANONICAL_XML_WITH_COMMENTS)) {
 210  11 this.withComments = true;
 211  11 this.exclusive = false;
 212    }
 213  601 else if (algorithm.equals(EXCLUSIVE_XML_CANONICALIZATION)) {
 214  7 this.withComments = false;
 215  7 this.exclusive = true;
 216    }
 217  594 else if (algorithm.equals(EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS)) {
 218  593 this.withComments = true;
 219  593 this.exclusive = true;
 220    }
 221    else {
 222  1 throw new CanonicalizationException(
 223    "Unsupported canonicalization algorithm: " + algorithm);
 224    }
 225   
 226    }
 227   
 228   
 229    private class CanonicalXMLSerializer extends Serializer {
 230   
 231    // If nodes is null we're canonicalizing all nodes;
 232    // the entire document; this is somewhat easier than when
 233    // canonicalizing only a document subset embedded in nodes
 234    private Nodes nodes;
 235    private NamespaceSupport inScope;
 236   
 237    /**
 238    * <p>
 239    * Creates a <code>Serializer</code> that outputs a
 240    * canonical XML document. Whether comments are written is
 241    * decided by the <code>withComments</code> setting of the
 242    * enclosing <code>Canonicalizer</code>.
 243    * </p>
 244    *
 245    * @param out the <code>OutputStream</code> the document
 246    * is written onto
 247    */
 248  1263 CanonicalXMLSerializer(OutputStream out) {
 249  1263 super(out);
 250  1263 setLineSeparator("\n");
 251    }
 252   
 253   
 254    /**
 255    * <p>
 256    * Serializes a document onto the output
 257    * stream using the canonical XML algorithm.
 258    * </p>
 259    *
 260    * @param doc the <code>Document</code> to serialize
 261    *
 262    * @throws IOException if the underlying <code>OutputStream</code>
 263    * encounters an I/O error
 264    */
 265  1250 public final void write(Document doc) throws IOException {
 266   
 267  1250 inScope = new NamespaceSupport();
 268  1250 int position = 0;
 269  1250 while (true) {
 270  2735 Node child = doc.getChild(position);
 271  2735 if (nodes == null || child instanceof Element || nodes.contains(child)) {
 272  2728 writeChild(child);
 273  71 if (child instanceof ProcessingInstruction) breakLine();
 274  2657 else if (child instanceof Comment && withComments) {
 275  323 breakLine();
 276    }
 277    }
 278  2735 position++;
 279  1250 if (child instanceof Element) break;
 280    }
 281   
 282  1250 for (int i = position; i < doc.getChildCount(); i++) {
 283  174 Node child = doc.getChild(i);
 284  174 if (nodes == null || child instanceof Element || nodes.contains(child)) {
 285  27 if (child instanceof ProcessingInstruction) breakLine();
 286  145 else if (child instanceof Comment && withComments) {
 287  141 breakLine();
 288    }
 289  172 writeChild(child);
 290    }
 291    }
 292   
 293  1250 flush();
 294   
 295    }
 296   
 297   
 298    /**
 299    * <p>
 300    * Serializes an element onto the output stream using the canonical
 301    * XML algorithm. The result is guaranteed to be well-formed.
 302    * If <code>element</code> does not have a parent element, it will
 303    * also be namespace well-formed.
 304    * </p>
 305    *
 306    * @param element the <code>Element</code> to serialize
 307    *
 308    * @throws IOException if the underlying <code>OutputStream</code>
 309    * encounters an I/O error
 310    */
 311  2641 protected final void write(Element element)
 312    throws IOException {
 313   
 314    // treat empty elements differently to avoid an
 315    // instanceof test
 316  2641 if (element.getChildCount() == 0) {
 317  1990 writeStartTag(element, false);
 318  1990 writeEndTag(element);
 319    }
 320    else {
 321  651 Node current = element;
 322  651 boolean end = false;
 323  651 int index = -1;
 324  651 int[] indexes = new int[10];
 325  651 int top = 0;
 326  651 indexes[0] = -1;
 327  651 while (true) {
 328  108368 if (!end && current.getChildCount() > 0) {
 329  27968 writeStartTag((Element) current, false);
 330  27968 current = current.getChild(0);
 331  27968 index = 0;
 332  27968 top++;
 333  27968 indexes = grow(indexes, top);
 334  27968 indexes[top] = 0;
 335    }
 336    else {
 337  80400 if (end) {
 338  27968 writeEndTag((Element) current);
 339  651 if (current == element) break;
 340    }
 341    else {
 342  52432 writeChild(current);
 343    }
 344  79749 end = false;
 345  79749 ParentNode parent = current.getParent();
 346  79749 if (parent.getChildCount() - 1 == index) {
 347  27968 current = parent;
 348  27968 top--;
 349  27968 if (current != element) {
 350  27317 index = indexes[top];
 351    }
 352  27968 end = true;
 353    }
 354    else {
 355  51781 index++;
 356  51781 indexes[top] = index;
 357  51781 current = parent.getChild(index);
 358    }
 359    }
 360    }
 361    }
 362   
 363    }
 364   
 365   
 366  27968 private int[] grow(int[] indexes, int top) {
 367   
 368  27956 if (top < indexes.length) return indexes;
 369  12 int[] result = new int[indexes.length*2];
 370  12 System.arraycopy(indexes, 0, result, 0, indexes.length);
 371  12 return result;
 372   
 373    }
 374   
 375   
 376  29958 protected void writeStartTag(Element element, boolean isEmpty)
 377    throws IOException {
 378   
 379  29958 boolean writeElement = nodes == null || nodes.contains(element);
 380  29958 if (writeElement) {
 381  29903 inScope.pushContext();
 382  29903 writeRaw("<");
 383  29903 writeRaw(element.getQualifiedName());
 384    }
 385   
 386  29958 SortedMap map = new TreeMap();
 387  29958 if (nodes == null) {
 388  29851 ParentNode parent = element.getParent();
 389  29851 Element parentElement = null;
 390  29851 if (parent instanceof Element) {
 391  28648 parentElement = (Element) parent;
 392    }
 393  29851 for (int i = 0;
 394  59753 i < element.getNamespaceDeclarationCount();
 395    i++) {
 396  29902 String prefix = element.getNamespacePrefix(i);
 397  29902 String uri = element.getNamespaceURI(prefix);
 398   
 399  29902 if (uri.equals(inScope.getURI(prefix))) {
 400  21 continue;
 401    }
 402  29881 else if (exclusive) {
 403  14840 if (needToDeclareNamespace(element, prefix, uri)) {
 404  5 map.put(prefix, uri);
 405    }
 406    }
 407  15041 else if (uri.equals("")) {
 408    // no need to say xmlns=""
 409  614 if (parentElement == null) continue;
 410  14348 if ("".equals(parentElement.getNamespaceURI(""))) {
 411  14335 continue;
 412    }
 413  13 map.put(prefix, uri);
 414    }
 415    else {
 416  79 map.put(prefix, uri);
 417    }
 418   
 419    }
 420   
 421  29851 writeNamespaceDeclarations(map);
 422   
 423    }
 424    else {
 425  107 int position = indexOf(element);
 426    // do we need to undeclare a default namespace?
 427    // You know, should I instead create an output tree and then just
 428    // canonicalize that? probably not
 429  107 if (position != -1 && "".equals(element.getNamespaceURI())) {
 430  23 ParentNode parent = element.getParent();
 431    // Here we have to check for the nearest default on parents in the
 432    // output tree, not the input tree
 433  23 while (parent instanceof Element
 434    && !(nodes.contains(parent))) {
 435  16 parent = parent.getParent();
 436    }
 437  23 if (parent instanceof Element) {
 438  11 String uri = ((Element) parent).getNamespaceURI("");
 439  11 if (! "".equals(uri)) {
 440  4 map.put("", "");
 441    }
 442    }
 443    }
 444   
 445  107 for (int i = position+1; i < nodes.size(); i++) {
 446  175 Node next = nodes.get(i);
 447  79 if ( !(next instanceof Namespace) ) break;
 448  96 Namespace namespace = (Namespace) next;
 449  96 String prefix = namespace.getPrefix();
 450  96 String uri = namespace.getValue();
 451   
 452  96 if (uri.equals(inScope.getURI(prefix))) {
 453  50 continue;
 454    }
 455  46 else if (exclusive) {
 456  28 if (needToDeclareNamespace(element, prefix, uri)) {
 457  19 map.put(prefix, uri);
 458    }
 459    }
 460    else {
 461  18 map.put(prefix, uri);
 462    }
 463   
 464    }
 465   
 466  107 writeNamespaceDeclarations(map);
 467   
 468    }
 469   
 470  29958 Attribute[] sorted = sortAttributes(element);
 471  29958 for (int i = 0; i < sorted.length; i++) {
 472  18155 if (nodes == null || nodes.contains(sorted[i])
 473    || (sorted[i].getNamespaceURI().equals(Namespace.XML_NAMESPACE)
 474    && sorted[i].getParent() != element)) {
 475  18137 write(sorted[i]);
 476    }
 477    }
 478   
 479  29958 if (writeElement) {
 480  29903 writeRaw(">");
 481    }
 482   
 483    }
 484   
 485   
 486  29958 private void writeNamespaceDeclarations(SortedMap map) throws IOException {
 487   
 488  29958 Iterator prefixes = map.entrySet().iterator();
 489  29958 while (prefixes.hasNext()) {
 490  138 Map.Entry entry = (Entry) prefixes.next();
 491  138 String prefix = (String) entry.getKey();
 492  138 String uri = (String) entry.getValue();
 493  138 writeRaw(" ");
 494  138 writeNamespaceDeclaration(prefix, uri);
 495  138 inScope.declarePrefix(prefix, uri);
 496    }
 497   
 498    }
 499   
 500   
 501  14868 private boolean needToDeclareNamespace(
 502    Element parent, String prefix, String uri) {
 503   
 504  14868 boolean match = visiblyUtilized(parent, prefix, uri);
 505   
 506  14868 if (match || inclusiveNamespacePrefixes.contains(prefix)) {
 507  14858 return noOutputAncestorUsesPrefix(parent, prefix, uri);
 508    }
 509   
 510  10 return false;
 511   
 512    }
 513   
 514   
 515  14868 private boolean visiblyUtilized(Element element, String prefix, String uri) {
 516   
 517  14868 boolean match = false;
 518  14868 String pfx = element.getNamespacePrefix();
 519  14868 String local = element.getNamespaceURI();
 520  14868 if (prefix.equals(pfx) && local.equals(uri)) {
 521  14854 match = true;
 522    }
 523    else {
 524  14 for (int i = 0; i < element.getAttributeCount(); i++) {
 525  8 Attribute attribute = element.getAttribute(i);
 526  8 if (nodes == null || nodes.contains(attribute)) {
 527  7 pfx = attribute.getNamespacePrefix();
 528  7 if (prefix.equals(pfx)) {
 529  3 match = true;
 530  3 break;
 531    }
 532    }
 533    }
 534    }
 535  14868 return match;
 536    }
 537   
 538   
 539  14858 private boolean noOutputAncestorUsesPrefix(Element original, String prefix, String uri) {
 540   
 541  14858 ParentNode parent = original.getParent();
 542  14858 if (parent instanceof Document && "".equals(uri)) {
 543  580 return false;
 544    }
 545   
 546  14278 while (parent != null && !(parent instanceof Document)) {
 547  14275 if (nodes == null || nodes.contains(parent)) {
 548  14257 Element element = (Element) parent;
 549  14257 String pfx = element.getNamespacePrefix();
 550  14257 if (pfx.equals(prefix)) {
 551  14254 String newURI = element.getNamespaceURI(prefix);
 552  14254 return ! newURI.equals(uri);
 553    }
 554   
 555  3 for (int i = 0; i < element.getAttributeCount(); i++) {
 556  3 Attribute attribute = element.getAttribute(i);
 557  3 String current = attribute.getNamespacePrefix();
 558  3 if (current.equals(prefix)) {
 559  1 String newURI = element.getNamespaceURI(prefix);
 560  1 return ! newURI.equals(uri);
 561    }
 562    }
 563    }
 564  20 parent = parent.getParent();
 565    }
 566  23 return true;
 567   
 568    }
 569   
 570   
 571    // ???? move into Nodes?
 572  107 private int indexOf(Element element) {
 573  107 for (int i = 0; i < nodes.size(); i++) {
 574  52 if (nodes.get(i) == element) return i;
 575    }
 576  55 return -1;
 577    }
 578   
 579   
 580  18138 protected void write(Attribute attribute) throws IOException {
 581   
 582  18138 writeRaw(" ");
 583  18138 writeRaw(attribute.getQualifiedName());
 584  18138 writeRaw("=\"");
 585  18138 writeRaw(prepareAttributeValue(attribute));
 586  18138 writeRaw("\"");
 587   
 588    }
 589   
 590   
 591  29958 protected void writeEndTag(Element element) throws IOException {
 592   
 593  29958 if (nodes == null || nodes.contains(element)) {
 594  29903 writeRaw("</");
 595  29903 writeRaw(element.getQualifiedName());
 596  29903 writeRaw(">");
 597  29903 inScope.popContext();
 598    }
 599   
 600    }
 601   
 602    private final XPathContext xmlcontext = new XPathContext("xml", Namespace.XML_NAMESPACE);
 603   
 604  29958 private Attribute[] sortAttributes(Element element) {
 605   
 606  29958 Map nearest = new TreeMap();
 607    // add in any inherited xml: attributes
 608  29958 if (!exclusive && nodes != null && nodes.contains(element)
 609    && ! nodes.contains(element.getParent())) {
 610    // grab all xml: attributes
 611  24 Nodes attributes = element.query("ancestor::*/@xml:*", xmlcontext);
 612  24 if (attributes.size() != 0) {
 613    // It's important to count backwards here because
 614    // XPath returns all nodes in document order, which
 615    // is top-down. To get the nearest we need to go
 616    // bottom up instead.
 617  9 for (int i = attributes.size()-1; i >= 0; i--) {
 618  11 Attribute a = (Attribute) attributes.get(i);
 619  11 String name = a.getLocalName();
 620  11 if (element.getAttribute(name, Namespace.XML_NAMESPACE) != null) {
 621    // this element already has that attribute
 622  2 continue;
 623    }
 624  9 if (! nearest.containsKey(name)) {
 625  7 Element parent = (Element) a.getParent();
 626  7 if (! nodes.contains(parent)) {
 627  6 nearest.put(name, a);
 628    }
 629    else {
 630  1 nearest.put(name, null);
 631    }
 632    }
 633    }
 634    }
 635   
 636    // remove null values
 637  24 Iterator iterator = nearest.values().iterator();
 638  24 while (iterator.hasNext()) {
 639  1 if (iterator.next() == null) iterator.remove();
 640    }
 641   
 642    }
 643   
 644  29958 int localCount = element.getAttributeCount();
 645  29958 Attribute[] result
 646    = new Attribute[localCount + nearest.size()];
 647  29958 for (int i = 0; i < localCount; i++) {
 648  18149 result[i] = element.getAttribute(i);
 649    }
 650   
 651  29958 Iterator iterator = nearest.values().iterator();
 652  29958 for (int j = localCount; j < result.length; j++) {
 653  6 result[j] = (Attribute) iterator.next();
 654    }
 655   
 656  29958 Arrays.sort(result, comparator);
 657   
 658  29958 return result;
 659   
 660    }
 661   
 662   
 663  18138 private String prepareAttributeValue(Attribute attribute) {
 664   
 665  18138 String value = attribute.getValue();
 666  18138 StringBuffer result = new StringBuffer(value.length());
 667   
 668  18138 if (attribute.getType().equals(Attribute.Type.CDATA)
 669    || attribute.getType().equals(Attribute.Type.UNDECLARED)) {
 670  6944 char[] data = value.toCharArray();
 671  6944 for (int i = 0; i < data.length; i++) {
 672  47013 char c = data[i];
 673  47013 if (c == '\t') {
 674  7 result.append("&#x9;");
 675    }
 676  47006 else if (c == '\n') {
 677  7 result.append("&#xA;");
 678    }
 679  46999 else if (c == '\r') {
 680  7 result.append("&#xD;");
 681    }
 682  46992 else if (c == '\"') {
 683  57 result.append("&quot;");
 684    }
 685  46935 else if (c == '&') {
 686  15 result.append("&amp;");
 687    }
 688  46920 else if (c == '<') {
 689  11 result.append("&lt;");
 690    }
 691    else {
 692  46909 result.append(c);
 693    }
 694    }
 695    }
 696    else {
 697    // According to the spec, "Whitespace character references
 698    // other than &#x20; are not affected by attribute value
 699    // normalization. For parsed documents, the parser will
 700    // still replace these with the actual character. I am
 701    // going to assume that if one is found here, that the
 702    // user meant to put it there; and so we will escape it
 703    // with a character reference
 704  11194 char[] data = value.toCharArray();
 705  11194 boolean seenFirstNonSpace = false;
 706  11194 for (int i = 0; i < data.length; i++) {
 707  103820 if (data[i] == ' ') {
 708  188 if (i != data.length-1 && data[i+1] != ' ' && seenFirstNonSpace) {
 709  183 result.append(data[i]);
 710    }
 711  188 continue;
 712    }
 713  103632 seenFirstNonSpace = true;
 714  103632 if (data[i] == '\t') {
 715  8 result.append("&#x9;");
 716    }
 717  103624 else if (data[i] == '\n') {
 718  12 result.append("&#xA;");
 719    }
 720  103612 else if (data[i] == '\r') {
 721  12 result.append("&#xD;");
 722    }
 723  103600 else if (data[i] == '\"') {
 724  8 result.append("&quot;");
 725    }
 726  103592 else if (data[i] == '&') {
 727  4 result.append("&amp;");
 728    }
 729  103588 else if (data[i] == '<') {
 730  4 result.append("&lt;");
 731    }
 732    else {
 733  103584 result.append(data[i]);
 734    }
 735    }
 736    }
 737   
 738  18138 return result.toString();
 739   
 740    }
 741   
 742   
 743    /**
 744    * <p>
 745    * Serializes a <code>Text</code> object
 746    * onto the output stream using the UTF-8 encoding.
 747    * The reserved characters &lt;, &gt;, and &amp;
 748    * are escaped using the standard entity references such as
 749    * <code>&amp;lt;</code>, <code>&amp;gt;</code>,
 750    * and <code>&amp;amp;</code>.
 751    * </p>
 752    *
 753    * @param text the <code>Text</code> to serialize
 754    *
 755    * @throws IOException if the underlying <code>OutputStream</code>
 756    * encounters an I/O error
 757    */
 758  49637 protected final void write(Text text) throws IOException {
 759   
 760  49637 if (nodes == null || nodes.contains(text)) {
 761  49613 String input = text.getValue();
 762  49613 StringBuffer result = new StringBuffer(input.length());
 763  49613 for (int i = 0; i < input.length(); i++) {
 764  793539 char c = input.charAt(i);
 765  793539 if (c == '\r') {
 766  12 result.append("&#xD;");
 767    }
 768  793527 else if (c == '&') {
 769  790 result.append("&amp;");
 770    }
 771  792737 else if (c == '<') {
 772  1676 result.append("&lt;");
 773    }
 774  791061 else if (c == '>') {
 775  1586 result.append("&gt;");
 776    }
 777    else {
 778  789475 result.append(c);
 779    }
 780    }
 781  49613 writeRaw(result.toString());
 782    }
 783   
 784    }
 785   
 786   
 787    /**
 788    * <p>
 789    * Serializes a <code>Comment</code> object
 790    * onto the output stream if and only if this
 791    * serializer is configured to produce canonical XML
 792    * with comments.
 793    * </p>
 794    *
 795    * @param comment the <code>Comment</code> to serialize
 796    *
 797    * @throws IOException if the underlying <code>OutputStream</code>
 798    * encounters an I/O error
 799    */
 800  1865 protected final void write(Comment comment)
 801    throws IOException {
 802  1865 if (withComments && (nodes == null || nodes.contains(comment))) {
 803  1857 super.write(comment);
 804    }
 805    }
 806   
 807   
 808  110 protected final void write(ProcessingInstruction pi)
 809    throws IOException {
 810  110 if (nodes == null || nodes.contains(pi)) {
 811  109 super.write(pi);
 812    }
 813    }
 814   
 815   
 816    /**
 817    * <p>
 818    * Does nothing because canonical XML does not include
 819    * document type declarations.
 820    * </p>
 821    *
 822    * @param doctype the document type declaration to serialize
 823    */
 824  1083 protected final void write(DocType doctype) {
 825    // DocType is not serialized in canonical XML
 826    }
 827   
 828   
 829  1212 public void write(Node node) throws IOException {
 830   
 831  1212 if (node instanceof Document) {
 832  1203 write((Document) node);
 833    }
 834  9 else if (node instanceof Attribute) {
 835  1 write((Attribute) node);
 836    }
 837  8 else if (node instanceof Namespace) {
 838  3 write((Namespace) node);
 839    }
 840    else {
 841  5 writeChild(node);
 842    }
 843   
 844    }
 845   
 846   
 847  3 private void write(Namespace namespace) throws IOException {
 848   
 849  3 String prefix = namespace.getPrefix();
 850  3 String uri = namespace.getValue();
 851  3 writeRaw(" xmlns" );
 852  3 if (!"".equals(prefix)) {
 853  2 writeRaw(":");
 854  2 writeRaw(prefix);
 855    }
 856  3 writeRaw("=\"");
 857  3 writeAttributeValue(uri);
 858  3 writeRaw("\"");
 859   
 860    }
 861   
 862    }
 863   
 864   
 865    /**
 866    * <p>
 867    * Serializes a node onto the output stream using the specified
 868    * canonicalization algorithm. If the node is a document or an
 869    * element, then the node's entire subtree is written out.
 870    * </p>
 871    *
 872    * @param node the node to canonicalize
 873    *
 874    * @throws IOException if the underlying <code>OutputStream</code>
 875    * encounters an I/O error
 876    */
 877  1223 public final void write(Node node) throws IOException {
 878   
 879    // See this thread:
 880    // http://lists.ibiblio.org/pipermail/xom-interest/2005-October/002656.html
 881  1223 if (node instanceof Element) {
 882  11 Document doc = node.getDocument();
 883  11 Element pseudoRoot = null;
 884  11 if (doc == null) {
 885  9 pseudoRoot = new Element("pseudo");
 886  9 doc = new Document(pseudoRoot);
 887  9 ParentNode root = (ParentNode) node;
 888  4 while (root.getParent() != null) root = root.getParent();
 889  9 pseudoRoot.appendChild(root);
 890    }
 891  11 try {
 892  11 write(node.query(".//. | .//@* | .//namespace::*"));
 893    }
 894    finally {
 895  9 if (pseudoRoot != null) pseudoRoot.removeChild(0);
 896    }
 897    }
 898    else {
 899  1212 serializer.nodes = null;
 900  1212 serializer.write(node);
 901    }
 902  1221 serializer.flush();
 903   
 904    }
 905   
 906   
 907    /**
 908    * <p>
 909    * Serializes a document subset onto the output stream using the
 910    * canonical XML algorithm. All nodes in the list must come from
 911    * the same document. Furthermore, they must come from a document.
 912    * They cannot be detached. The nodes need not be sorted. This
 913    * method will sort them into the appropriate order for
 914    * canonicalization.
 915    * </p>
 916    *
 917    * <p>
 918    * In most common use cases, these nodes will be the result of
 919    * evaluating an XPath expression. For example,
 920    * </p>
 921    *
 922    * <pre><code> Canonicalizer canonicalizer
 923    * = new Canonicalizer(System.out, Canonicalizer.CANONICAL_XML);
 924    * Nodes result = doc.query("//. | //@* | //namespace::*");
 925    * canonicalizer.write(result);
 926    * </code></pre>
 927    *
 928    * <p>
 929    * Children are not output unless the subset also includes them.
 930    * Including an element in the subset does not automatically
 931    * select all the element's children, attributes, and namespaces.
 932    * Furthermore, not selecting an element does not imply that its
 933    * children, namespaces, and attributes will not be output.
 934    * </p>
 935    *
 936    * @param documentSubset the nodes to serialize
 937    *
 938    * @throws IOException if the underlying <code>OutputStream</code>
 939    * encounters an I/O error
 940    * @throws CanonicalizationException if the nodes come from more
 941    * than one document; or if a detached node is in the list
 942    */
 943  50 public final void write(Nodes documentSubset) throws IOException {
 944   
 945  50 if (documentSubset.size() > 0) {
 946  49 Document doc = documentSubset.get(0).getDocument();
 947  49 if (doc == null) {
 948  1 throw new CanonicalizationException(
 949    "Canonicalization is not defined for detached nodes");
 950    }
 951  48 Nodes result = sort(documentSubset);
 952  47 serializer.nodes = result;
 953  47 serializer.write(doc);
 954  46 serializer.flush();
 955    }
 956   
 957    }
 958   
 959   
 960    /**
 961    * <p>
 962    * Specifies the prefixes that will be output as specified in
 963    * regular canonical XML, even when doing exclusive
 964    * XML canonicalization.
 965    * </p>
 966    *
 967    * @param inclusiveNamespacePrefixes a whitespace separated list
 968    * of namespace prefixes that will always be included in the
 969    * output, even in exclusive canonicalization
 970    */
 971  3 public final void setInclusiveNamespacePrefixList(String inclusiveNamespacePrefixes)
 972    throws IOException {
 973   
 974  3 this.inclusiveNamespacePrefixes.clear();
 975  3 if (this.exclusive && inclusiveNamespacePrefixes != null) {
 976  2 StringTokenizer tokenizer = new StringTokenizer(
 977    inclusiveNamespacePrefixes, " \t\r\n", false);
 978  2 while (tokenizer.hasMoreTokens()) {
 979  2 this.inclusiveNamespacePrefixes.add(tokenizer.nextToken());
 980    }
 981    }
 982   
 983    }
 984   
 985   
 986    // XXX remove recursion
 987    // recursively descend through document; in document
 988    // order, and add results as they are found
 989  48 private Nodes sort(Nodes in) {
 990   
 991  48 Node root = in.get(0).getDocument();
 992  48 if (in.size() > 1) {
 993  41 Nodes out = new Nodes();
 994  41 List list = new ArrayList(in.size());
 995  41 List namespaces = new ArrayList();
 996  41 for (int i = 0; i < in.size(); i++) {
 997  183 Node node = in.get(i);
 998  183 list.add(node);
 999  96 if (node instanceof Namespace) namespaces.add(node);
 1000    }
 1001  41 sort(list, namespaces, out, (ParentNode) root);
 1002  41 if (! list.isEmpty() ) {
 1003    // Are these just duplicates; or is there really a node
 1004    // from a different document?
 1005  2 Iterator iterator = list.iterator();
 1006  2 while (iterator.hasNext()) {
 1007  3 Node next = (Node) iterator.next();
 1008  3 if (root != next.getDocument()) {
 1009  1 throw new CanonicalizationException(
 1010    "Cannot canonicalize subsets that contain nodes from more than one document");
 1011    }
 1012    }
 1013    }
 1014  40 return out;
 1015    }
 1016    else {
 1017  7 return new Nodes(in.get(0));
 1018    }
 1019   
 1020    }
 1021   
 1022   
 1023  125 private static void sort(List in, List namespaces, Nodes out, ParentNode parent) {
 1024   
 1025  23 if (in.isEmpty()) return;
 1026  102 if (in.contains(parent)) {
 1027  1 out.append(parent);
 1028  1 in.remove(parent);
 1029    // I'm fairly sure this next line is unreachable, but just
 1030    // in case it isn't I'll leave this comment here.
 1031    // if (in.isEmpty()) return;
 1032    }
 1033   
 1034  102 int childCount = parent.getChildCount();
 1035  102 for (int i = 0; i < childCount; i++) {
 1036  130 Node child = parent.getChild(i);
 1037  130 if (child instanceof Element) {
 1038  90 Element element = (Element) child;
 1039  90 if (in.contains(element)) {
 1040  48 out.append(element);
 1041  48 in.remove(element);
 1042    }
 1043    // attach namespaces
 1044  90 if (!namespaces.isEmpty()) {
 1045  69 Iterator iterator = in.iterator();
 1046  69 while (iterator.hasNext()) {
 1047  305 Object o = iterator.next();
 1048  305 if (o instanceof Namespace) {
 1049  207 Namespace n = (Namespace) o;
 1050  207 if (element == n.getParent()) {
 1051  96 out.append(n);
 1052  96 iterator.remove();
 1053    }
 1054    }
 1055    }
 1056    }
 1057   
 1058    // attach attributes
 1059  90 for (int a = 0; a < element.getAttributeCount(); a++) {
 1060  25 Attribute att = element.getAttribute(a);
 1061  25 if (in.contains(att)) {
 1062  17 out.append(att);
 1063  17 in.remove(att);
 1064  6 if (in.isEmpty()) return;
 1065    }
 1066    }
 1067  84 sort(in, namespaces, out, element);
 1068    }
 1069    else {
 1070  40 if (in.contains(child)) {
 1071  18 out.append(child);
 1072  18 in.remove(child);
 1073  10 if (in.isEmpty()) return;
 1074    }
 1075    }
 1076    }
 1077   
 1078    }
 1079   
 1080   
 1081    }