Clover coverage report - Clover results for XOM 1.2d1
Coverage timestamp: Wed Feb 8 2006 08:31:33 EST
file stats: LOC: 320   Methods: 35
NCLOC: 194   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
EncodingTest.java 71.4% 95% 100% 92.1%
coverage coverage
 1    /* Copyright 2002-2005 Elliotte Rusty Harold
 2   
 3    This library is free software; you can redistribute it and/or modify
 4    it under the terms of version 2.1 of the GNU Lesser General Public
 5    License as published by the Free Software Foundation.
 6   
 7    This library is distributed in the hope that it will be useful,
 8    but WITHOUT ANY WARRANTY; without even the implied warranty of
 9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10    GNU Lesser General Public License for more details.
 11   
 12    You should have received a copy of the GNU Lesser General Public
 13    License along with this library; if not, write to the
 14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 15    Boston, MA 02111-1307 USA
 16   
 17    You can contact Elliotte Rusty Harold by sending e-mail to
 18    elharo@metalab.unc.edu. Please include the word "XOM" in the
 19    subject line. The XOM home page is located at http://www.xom.nu/
 20    */
 21   
 22    package nu.xom.tests;
 23   
 24    import java.io.ByteArrayInputStream;
 25    import java.io.ByteArrayOutputStream;
 26    import java.io.IOException;
 27    import java.io.InputStream;
 28    import java.io.UnsupportedEncodingException;
 29   
 30    import nu.xom.Attribute;
 31    import nu.xom.Builder;
 32    import nu.xom.Document;
 33    import nu.xom.Element;
 34    import nu.xom.ParsingException;
 35    import nu.xom.Serializer;
 36   
 37    /**
 38    * <p>
 39    * Check serialization of almost all of Unicode
 40    * in a variety of encodings.
 41    * </p>
 42    *
 43    * @author Elliotte Rusty Harold
 44    * @version 1.1a2
 45    *
 46    */
 47    public class EncodingTest extends XOMTestCase {
 48   
 49   
 50  30 public EncodingTest(String name) {
 51  30 super(name);
 52    }
 53   
 54   
 55    private Document doc;
 56   
 57    private static int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
 58   
 59  30 protected void setUp() {
 60   
 61  30 Element root = new Element("root");
 62  30 doc = new Document(root);
 63   
 64  30 Element prototype = new Element("d");
 65  30 for (int i = 0x20; i <= 0xD7FF; i++) {
 66  1657920 Element data = (Element) prototype.copy();
 67  1657920 data.appendChild(String.valueOf(((char) i)));
 68  1657920 data.addAttribute(new Attribute("c", String.valueOf(i)));
 69  1657920 root.appendChild(data);
 70    }
 71   
 72    // skip surrogates between 0xD800 and 0xDFFF
 73  30 for (int i = 0xE000; i <= 0xFFFD; i++) {
 74  245700 Element data = (Element) prototype.copy();
 75  245700 data.appendChild(String.valueOf(((char) i)));
 76  245700 data.addAttribute(new Attribute("c", String.valueOf(i)));
 77  245700 root.appendChild(data);
 78    }
 79   
 80    // Test Plane-1 characters. These are tricky because Java
 81    // strings encode them as surrogate pairs. We'll test with
 82    // the characters from 1D100 to 1D1FF (the musical symbols)
 83  30 StringBuffer sb = new StringBuffer(2);
 84  30 char high = 0xD834;
 85  30 sb.append(high);
 86  30 for (int i = 0; i < 256; i++) {
 87  7680 char low = (char) (0xDD00+i);
 88  7680 sb.setLength(1);
 89  7680 sb.append(low);
 90  7680 String s = sb.toString();
 91  7680 Element data = (Element) prototype.copy();
 92  7680 data.appendChild( s );
 93  7680 data.addAttribute(new Attribute("c", String.valueOf(0x1D100 + i)));
 94  7680 root.appendChild(data);
 95    }
 96   
 97    }
 98   
 99   
 100  30 protected void tearDown() {
 101  30 doc = null;
 102  30 System.gc();
 103    }
 104   
 105   
 106  1 public void testEUCJP() throws ParsingException, IOException {
 107  1 checkAll("EUC-JP");
 108    }
 109   
 110   
 111  1 public void testShift_JIS() throws ParsingException, IOException {
 112  1 checkAll("Shift_JIS");
 113    }
 114   
 115   
 116  1 public void testISO2022JP() throws ParsingException, IOException {
 117  1 checkAll("ISO-2022-JP");
 118    }
 119   
 120   
 121  1 public void testGeneric() throws ParsingException, IOException {
 122  1 checkAll("Cp1252");
 123    }
 124   
 125   
 126    // Main purpose here is to test a character set whose name is
 127    // case dependent
 128  1 public void testMacRoman() throws ParsingException, IOException {
 129  1 checkAll("MacRoman");
 130    }
 131   
 132   
 133  1 public void testBig5() throws ParsingException, IOException {
 134  1 checkAll("Big5");
 135    }
 136   
 137  1 public void testUSASCII() throws ParsingException, IOException {
 138  1 checkAll("US-ASCII");
 139    }
 140   
 141  1 public void testASCII() throws ParsingException, IOException {
 142  1 checkAll("ASCII");
 143    }
 144   
 145  1 public void testLatin1() throws ParsingException, IOException {
 146  1 checkAll("ISO-8859-1");
 147    }
 148   
 149  1 public void testLatin2() throws ParsingException, IOException {
 150  1 checkAll("ISO-8859-2");
 151    }
 152   
 153  1 public void testLatin3() throws ParsingException, IOException {
 154  1 checkAll("ISO-8859-3");
 155    }
 156   
 157  1 public void testLatin4() throws ParsingException, IOException {
 158  1 checkAll("ISO-8859-4");
 159    }
 160   
 161  1 public void testCyrillic() throws ParsingException, IOException {
 162  1 checkAll("ISO-8859-5");
 163    }
 164   
 165  1 public void testArabic() throws ParsingException, IOException {
 166  1 checkAll("ISO-8859-6");
 167    }
 168   
 169  1 public void testGreek() throws ParsingException, IOException {
 170    // This test seems to fail in Java 1.5, at least on Mac OS X
 171    // It passes in 1.4. The problem is the delete character 127
 172  1 checkAll("ISO-8859-7");
 173    }
 174   
 175  1 public void testThai() throws ParsingException, IOException {
 176  1 checkAll("TIS-620");
 177    }
 178   
 179  1 public void testHebrew() throws ParsingException, IOException {
 180  1 checkAll("ISO-8859-8");
 181    }
 182   
 183  1 public void testLatin5() throws ParsingException, IOException {
 184  1 checkAll("ISO-8859-9");
 185    }
 186   
 187  1 public void testUTF8() throws ParsingException, IOException {
 188  1 checkAll("UTF-8");
 189    }
 190   
 191  1 public void testUTF16() throws ParsingException, IOException {
 192  1 checkAll("UTF-16");
 193    }
 194   
 195  1 public void testUCS2() throws ParsingException, IOException {
 196  1 checkAll("ISO-10646-UCS-2");
 197    }
 198   
 199  1 public void testEBCDIC() throws ParsingException, IOException {
 200  1 checkAll("Cp037");
 201    }
 202   
 203    // These encodings are only available after Java 1.3
 204    private static boolean java14OrLater = false;
 205   
 206    static {
 207  1 String version = System.getProperty("java.version");
 208  1 String majorVersion = version.substring(0, 3);
 209  1 double versionNumber = Double.parseDouble(majorVersion);
 210  1 if (versionNumber >= 1.4) java14OrLater = true;
 211    }
 212   
 213  1 public void testLatin7() throws ParsingException, IOException {
 214  1 if (java14OrLater) checkAll("ISO-8859-13");
 215    }
 216   
 217  1 public void testLatin9() throws ParsingException, IOException {
 218  1 if (java14OrLater) checkAll("ISO-8859-15");
 219    }
 220   
 221  1 public void testGB18030() throws ParsingException, IOException {
 222  1 if (java14OrLater) checkAll("GB18030");
 223    }
 224   
 225    // These encodings are not installed in all distributions by
 226    // default. They are only found currently in IBM's Java 1.4.1 VM.
 227    // They don't seem to be supported in the 1.5 alpha
 228    // either.
 229  1 public void testUCS4() throws ParsingException, IOException {
 230  0 if (charsetAvailable("ISO-10646-UCS-4")) checkAll("ISO-10646-UCS-4");
 231    }
 232   
 233  1 public void testLatin6() throws ParsingException, IOException {
 234  0 if (charsetAvailable("ISO-8859-10")) checkAll("ISO-8859-10");
 235    }
 236   
 237  1 public void testLatin8() throws ParsingException, IOException {
 238  0 if (charsetAvailable("ISO-8859-14")) checkAll("ISO-8859-14");
 239    }
 240   
 241  1 public void testLatin10() throws ParsingException, IOException {
 242  0 if (charsetAvailable("ISO-8859-16")) checkAll("ISO-8859-16");
 243    }
 244   
 245   
 246    // Test that with an encoding XOM does not specifically support
 247    // but the VM does, everything still works.
 248  1 public void testUnsupportedEncoding()
 249    throws ParsingException, IOException {
 250  1 checkAll("Cp1252");
 251    }
 252   
 253   
 254  4 private static boolean charsetAvailable(String name) {
 255    // hack to avoid using 1.4 classes
 256  4 try {
 257  4 "d".getBytes(name);
 258  0 return true;
 259    }
 260    catch (UnsupportedEncodingException ex) {
 261  4 return false;
 262    }
 263   
 264    }
 265   
 266   
 267  26 private void checkAll(String encoding)
 268    throws ParsingException, IOException {
 269   
 270  26 Builder builder = new Builder();
 271  26 byte[] data = null;
 272  26 ByteArrayOutputStream out = new ByteArrayOutputStream(100000);
 273    // Write data into a byte array using encoding
 274  26 Serializer serializer = new Serializer(out, encoding);
 275  26 serializer.write(doc);
 276  26 serializer.flush();
 277  26 out.flush();
 278  26 out.close();
 279  26 data = out.toByteArray();
 280  26 InputStream in = new ByteArrayInputStream(data);
 281  26 Document reparsed = builder.build(in);
 282  26 in.close();
 283  26 serializer = null;
 284   
 285  26 Element reparsedRoot = reparsed.getRootElement();
 286  26 int childCount = reparsedRoot.getChildCount();
 287  26 for (int i = 0; i < childCount; i++) {
 288  1656460 Element test = (Element) reparsedRoot.getChild(i);
 289  1656460 String value = test.getValue();
 290  1656460 int expected
 291    = Integer.parseInt(test.getAttributeValue("c"));
 292    // workaround for EBCDIC bugs
 293  1656460 if (expected == 133 && encoding.equalsIgnoreCase("Cp037")) {
 294  1 continue;
 295    }
 296  1656459 int actual = value.charAt(0);
 297  1656459 if (value.length() > 1) {
 298  6656 int low = value.charAt(1);
 299  6656 actual = (actual << 10) + low + SURROGATE_OFFSET;
 300    }
 301    // This doesn't work for all encodings, because there are
 302    // a few cases where you write a Unicode compatibility
 303    // character such as an Arabic presentation form,
 304    // but read back what is essentially a different version
 305    // of the same character. That is the mapping from some
 306    // legacy character sets to Unicode is not always 1-1.
 307    /*
 308    assertEquals("Expected 0x"
 309    + Integer.toHexString(expected).toUpperCase()
 310    + " but was 0x"
 311    + Integer.toHexString(actual).toUpperCase(), expected, actual); */
 312  1656459 assertEquals(expected, actual);
 313    }
 314   
 315  26 in = null;
 316   
 317    }
 318   
 319   
 320    }