/*
 * Copyright (C) 2013 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.base;

import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;

import junit.framework.TestCase;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Random;

/**
 * Unit tests for {@link Utf8}.
 *
 * @author Jon Perlow
 * @author Martin Buchholz
 * @author Clément Roux
 */
@GwtCompatible(emulated = true)
public class Utf8Test extends TestCase {

  /** Checks {@link Utf8#encodedLength} against hand-computed lengths of valid strings. */
  public void testEncodedLength_validStrings() {
    assertEquals(0, Utf8.encodedLength(""));
    assertEquals(11, Utf8.encodedLength("Hello world"));
    assertEquals(8, Utf8.encodedLength("Résumé"));
    assertEquals(461, Utf8.encodedLength("威廉·莎士比亞(William Shakespeare,"
        + "1564年4月26號—1616年4月23號[1])係隻英國嗰演員、劇作家同詩人,"
        + "有時間佢簡稱莎翁;中國清末民初哈拕翻譯做舌克斯毕、沙斯皮耳、筛斯比耳、"
        + "莎基斯庇尔、索士比尔、夏克思芘尔、希哀苦皮阿、叶斯壁、沙克皮尔、"
        + "狹斯丕爾。[2]莎士比亞編寫過好多作品,佢嗰劇作響西洋文學好有影響,"
        + "哈都拕人翻譯做好多話。"));
    // A surrogate pair
    assertEquals(4, Utf8.encodedLength(
        newString(Character.MIN_HIGH_SURROGATE, Character.MIN_LOW_SURROGATE)));
  }

  /**
   * Checks {@link Utf8#encodedLength} on random concatenations of code points that sit on the
   * boundaries between the one-, two-, three- and four-byte encodings.
   */
  @GwtIncompatible("StringBuilder.appendCodePoint()")
  public void testEncodedLength_validStrings2() {
    // Maps each boundary code point to the number of bytes in its UTF-8 encoding.
    HashMap<Integer, Integer> utf8Lengths = new HashMap<Integer, Integer>();
    utf8Lengths.put(0x00, 1);
    utf8Lengths.put(0x7f, 1);
    utf8Lengths.put(0x80, 2);
    utf8Lengths.put(0x7ff, 2);
    utf8Lengths.put(0x800, 3);
    utf8Lengths.put(Character.MIN_SUPPLEMENTARY_CODE_POINT - 1, 3);
    utf8Lengths.put(Character.MIN_SUPPLEMENTARY_CODE_POINT, 4);
    utf8Lengths.put(Character.MAX_CODE_POINT, 4);

    Integer[] codePoints = utf8Lengths.keySet().toArray(new Integer[0]);
    StringBuilder sb = new StringBuilder();
    Random rnd = new Random();
    for (int trial = 0; trial < 100; trial++) {
      sb.setLength(0);
      int utf8Length = 0;
      for (int i = 0; i < 6; i++) {
        Integer randomCodePoint = codePoints[rnd.nextInt(codePoints.length)];
        sb.appendCodePoint(randomCodePoint);
        utf8Length += utf8Lengths.get(randomCodePoint);
        // The random source is unseeded, so on a mismatch dump the UTF-16 code units of the
        // offending string to make the failure reproducible.
        if (utf8Length != Utf8.encodedLength(sb)) {
          StringBuilder repro = new StringBuilder();
          for (int j = 0; j < sb.length(); j++) {
            repro.append(" " + (int) sb.charAt(j)); // GWT compatible
          }
          assertEquals(repro.toString(), utf8Length, Utf8.encodedLength(sb));
        }
      }
    }
  }

  /** Checks that {@link Utf8#encodedLength} rejects strings containing unpaired surrogates. */
  public void testEncodedLength_invalidStrings() {
    testEncodedLengthFails(newString(Character.MIN_HIGH_SURROGATE), 0);
    testEncodedLengthFails("foobar" + newString(Character.MIN_HIGH_SURROGATE), 6);
    testEncodedLengthFails(newString(Character.MIN_LOW_SURROGATE), 0);
    testEncodedLengthFails("foobar" + newString(Character.MIN_LOW_SURROGATE), 6);
    testEncodedLengthFails(
        newString(
            Character.MIN_HIGH_SURROGATE,
            Character.MIN_HIGH_SURROGATE), 0);
  }

  /**
   * Asserts that {@link Utf8#encodedLength} throws {@link IllegalArgumentException} for the given
   * string and that the exception message names the expected index.
   *
   * @param invalidString a string containing an unpaired surrogate
   * @param invalidCodePointIndex the index expected to appear in the exception message
   */
  private static void testEncodedLengthFails(String invalidString,
      int invalidCodePointIndex) {
    try {
      Utf8.encodedLength(invalidString);
      fail();
    } catch (IllegalArgumentException expected) {
      assertEquals("Unpaired surrogate at index " + invalidCodePointIndex,
          expected.getMessage());
    }
  }

  // 128 - [chars 0x0000 to 0x007f]
  private static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
      0x007f - 0x0000 + 1;

  // 128
  private static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
      ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 1920 [chars 0x0080 to 0x07FF]
  private static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS =
      0x07FF - 0x0080 + 1;

  // 18,304
  private static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
      // Both bytes are one byte characters
      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
      // The possible number of two byte characters
      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 2048
  private static final long THREE_BYTE_SURROGATES = 2 * 1024;

  // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
  private static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
      0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;

  // 2,650,112
  private static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
      // All one byte characters
      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
      // One two byte character and a one byte character
      2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
      // Three byte characters
      THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  // 1,048,576 [chars 0x10000L to 0x10FFFF]
  private static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS =
      0x10FFFF - 0x10000L + 1;

  // 383,270,912
  private static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
      // All one byte characters
      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
      // One and three byte characters
      2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
      // Two two byte characters
      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
      // Permutations of one and two byte characters
      3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
      // Four byte characters
      FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;

  /** Tests that round tripping of all one byte permutations works. */
  @GwtIncompatible("java.nio.charset.Charset")
  public void testIsWellFormed_1Byte() {
    testBytes(1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
  }

  /** Tests that round tripping of all two byte permutations works. */
  @GwtIncompatible("java.nio.charset.Charset")
  public void testIsWellFormed_2Bytes() {
    testBytes(2, EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
  }

  /** Tests that round tripping of all three byte permutations works. */
  @GwtIncompatible("java.nio.charset.Charset")
  public void testIsWellFormed_3Bytes() {
    testBytes(3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
  }

  /**
   * Tests that round tripping of a sample of four byte permutations works.
   * All permutations are prohibitively expensive to test for automated runs.
   * This method tests specific four-byte cases.
   */
  public void testIsWellFormed_4BytesSamples() {
    // Valid 4 byte (U+24B62).
    assertWellFormed(0xF0, 0xA4, 0xAD, 0xA2);
    // Bad trailing bytes
    assertNotWellFormed(0xF0, 0xA4, 0xAD, 0x7F);
    assertNotWellFormed(0xF0, 0xA4, 0xAD, 0xC0);
    // Special cases for byte2
    // 0xF0 followed by a byte below 0x90 is an overlong encoding of a code point below U+10000.
    assertNotWellFormed(0xF0, 0x8F, 0xAD, 0xA2);
    // 0xF4 followed by a byte above 0x8F would encode a code point above U+10FFFF.
    assertNotWellFormed(0xF4, 0x90, 0xAD, 0xA2);
  }

  /** Tests some hard-coded test cases. */
  public void testSomeSequences() {
    // Empty
    assertWellFormed();
    // One-byte characters, including control characters
    assertWellFormed(0x00, 0x61, 0x62, 0x63, 0x7F); // "\u0000abc\u007f"
    // Two-byte characters
    assertWellFormed(0xC2, 0xA2, 0xC2, 0xA2); // "\u00a2\u00a2"
    // Two-byte character U+020A followed by ASCII 'c' (not the three-byte U+20AC)
    assertWellFormed(0xc8, 0x8a, 0x63, 0xc8, 0x8a, 0x63); // "\u020ac\u020ac"
    // Three-byte characters: U+20AC twice
    assertWellFormed(0xE2, 0x82, 0xAC, 0xE2, 0x82, 0xAC); // "\u20ac\u20ac"
    // Two-byte character U+024B followed by ASCII "62" (not the four-byte U+24B62)
    // "\u024B62\u024B62"
    assertWellFormed(0xc9, 0x8b, 0x36, 0x32, 0xc9, 0x8b, 0x36, 0x32);
    // Four-byte characters: U+24B62 twice
    assertWellFormed(0xF0, 0xA4, 0xAD, 0xA2, 0xF0, 0xA4, 0xAD, 0xA2);
    // Mixed string
    // "a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62"
    assertWellFormed(0x61, 0xc8, 0x8a, 0x63, 0xc2, 0xa2, 0x62, 0x5c, 0x75, 0x30,
        0x32, 0x34, 0x42, 0x36, 0x32, 0x75, 0x30, 0x32, 0x30, 0x61, 0x63, 0x63,
        0xc2, 0xa2, 0x64, 0x65, 0xc9, 0x8b, 0x36, 0x32);
    // Not a valid string
    assertNotWellFormed(-1, 0, -1, 0);
  }

  /**
   * Checks that the per-shard expected counts add up to
   * {@link #EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT}.
   */
  public void testShardsHaveExpectedRoundTrippables() {
    // A sanity check.
    long actual = 0;
    for (long expected : generateFourByteShardsExpectedRunnables()) {
      actual += expected;
    }
    assertEquals(EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual);
  }

  /** Builds a string from the given UTF-16 code units, which may include lone surrogates. */
  private String newString(char... chars) {
    return new String(chars);
  }

  /** Narrows each int to a byte so callers can write unsigned literals such as {@code 0xF0}. */
  private byte[] toByteArray(int... bytes) {
    byte[] realBytes = new byte[bytes.length];
    for (int i = 0; i < bytes.length; i++) {
      realBytes[i] = (byte) bytes[i];
    }
    return realBytes;
  }

  /** Asserts that {@link Utf8#isWellFormed(byte[])} accepts the given byte sequence. */
  private void assertWellFormed(int... bytes) {
    assertTrue(Utf8.isWellFormed(toByteArray(bytes)));
  }

  /** Asserts that {@link Utf8#isWellFormed(byte[])} rejects the given byte sequence. */
  private void assertNotWellFormed(int... bytes) {
    assertFalse(Utf8.isWellFormed(toByteArray(bytes)));
  }

  /**
   * Returns the expected number of round-trippable four-byte sequences for each of 128 shards.
   * The entries sum to {@link #EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT}.
   */
  private static long[] generateFourByteShardsExpectedRunnables() {
    long[] expected = new long[128];
    // 0-63 are all 5300224
    for (int i = 0; i <= 63; i++) {
      expected[i] = 5300224;
    }
    // 97-111 are all 2342912
    for (int i = 97; i <= 111; i++) {
      expected[i] = 2342912;
    }
    // 113-117 are all 1048576
    for (int i = 113; i <= 117; i++) {
      expected[i] = 1048576;
    }
    // One offs
    expected[112] = 786432;
    expected[118] = 786432;
    expected[119] = 1048576;
    expected[120] = 458752;
    expected[121] = 524288;
    expected[122] = 65536;
    // Anything not assigned was the default 0.
    return expected;
  }

  /**
   * Helper to run the loop to test all the permutations for the number of bytes
   * specified.
   *
   * @param numBytes the number of bytes in the byte array
   * @param expectedCount the expected number of roundtrippable permutations
   */
  @GwtIncompatible("java.nio.charset.Charset")
  private static void testBytes(int numBytes, long expectedCount) {
    testBytes(numBytes, expectedCount, 0, -1);
  }

  /**
   * Helper to run the loop to test all the permutations for the number of bytes
   * specified. This overload is useful for debugging to get the loop to start
   * at a certain character.
   *
   * <p>Each candidate is checked three ways: {@link Utf8#isWellFormed(byte[])}, the ranged
   * overload over the whole array, and a decode/re-encode round trip through the UTF-8
   * charset. All three must agree.
   *
   * @param numBytes the number of bytes in the byte array
   * @param expectedCount the expected number of roundtrippable permutations
   * @param start the starting bytes encoded as a long as big-endian
   * @param lim the limit of bytes to process encoded as a long as big-endian,
   *     or -1 to mean the max limit for numBytes
   */
  @GwtIncompatible("java.nio.charset.Charset")
  private static void testBytes(int numBytes, long expectedCount, long start,
      long lim) {
    byte[] bytes = new byte[numBytes];
    if (lim == -1) {
      lim = 1L << (numBytes * 8);
    }
    long countRoundTripped = 0;
    for (long byteChar = start; byteChar < lim; byteChar++) {
      // Unpack the counter into the array, least significant byte last (big-endian).
      long tmpByteChar = byteChar;
      for (int i = 0; i < numBytes; i++) {
        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
        tmpByteChar >>= 8;
      }
      boolean isRoundTrippable = Utf8.isWellFormed(bytes);
      assertEquals(isRoundTrippable, Utf8.isWellFormed(bytes, 0, numBytes));
      String s = new String(bytes, Charsets.UTF_8);
      byte[] bytesReencoded = s.getBytes(Charsets.UTF_8);
      boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);

      // Build the diagnostic only on failure; this loop runs millions of times.
      if (bytesEqual != isRoundTrippable) {
        fail("Utf8.isWellFormed returned " + isRoundTrippable
            + " but charset round trip equality was " + bytesEqual
            + " for bytes " + Arrays.toString(bytes));
      }
      if (isRoundTrippable) {
        countRoundTripped++;
      }
    }
    assertEquals(expectedCount, countRoundTripped);
  }
}