/*
 * Copyright (C) 2013 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
160888a09821a98ac0680fad765217302858e70fa4Paul Duffin
170888a09821a98ac0680fad765217302858e70fa4Paul Duffinpackage com.google.common.base;
180888a09821a98ac0680fad765217302858e70fa4Paul Duffin
190888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport com.google.common.annotations.GwtCompatible;
200888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport com.google.common.annotations.GwtIncompatible;
210888a09821a98ac0680fad765217302858e70fa4Paul Duffin
220888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport junit.framework.TestCase;
230888a09821a98ac0680fad765217302858e70fa4Paul Duffin
240888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.Arrays;
250888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.HashMap;
260888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.util.Random;
270888a09821a98ac0680fad765217302858e70fa4Paul Duffin
280888a09821a98ac0680fad765217302858e70fa4Paul Duffin/**
290888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Unit tests for {@link Utf8}.
300888a09821a98ac0680fad765217302858e70fa4Paul Duffin *
310888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @author Jon Perlow
320888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @author Martin Buchholz
330888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @author Clément Roux
340888a09821a98ac0680fad765217302858e70fa4Paul Duffin */
350888a09821a98ac0680fad765217302858e70fa4Paul Duffin@GwtCompatible(emulated = true)
360888a09821a98ac0680fad765217302858e70fa4Paul Duffinpublic class Utf8Test extends TestCase {
370888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testEncodedLength_validStrings() {
380888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(0, Utf8.encodedLength(""));
390888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(11, Utf8.encodedLength("Hello world"));
400888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(8, Utf8.encodedLength("Résumé"));
410888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(461, Utf8.encodedLength("威廉·莎士比亞(William Shakespeare,"
420888a09821a98ac0680fad765217302858e70fa4Paul Duffin        + "1564年4月26號—1616年4月23號[1])係隻英國嗰演員、劇作家同詩人,"
430888a09821a98ac0680fad765217302858e70fa4Paul Duffin        + "有時間佢簡稱莎翁;中國清末民初哈拕翻譯做舌克斯毕、沙斯皮耳、筛斯比耳、"
440888a09821a98ac0680fad765217302858e70fa4Paul Duffin        + "莎基斯庇尔、索士比尔、夏克思芘尔、希哀苦皮阿、叶斯壁、沙克皮尔、"
450888a09821a98ac0680fad765217302858e70fa4Paul Duffin        + "狹斯丕爾。[2]莎士比亞編寫過好多作品,佢嗰劇作響西洋文學好有影響,"
460888a09821a98ac0680fad765217302858e70fa4Paul Duffin        + "哈都拕人翻譯做好多話。"));
470888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // A surrogate pair
480888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(4, Utf8.encodedLength(
490888a09821a98ac0680fad765217302858e70fa4Paul Duffin        newString(Character.MIN_HIGH_SURROGATE, Character.MIN_LOW_SURROGATE)));
500888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
510888a09821a98ac0680fad765217302858e70fa4Paul Duffin
520888a09821a98ac0680fad765217302858e70fa4Paul Duffin  @GwtIncompatible("StringBuilder.appendCodePoint()")
530888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testEncodedLength_validStrings2() {
540888a09821a98ac0680fad765217302858e70fa4Paul Duffin    HashMap<Integer, Integer> utf8Lengths = new HashMap<Integer, Integer>();
550888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(0x00, 1);
560888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(0x7f, 1);
570888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(0x80, 2);
580888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(0x7ff, 2);
590888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(0x800, 3);
600888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(Character.MIN_SUPPLEMENTARY_CODE_POINT - 1, 3);
610888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(Character.MIN_SUPPLEMENTARY_CODE_POINT, 4);
620888a09821a98ac0680fad765217302858e70fa4Paul Duffin    utf8Lengths.put(Character.MAX_CODE_POINT, 4);
630888a09821a98ac0680fad765217302858e70fa4Paul Duffin
640888a09821a98ac0680fad765217302858e70fa4Paul Duffin    Integer[] codePoints = utf8Lengths.keySet().toArray(new Integer[]{});
650888a09821a98ac0680fad765217302858e70fa4Paul Duffin    StringBuilder sb = new StringBuilder();
660888a09821a98ac0680fad765217302858e70fa4Paul Duffin    Random rnd = new Random();
670888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int trial = 0; trial < 100; trial++) {
680888a09821a98ac0680fad765217302858e70fa4Paul Duffin      sb.setLength(0);
690888a09821a98ac0680fad765217302858e70fa4Paul Duffin      int utf8Length = 0;
700888a09821a98ac0680fad765217302858e70fa4Paul Duffin      for (int i = 0; i < 6; i++) {
710888a09821a98ac0680fad765217302858e70fa4Paul Duffin        Integer randomCodePoint = codePoints[rnd.nextInt(codePoints.length)];
720888a09821a98ac0680fad765217302858e70fa4Paul Duffin        sb.appendCodePoint(randomCodePoint);
730888a09821a98ac0680fad765217302858e70fa4Paul Duffin        utf8Length += utf8Lengths.get(randomCodePoint);
740888a09821a98ac0680fad765217302858e70fa4Paul Duffin        if (utf8Length != Utf8.encodedLength(sb)) {
750888a09821a98ac0680fad765217302858e70fa4Paul Duffin          StringBuilder repro = new StringBuilder();
760888a09821a98ac0680fad765217302858e70fa4Paul Duffin          for (int j = 0; j < sb.length(); j++) {
770888a09821a98ac0680fad765217302858e70fa4Paul Duffin            repro.append(" " + (int) sb.charAt(j));  // GWT compatible
780888a09821a98ac0680fad765217302858e70fa4Paul Duffin          }
790888a09821a98ac0680fad765217302858e70fa4Paul Duffin          assertEquals(repro.toString(), utf8Length, Utf8.encodedLength(sb));
800888a09821a98ac0680fad765217302858e70fa4Paul Duffin        }
810888a09821a98ac0680fad765217302858e70fa4Paul Duffin      }
820888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
830888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
840888a09821a98ac0680fad765217302858e70fa4Paul Duffin
850888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testEncodedLength_invalidStrings() {
860888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testEncodedLengthFails(newString(Character.MIN_HIGH_SURROGATE), 0);
870888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testEncodedLengthFails("foobar" + newString(Character.MIN_HIGH_SURROGATE), 6);
880888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testEncodedLengthFails(newString(Character.MIN_LOW_SURROGATE), 0);
890888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testEncodedLengthFails("foobar" + newString(Character.MIN_LOW_SURROGATE), 6);
900888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testEncodedLengthFails(
910888a09821a98ac0680fad765217302858e70fa4Paul Duffin        newString(
920888a09821a98ac0680fad765217302858e70fa4Paul Duffin            Character.MIN_HIGH_SURROGATE,
930888a09821a98ac0680fad765217302858e70fa4Paul Duffin            Character.MIN_HIGH_SURROGATE), 0);
940888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
950888a09821a98ac0680fad765217302858e70fa4Paul Duffin
960888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static void testEncodedLengthFails(String invalidString,
970888a09821a98ac0680fad765217302858e70fa4Paul Duffin      int invalidCodePointIndex) {
980888a09821a98ac0680fad765217302858e70fa4Paul Duffin    try {
990888a09821a98ac0680fad765217302858e70fa4Paul Duffin      Utf8.encodedLength(invalidString);
1000888a09821a98ac0680fad765217302858e70fa4Paul Duffin      fail();
1010888a09821a98ac0680fad765217302858e70fa4Paul Duffin    } catch (IllegalArgumentException expected) {
1020888a09821a98ac0680fad765217302858e70fa4Paul Duffin      assertEquals("Unpaired surrogate at index " + invalidCodePointIndex,
1030888a09821a98ac0680fad765217302858e70fa4Paul Duffin          expected.getMessage());
1040888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
1050888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
1060888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1070888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 128 - [chars 0x0000 to 0x007f]
1080888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
1090888a09821a98ac0680fad765217302858e70fa4Paul Duffin      0x007f - 0x0000 + 1;
1100888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1110888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 128
1120888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
1130888a09821a98ac0680fad765217302858e70fa4Paul Duffin      ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
1140888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1150888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 1920 [chars 0x0080 to 0x07FF]
1160888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS =
1170888a09821a98ac0680fad765217302858e70fa4Paul Duffin      0x07FF - 0x0080 + 1;
1180888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1190888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 18,304
1200888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
1210888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // Both bytes are one byte characters
1220888a09821a98ac0680fad765217302858e70fa4Paul Duffin      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
1230888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // The possible number of two byte characters
1240888a09821a98ac0680fad765217302858e70fa4Paul Duffin      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
1250888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1260888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 2048
1270888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long THREE_BYTE_SURROGATES = 2 * 1024;
1280888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1290888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
1300888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
1310888a09821a98ac0680fad765217302858e70fa4Paul Duffin      0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
1320888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1330888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 2,650,112
1340888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
1350888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // All one byte characters
1360888a09821a98ac0680fad765217302858e70fa4Paul Duffin      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
1370888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // One two byte character and a one byte character
1380888a09821a98ac0680fad765217302858e70fa4Paul Duffin      2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
1390888a09821a98ac0680fad765217302858e70fa4Paul Duffin          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
1400888a09821a98ac0680fad765217302858e70fa4Paul Duffin       // Three byte characters
1410888a09821a98ac0680fad765217302858e70fa4Paul Duffin      THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
1420888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1430888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 1,048,576 [chars 0x10000L to 0x10FFFF]
1440888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS =
1450888a09821a98ac0680fad765217302858e70fa4Paul Duffin      0x10FFFF - 0x10000L + 1;
1460888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1470888a09821a98ac0680fad765217302858e70fa4Paul Duffin  // 289,571,839
1480888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
1490888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // All one byte characters
1500888a09821a98ac0680fad765217302858e70fa4Paul Duffin      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
1510888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // One and three byte characters
1520888a09821a98ac0680fad765217302858e70fa4Paul Duffin      2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
1530888a09821a98ac0680fad765217302858e70fa4Paul Duffin          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
1540888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // Two two byte characters
1550888a09821a98ac0680fad765217302858e70fa4Paul Duffin      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
1560888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // Permutations of one and two byte characters
1570888a09821a98ac0680fad765217302858e70fa4Paul Duffin      3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
1580888a09821a98ac0680fad765217302858e70fa4Paul Duffin          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
1590888a09821a98ac0680fad765217302858e70fa4Paul Duffin          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
1600888a09821a98ac0680fad765217302858e70fa4Paul Duffin      // Four byte characters
1610888a09821a98ac0680fad765217302858e70fa4Paul Duffin      FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
1620888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1630888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /** Tests that round tripping of all two byte permutations work. */
1640888a09821a98ac0680fad765217302858e70fa4Paul Duffin  @GwtIncompatible("java.nio.charset.Charset")
1650888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testIsWellFormed_1Byte() {
1660888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testBytes(1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
1670888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
1680888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1690888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /** Tests that round tripping of all two byte permutations work. */
1700888a09821a98ac0680fad765217302858e70fa4Paul Duffin  @GwtIncompatible("java.nio.charset.Charset")
1710888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testIsWellFormed_2Bytes() {
1720888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testBytes(2, EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
1730888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
1740888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1750888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /** Tests that round tripping of all three byte permutations work. */
1760888a09821a98ac0680fad765217302858e70fa4Paul Duffin  @GwtIncompatible("java.nio.charset.Charset")
1770888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testIsWellFormed_3Bytes() {
1780888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testBytes(3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
1790888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
1800888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1810888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /**
1820888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * Tests that round tripping of a sample of four byte permutations work.
1830888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * All permutations are prohibitively expensive to test for automated runs.
1840888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * This method tests specific four-byte cases.
1850888a09821a98ac0680fad765217302858e70fa4Paul Duffin   */
1860888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testIsWellFormed_4BytesSamples() {
1870888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Valid 4 byte.
1880888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertWellFormed(0xF0, 0xA4, 0xAD, 0xA2);
1890888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Bad trailing bytes
1900888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertNotWellFormed(0xF0, 0xA4, 0xAD, 0x7F);
1910888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertNotWellFormed(0xF0, 0xA4, 0xAD, 0xC0);
1920888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Special cases for byte2
1930888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertNotWellFormed(0xF0, 0x8F, 0xAD, 0xA2);
1940888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertNotWellFormed(0xF4, 0x90, 0xAD, 0xA2);
1950888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
1960888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1970888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /** Tests some hard-coded test cases. */
1980888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testSomeSequences() {
1990888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Empty
2000888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertWellFormed();
2010888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // One-byte characters, including control characters
2020888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertWellFormed(0x00, 0x61, 0x62, 0x63, 0x7F); // "\u0000abc\u007f"
2030888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Two-byte characters
2040888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertWellFormed(0xC2, 0xA2, 0xC2, 0xA2); // "\u00a2\u00a2"
2050888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Three-byte characters
2060888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertWellFormed(0xc8, 0x8a, 0x63, 0xc8, 0x8a, 0x63); // "\u020ac\u020ac"
2070888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Four-byte characters
2080888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // "\u024B62\u024B62"
2090888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertWellFormed(0xc9, 0x8b, 0x36, 0x32, 0xc9, 0x8b, 0x36, 0x32);
2100888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Mixed string
2110888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // "a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62"
2120888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertWellFormed(0x61, 0xc8, 0x8a, 0x63, 0xc2, 0xa2, 0x62, 0x5c, 0x75, 0x30,
2130888a09821a98ac0680fad765217302858e70fa4Paul Duffin        0x32, 0x34, 0x42, 0x36, 0x32, 0x75, 0x30, 0x32, 0x30, 0x61, 0x63, 0x63,
2140888a09821a98ac0680fad765217302858e70fa4Paul Duffin        0xc2, 0xa2, 0x64, 0x65, 0xc9, 0x8b, 0x36, 0x32);
2150888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Not a valid string
2160888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertNotWellFormed(-1, 0, -1, 0);
2170888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2180888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2190888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testShardsHaveExpectedRoundTrippables() {
2200888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // A sanity check.
2210888a09821a98ac0680fad765217302858e70fa4Paul Duffin    long actual = 0;
2220888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (long expected : generateFourByteShardsExpectedRunnables()) {
2230888a09821a98ac0680fad765217302858e70fa4Paul Duffin      actual += expected;
2240888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
2250888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual);
2260888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2270888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2280888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private String newString(char... chars) {
2290888a09821a98ac0680fad765217302858e70fa4Paul Duffin    return new String(chars);
2300888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2310888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2320888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private byte[] toByteArray(int... bytes) {
2330888a09821a98ac0680fad765217302858e70fa4Paul Duffin    byte[] realBytes = new byte[bytes.length];
2340888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 0; i < bytes.length; i++) {
2350888a09821a98ac0680fad765217302858e70fa4Paul Duffin      realBytes[i] = (byte) bytes[i];
2360888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
2370888a09821a98ac0680fad765217302858e70fa4Paul Duffin    return realBytes;
2380888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2390888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2400888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private void assertWellFormed(int... bytes) {
2410888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(Utf8.isWellFormed(toByteArray(bytes)));
2420888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2430888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2440888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private void assertNotWellFormed(int... bytes) {
2450888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertFalse(Utf8.isWellFormed(toByteArray(bytes)));
2460888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2470888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2480888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static long[] generateFourByteShardsExpectedRunnables() {
2490888a09821a98ac0680fad765217302858e70fa4Paul Duffin    long[] expected = new long[128];
2500888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // 0-63 are all 5300224
2510888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 0; i <= 63; i++) {
2520888a09821a98ac0680fad765217302858e70fa4Paul Duffin      expected[i] = 5300224;
2530888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
2540888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // 97-111 are all 2342912
2550888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 97; i <= 111; i++) {
2560888a09821a98ac0680fad765217302858e70fa4Paul Duffin     expected[i] = 2342912;
2570888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
2580888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // 113-117 are all 1048576
2590888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 113; i <= 117; i++) {
2600888a09821a98ac0680fad765217302858e70fa4Paul Duffin      expected[i] = 1048576;
2610888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
2620888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // One offs
2630888a09821a98ac0680fad765217302858e70fa4Paul Duffin    expected[112] = 786432;
2640888a09821a98ac0680fad765217302858e70fa4Paul Duffin    expected[118] = 786432;
2650888a09821a98ac0680fad765217302858e70fa4Paul Duffin    expected[119] = 1048576;
2660888a09821a98ac0680fad765217302858e70fa4Paul Duffin    expected[120] = 458752;
2670888a09821a98ac0680fad765217302858e70fa4Paul Duffin    expected[121] = 524288;
2680888a09821a98ac0680fad765217302858e70fa4Paul Duffin    expected[122] = 65536;
2690888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Anything not assigned was the default 0.
2700888a09821a98ac0680fad765217302858e70fa4Paul Duffin    return expected;
2710888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2720888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2730888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /**
2740888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * Helper to run the loop to test all the permutations for the number of bytes
2750888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * specified.
2760888a09821a98ac0680fad765217302858e70fa4Paul Duffin   *
2770888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @param numBytes the number of bytes in the byte array
2780888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @param expectedCount the expected number of roundtrippable permutations
2790888a09821a98ac0680fad765217302858e70fa4Paul Duffin   */
2800888a09821a98ac0680fad765217302858e70fa4Paul Duffin  @GwtIncompatible("java.nio.charset.Charset")
2810888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static void testBytes(int numBytes, long expectedCount) {
2820888a09821a98ac0680fad765217302858e70fa4Paul Duffin    testBytes(numBytes, expectedCount, 0, -1);
2830888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2840888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2850888a09821a98ac0680fad765217302858e70fa4Paul Duffin  /**
2860888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * Helper to run the loop to test all the permutations for the number of bytes
2870888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * specified. This overload is useful for debugging to get the loop to start
2880888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * at a certain character.
2890888a09821a98ac0680fad765217302858e70fa4Paul Duffin   *
2900888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @param numBytes the number of bytes in the byte array
2910888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @param expectedCount the expected number of roundtrippable permutations
2920888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @param start the starting bytes encoded as a long as big-endian
2930888a09821a98ac0680fad765217302858e70fa4Paul Duffin   * @param lim the limit of bytes to process encoded as a long as big-endian,
2940888a09821a98ac0680fad765217302858e70fa4Paul Duffin   *     or -1 to mean the max limit for numBytes
2950888a09821a98ac0680fad765217302858e70fa4Paul Duffin   */
2960888a09821a98ac0680fad765217302858e70fa4Paul Duffin  @GwtIncompatible("java.nio.charset.Charset")
2970888a09821a98ac0680fad765217302858e70fa4Paul Duffin  private static void testBytes(int numBytes, long expectedCount, long start,
2980888a09821a98ac0680fad765217302858e70fa4Paul Duffin      long lim) {
2990888a09821a98ac0680fad765217302858e70fa4Paul Duffin    byte[] bytes = new byte[numBytes];
3000888a09821a98ac0680fad765217302858e70fa4Paul Duffin    if (lim == -1) {
3010888a09821a98ac0680fad765217302858e70fa4Paul Duffin      lim = 1L << (numBytes * 8);
3020888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
3030888a09821a98ac0680fad765217302858e70fa4Paul Duffin    long countRoundTripped = 0;
3040888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (long byteChar = start; byteChar < lim; byteChar++) {
3050888a09821a98ac0680fad765217302858e70fa4Paul Duffin      long tmpByteChar = byteChar;
3060888a09821a98ac0680fad765217302858e70fa4Paul Duffin      for (int i = 0; i < numBytes; i++) {
3070888a09821a98ac0680fad765217302858e70fa4Paul Duffin        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
3080888a09821a98ac0680fad765217302858e70fa4Paul Duffin        tmpByteChar = tmpByteChar >> 8;
3090888a09821a98ac0680fad765217302858e70fa4Paul Duffin      }
3100888a09821a98ac0680fad765217302858e70fa4Paul Duffin      boolean isRoundTrippable = Utf8.isWellFormed(bytes);
3110888a09821a98ac0680fad765217302858e70fa4Paul Duffin      assertEquals(isRoundTrippable, Utf8.isWellFormed(bytes, 0, numBytes));
3123ecfa412eddc4b084663f38d562537b86b9734d5Paul Duffin      String s = new String(bytes, Charsets.UTF_8);
3133ecfa412eddc4b084663f38d562537b86b9734d5Paul Duffin      byte[] bytesReencoded = s.getBytes(Charsets.UTF_8);
3143ecfa412eddc4b084663f38d562537b86b9734d5Paul Duffin      boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);
3150888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3160888a09821a98ac0680fad765217302858e70fa4Paul Duffin      if (bytesEqual != isRoundTrippable) {
3170888a09821a98ac0680fad765217302858e70fa4Paul Duffin        fail();
3180888a09821a98ac0680fad765217302858e70fa4Paul Duffin      }
3190888a09821a98ac0680fad765217302858e70fa4Paul Duffin      if (isRoundTrippable) {
3200888a09821a98ac0680fad765217302858e70fa4Paul Duffin        countRoundTripped++;
3210888a09821a98ac0680fad765217302858e70fa4Paul Duffin      }
3220888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
3230888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(expectedCount, countRoundTripped);
3240888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
3250888a09821a98ac0680fad765217302858e70fa4Paul Duffin}
326