1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
// Unit tests for the compact language detector.
6//
7// Small version, covering these languages only:
8// Arabic Bulgarian Catalan Chinese ChineseT Croatian Czech Danish Dutch
9// English Estonian Finnish French German Greek Hebrew Hindi Hungarian
10// Icelandic Indonesian Italian Japanese Korean Latvian Lithuanian Norwegian
11// Polish Portuguese Romanian Russian Serbian Slovak Slovenian Spanish
12// Swedish Tagalog Thai Turkish Ukrainian Vietnamese
13
14// Additional single-language scripts recognized for free:
15// Armenian Cherokee Dhivehi Georgian Gujarati Inuktitut Kannada Khmer
16// Laothian Malayalam Oriya Punjabi Sinhalese Syriac Telugu Tamil
17//
18
19#include <string>
20#include "testing/gtest/include/gtest/gtest.h"
21#include "encodings/compact_lang_det/compact_lang_det.h"
22#include "encodings/compact_lang_det/ext_lang_enc.h"
23#include "encodings/compact_lang_det/unittest_data.h"
24
25#include "encodings/compact_lang_det/win/cld_commandlineflags.h"
26#include "encodings/compact_lang_det/win/cld_google.h"
27
28// Test strings.
29// These are all included here to make the unit test self-contained.
// English sample text (plain ASCII, lower case, no punctuation).
// EasyTests expects the detector to report ENGLISH for it.
const char* kTeststr_en =
  "confiscation of goods is assigned as the penalty part "
  "most of the courts consist of members "
  "and when it is necessary to bring public cases before a jury of members "
  "two courts combine for the purpose "
  "the most important cases of all are brought jurors or";
35
36
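// UTF-8 constants. The Devanagari-script sample below is spelled out as hex
// byte escapes, so it can be viewed in any editor; a UTF-8 aware editor is
// only needed if literal non-ASCII text is added to this file.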
38const char* kTeststr_ks =
39  "\xe0\xa4\xa8\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x8f"
40  "\xe0\xa4\xb8\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\x82"
41  "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2"
42  "\xe0\xa5\x81\xe0\xa4\x95 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7"
43  "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\x95\xe0\xa4\xbe\xe0\xa4\xa0"
44  "\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xa1\xe0\xa5\x8c\xe0\xa4\x82 \xe0\xa4\xa8"
45  "\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x85\xe0\xa4\xa7"
46  "\xe0\xa4\xbf\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa5\x8d\xe0\xa4\xaf "
47  "\xe0\xa4\xaa\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\x97\xe0\xa5\x8d"
48  "\xe0\xa4\xb5\xe0\xa4\xbe\xe0\xa4\xaf \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d"
49  "\xe0\xa4\xb7\xe0\xa4\xbf\xe0\xa4\xa3 \xe0\xa4\x85\xe0\xa4\xae\xe0\xa5\x87"
50  "\xe0\xa4\xb0\xe0\xa4\xbf\xe0\xa4\x95\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9"
51  "\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa"
52  "\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95"
53  "\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0"
54  "\xe0\xa5\x87 \xe0\xa4\x8f\xe0\xa4\x95 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 "
55  "\xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf \xe0\xa4\xab"
56  "\xe0\xa4\xa3\xe0\xa5\x80\xe0\xa4\xb6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa4\xb0 "
57  "\xe0\xa4\xa8\xe0\xa4\xbe\xe0\xa4\xa5 \xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\xa3"
58  "\xe0\xa5\x81 \xe0\xa4\xab\xe0\xa4\xbf\xe0\xa4\x9c\xe0\xa5\x80 \xe0\xa4\x9b"
59  "\xe0\xa5\x81 \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa4\xbf"
60  "\xe0\xa4\xa3 \xe0\xa4\xaa\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa4\xb6\xe0\xa4\xbe"
61  "\xe0\xa4\xa8\xe0\xa5\x8d \xe0\xa4\xa4 \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe"
62  "\xe0\xa4\xb8\xe0\xa4\xbe\xe0\xa4\x97\xe0\xa4\xb0 \xe0\xa4\xae\xe0\xa4\x82"
63  "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 "
64  "\xe0\xa4\xac\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xb8 "
65  "\xe0\xa4\x9b\xe0\xa5\x81 \xe0\xa4\x95\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87"
66  "\xe0\xa4\xac\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xa8 \xe0\xa4\xae\xe0\xa4\x82"
67  "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2"
68  "\xe0\xa5\x81\xe0\xa4\x96 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7"
69  "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\xa8\xe0\xa4\xb8\xe0\xa5\x8c "
70  "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d"
71  "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf \xe0\xa4\xac"
72  "\xe0\xa5\x81\xe0\xa4\xb0\xe0\xa5\x81\xe0\xa4\x82\xe0\xa4\xa1\xe0\xa5\x80 "
73  "\xe0\xa4\x85\xe0\xa4\xab\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x80\xe0\xa4\x95"
74  "\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d"
75  "\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7"
76  "\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87"
77  "\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x87 \xe0\xa4\xa6\xe0\xa5\x87"
78  "\xe0\xa4\xb6 \xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf "
79  "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d"
80  "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf";
81
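// Test string: a <meta charset> line, then one multi-byte sequence repeated
// many times, then a long run of 'a'. This will be squeezed because of the
// repetitions.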
83const char* kTeststr_kr_repetitions =
84    "<meta charset=\"utf-8\" />\n\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
85    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
86    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
87    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
88    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
89    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
90    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
91    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
92    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
93    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
94    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
95    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
96    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
97    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
98    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
99    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
100    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
101    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
102    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
103    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
104    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
105    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
106    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
107    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
108    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
109    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
110    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
111    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
112    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
113    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
114    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
115    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
116    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
117    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
118    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
119    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
120    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
121    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
122    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
123    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
124    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
125    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
126    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
127    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
128    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
129    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
130    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
131    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
132    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
133    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
134    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
135    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
136    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
137    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
138    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
139    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
140    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
141    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
142    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
143    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
144    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
145    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
146    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
147    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
148    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
149    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
150    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
151    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
152    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
153    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
154    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
155    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
156    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
157    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
158    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
159    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
160    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
161    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
162    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
163    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
164    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
165    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
166    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
167    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
168    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
169    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
170    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
171    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
172    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
173    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
174    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
175    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
176    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
177    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
178    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
179    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
180    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
181    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
182    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
183    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
184    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
185    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
186    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
187    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
188    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
189    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
190    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
191    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
192    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
193    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
194    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
195    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
196    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
197    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
198    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
199    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
200    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
201    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
202    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
203    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
204    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
205    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
206    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
207    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
208    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
209    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
210    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
211    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
212    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
213    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
214    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
215    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
216    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
217    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
218    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
219    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
220    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
221    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
222    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
223    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
224    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
225    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
226    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
227    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
228    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
229    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
230    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
231    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
232    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
233    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
234    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
235    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
236    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
237    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
238    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
239    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
240    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
241    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
242    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
243    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
244    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
245    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
246    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
247    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
248    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
249    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
250    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
251    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
252    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
253    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
254    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
255    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
256    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
257    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
258    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
259    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
260    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
261    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
262    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
263    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
264    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
265    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
266    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
267    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
268    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
269    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
270    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
271    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
272    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
273    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
274    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
275    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
276    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
277    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
278    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
279    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
280    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
281    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
282    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
283    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
284    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
285    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
286    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
287    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
288    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
289    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
290    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
291    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
292    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
293    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
294    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
295    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
296    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
297    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
298    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
299    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
300    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
301    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
302    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
303    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
304    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
305    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
306    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
307    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
308    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
309    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
310    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
311    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
312    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
313    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
314    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
315    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
316    "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d"
317    "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93"
318    "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96"
319    "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1"
320    "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f"
321    "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad"
322    "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96"
323    "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9"
324    "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6"
325    "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8"
326    "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82"
327    "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99"
328    "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac"
329    "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b"
330    "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8"
331    "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb"
332    "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea"
333    "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5"
334    "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9"
335    "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3"
336    "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b"
337    "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97"
338    "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a"
339    "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5"
340    "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98"
341    "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6"
342    "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec"
343    "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec"
344    "\x9b\x98\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
345    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
346    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
347    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
348    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
349    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
350    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
351    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
352    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
353    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
354    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
355    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
356    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
357    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
358    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
359    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
360    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
361    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
362    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
363    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
364    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
365    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
366    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
367    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
368    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
369    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
370    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
371    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
372    "aaaaaaaaaaaaa";
373
374
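// For reference: the text of kTeststr_ks (defined above as UTF-8 hex bytes),
// written as backslash-u Unicode code point escapes.
// const char* kTeststr_ks =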
376//  \u0928\u0947\u092A\u093E\u0932\u0020\u090F\u0938\u093F\u092F\u093E\u0020
377//  \u092E\u0902\u091C\u0020\u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0915
378//  \u0020\u0930\u093E\u091C\u0927\u093E\u0928\u0940\u0020\u0915\u093E\u0920
379//  \u092E\u093E\u0921\u094C\u0902\u0020\u0928\u0947\u092A\u093E\u0932\u0020
380//  \u0905\u0927\u093F\u0930\u093E\u091C\u094D\u092F\u0020\u092A\u0947\u0930
381//  \u0947\u0917\u094D\u0935\u093E\u092F\u0020
382//  \u0926\u0915\u094D\u0937\u093F\u0923\u0020\u0905\u092E\u0947\u0930\u093F
383//  \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947
384//  \u0020\u092E\u0927\u094D\u0020\u092F\u0915\u094D\u0937\u0947\u0924\u094D
385//  \u0930\u0947\u0020\u090F\u0915\u0020\u0926\u0947\u0936\u0020\u0905\u0938
386//  \u094D\u0020\u0924\u093F\u0020\u092B\u0923\u0940\u0936\u094D\u0935\u0930
387//  \u0020\u0928\u093E\u0925\u0020\u0930\u0947\u0923\u0941\u0020
388//  \u092B\u093F\u091C\u0940\u0020\u091B\u0941\u0020\u0926\u0915\u094D\u0937
389//  \u093F\u0923\u0020\u092A\u094D\u0930\u0936\u093E\u0928\u094D\u0020\u0924
390//  \u0020\u092E\u0939\u093E\u0938\u093E\u0917\u0930\u0020\u092E\u0902\u091C
391//  \u0020\u0905\u0916\u0020\u0926\u0947\u0936\u0020\u092C\u0939\u093E\u092E
392//  \u093E\u0938\u0020\u091B\u0941\u0020\u0915\u0947\u0930\u0947\u092C\u093F
393//  \u092F\u0928\u0020\u092E\u0902\u091C\u0020
394//  \u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0916\u0020\u0930\u093E\u091C
395//  \u0927\u093E\u0928\u0940\u0020\u0928\u0938\u094C\u0020\u0938\u092E\u094D
396//  \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F\u0020\u092C
397//  \u0941\u0930\u0941\u0902\u0921\u0940\u0020\u0905\u092B\u094D\u0930\u0940
398//  \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947
399//  \u0020\u092E\u0927\u094D\u0020
400//  \u092F\u0915\u094D\u0937\u0947\u0924\u094D\u0930\u0947\u0020\u0926\u0947
401//  \u0936\u0020\u0905\u0938\u094D\u0020\u0924\u093F\u0020\u0938\u092E\u094D
402//  \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F
403
404
405namespace {
406
407class CompactLangDetTest : public testing::Test {
408 protected:
409  // Objects declared here can be used by all tests in the test case for Foo.
410
411  // Detect language of plaintext src
412  Language TestCompactLangDetPlain(const char* src) {
413    bool is_plain_text = true;
414    bool is_reliable;
415
416    Language lang = CompactLangDet::DetectLanguage(NULL, src, strlen(src),
417                                                   is_plain_text,
418                                                   &is_reliable);
419    return lang;
420  }
421
422
423  // Detect extended language of plaintext src
424  Language TestExtCompactLangDetPlain(const char* src) {
425    bool is_plain_text = true;
426    Language language3[3];
427    int percent3[3];
428    int text_bytes;
429    bool is_reliable;
430
431    Language lang =  CompactLangDet::ExtDetectLanguageSummary(NULL,
432                            src, strlen(src),
433                            is_plain_text,
434                            language3,
435                            percent3,
436                            &text_bytes,
437                            &is_reliable);
438    return lang;
439  }
440};    // end class CompactLangDetTest
441
442
// Smoke test: the English sample must be reported as ENGLISH and the
// hi_Deva sample as HINDI by the basic plain-text detection helper.
TEST_F(CompactLangDetTest, EasyTests) {
  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en));
  EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva));
}
447
448
// Runs the basic plain-text detection helper over the kTeststr_* sample
// strings and checks the Language reported for each one.
//
// How to read the list:
//   EXPECT_EQ(...)        active expectation.
//   //// EXPECT_EQ(...)   expectation that is currently switched off (see the
//                         note about detection tables at the top of the body).
//   //  EXPECT_EQ(...)    expectation switched off for the reason given in the
//                         comment directly above it, where there is one.
//   // No XXX             Language value that has no expectation here.
// Some samples are expected to be reported as a different language than the
// one their name suggests; those lines carry a NOTE comment.
// The entries are grouped by the first letter of the sample's language tag.
TEST_F(CompactLangDetTest, FullTests) {
  // Only the tests reflecting the currently used detection tables are enabled.

  // Do all the languages in all their scripts
  //// EXPECT_EQ(AFAR, TestCompactLangDetPlain(kTeststr_aa_Latn));
  //// EXPECT_EQ(ABKHAZIAN, TestCompactLangDetPlain(kTeststr_ab_Cyrl));
  EXPECT_EQ(AFRIKAANS, TestCompactLangDetPlain(kTeststr_af_Latn));
  //// EXPECT_EQ(AMHARIC, TestCompactLangDetPlain(kTeststr_am_Ethi));
  EXPECT_EQ(ARABIC, TestCompactLangDetPlain(kTeststr_ar_Arab));
  //// EXPECT_EQ(ASSAMESE, TestCompactLangDetPlain(kTeststr_as_Beng));
  //// EXPECT_EQ(AYMARA, TestCompactLangDetPlain(kTeststr_ay_Latn));
  // AZERBAIJANI Arab & Cyrl removed 2008.05.27. Just AZERBAIJANI Latn left
  //  EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Arab));
  //  Missing data: az-Cyrl
  //// EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Latn));

  //// EXPECT_EQ(BASHKIR, TestCompactLangDetPlain(kTeststr_ba_Cyrl));
  EXPECT_EQ(BELARUSIAN, TestCompactLangDetPlain(kTeststr_be_Cyrl));
  EXPECT_EQ(BULGARIAN, TestCompactLangDetPlain(kTeststr_bg_Cyrl));
  //// EXPECT_EQ(BIHARI, TestCompactLangDetPlain(kTeststr_bh_Deva));
  //// EXPECT_EQ(BISLAMA, TestCompactLangDetPlain(kTeststr_bi_Latn));
  //// EXPECT_EQ(BENGALI, TestCompactLangDetPlain(kTeststr_bn_Beng));

  //// EXPECT_EQ(TIBETAN, TestCompactLangDetPlain(kTeststr_bo_Tibt));
  //// EXPECT_EQ(BRETON, TestCompactLangDetPlain(kTeststr_br_Latn));
  EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_bs_Cyrl));    // NOTE: Not BOSNIAN
  //// EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_bs_Latn));   // NOTE: Not BOSNIAN

  EXPECT_EQ(CATALAN, TestCompactLangDetPlain(kTeststr_ca_Latn));
  EXPECT_EQ(CHEROKEE, TestCompactLangDetPlain(kTeststr_chr_Cher));
  //// EXPECT_EQ(CORSICAN, TestCompactLangDetPlain(kTeststr_co_Latn));
    // No CREOLES_AND_PIDGINS_ENGLISH_BASED
    // No CREOLES_AND_PIDGINS_FRENCH_BASED
    // No CREOLES_AND_PIDGINS_OTHER
    // No CREOLES_AND_PIDGINS_PORTUGUESE_BASED
  EXPECT_EQ(CZECH, TestCompactLangDetPlain(kTeststr_cs_Latn));
  EXPECT_EQ(WELSH, TestCompactLangDetPlain(kTeststr_cy_Latn));

  EXPECT_EQ(DANISH, TestCompactLangDetPlain(kTeststr_da_Latn));
  EXPECT_EQ(GERMAN, TestCompactLangDetPlain(kTeststr_de_Latn));
  EXPECT_EQ(DHIVEHI, TestCompactLangDetPlain(kTeststr_dv_Thaa));
  //// EXPECT_EQ(DZONGKHA, TestCompactLangDetPlain(kTeststr_dz_Tibt));

  EXPECT_EQ(GREEK, TestCompactLangDetPlain(kTeststr_el_Grek));
  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en_Latn));
  //// EXPECT_EQ(ESPERANTO, TestCompactLangDetPlain(kTeststr_eo_Latn));
  EXPECT_EQ(SPANISH, TestCompactLangDetPlain(kTeststr_es_Latn));
  EXPECT_EQ(ESTONIAN, TestCompactLangDetPlain(kTeststr_et_Latn));
  //// EXPECT_EQ(BASQUE, TestCompactLangDetPlain(kTeststr_eu_Latn));

  EXPECT_EQ(PERSIAN, TestCompactLangDetPlain(kTeststr_fa_Arab));
  EXPECT_EQ(FINNISH, TestCompactLangDetPlain(kTeststr_fi_Latn));
  //// EXPECT_EQ(FIJIAN, TestCompactLangDetPlain(kTeststr_fj_Latn));
  //// EXPECT_EQ(FAROESE, TestCompactLangDetPlain(kTeststr_fo_Latn));
  EXPECT_EQ(FRENCH, TestCompactLangDetPlain(kTeststr_fr_Latn));
  //// EXPECT_EQ(FRISIAN, TestCompactLangDetPlain(kTeststr_fy_Latn));

  EXPECT_EQ(IRISH, TestCompactLangDetPlain(kTeststr_ga_Latn));
  //// EXPECT_EQ(SCOTS_GAELIC, TestCompactLangDetPlain(kTeststr_gd_Latn));
  //// EXPECT_EQ(GALICIAN, TestCompactLangDetPlain(kTeststr_gl_Latn));
  //// EXPECT_EQ(GUARANI, TestCompactLangDetPlain(kTeststr_gn_Latn));
  EXPECT_EQ(GUJARATI, TestCompactLangDetPlain(kTeststr_gu_Gujr));
  //// EXPECT_EQ(MANX, TestCompactLangDetPlain(kTeststr_gv_Latn));

  //// EXPECT_EQ(HAUSA, TestCompactLangDetPlain(kTeststr_ha_Latn));
  EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva));
  EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_hr_Latn));     // NOTE: now CROATIAN
  //// EXPECT_EQ(HAITIAN_CREOLE, TestCompactLangDetPlain(kTeststr_ht_Latn));
  EXPECT_EQ(HUNGARIAN, TestCompactLangDetPlain(kTeststr_hu_Latn));
  EXPECT_EQ(ARMENIAN, TestCompactLangDetPlain(kTeststr_hy_Armn));

  //// EXPECT_EQ(INTERLINGUA, TestCompactLangDetPlain(kTeststr_ia_Latn));
  EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_id_Latn));        // NOTE: id sample, expected as MALAY
  //// EXPECT_EQ(INTERLINGUE, TestCompactLangDetPlain(kTeststr_ie_Latn));
  //// EXPECT_EQ(INUPIAK, TestCompactLangDetPlain(kTeststr_ik_Latn));
  EXPECT_EQ(ICELANDIC, TestCompactLangDetPlain(kTeststr_is_Latn));
  EXPECT_EQ(ITALIAN, TestCompactLangDetPlain(kTeststr_it_Latn));
  EXPECT_EQ(INUKTITUT, TestCompactLangDetPlain(kTeststr_iu_Cans));
  EXPECT_EQ(HEBREW, TestCompactLangDetPlain(kTeststr_iw_Hebr));

  EXPECT_EQ(JAPANESE, TestCompactLangDetPlain(kTeststr_ja_Hani));
  //// EXPECT_EQ(JAVANESE, TestCompactLangDetPlain(kTeststr_jw_Latn));

  EXPECT_EQ(GEORGIAN, TestCompactLangDetPlain(kTeststr_ka_Geor));
  //// EXPECT_EQ(KHASI, TestCompactLangDetPlain(kTeststr_kha_Latn));
  //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Arab));
  //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Cyrl));
  //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Latn));
  //// EXPECT_EQ(GREENLANDIC, TestCompactLangDetPlain(kTeststr_kl_Latn));
  EXPECT_EQ(KHMER, TestCompactLangDetPlain(kTeststr_km_Khmr));
  EXPECT_EQ(KANNADA, TestCompactLangDetPlain(kTeststr_kn_Knda));
  EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_ko_Hani));
  //// EXPECT_EQ(KASHMIRI, TestCompactLangDetPlain(kTeststr_ks_Deva));
  // KURDISH Latn removed 2008.05.27. Just KURDISH Arab left
  //// EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Arab));
  //  EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Latn));
  //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Arab));
  //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Cyrl));

  //// EXPECT_EQ(LATIN, TestCompactLangDetPlain(kTeststr_la_Latn));
  //// EXPECT_EQ(LUXEMBOURGISH, TestCompactLangDetPlain(kTeststr_lb_Latn));
  //// EXPECT_EQ(GANDA, TestCompactLangDetPlain(kTeststr_lg_Latn));
  //// EXPECT_EQ(LINGALA, TestCompactLangDetPlain(kTeststr_ln_Latn));
  EXPECT_EQ(LAOTHIAN, TestCompactLangDetPlain(kTeststr_lo_Laoo));
  EXPECT_EQ(LITHUANIAN, TestCompactLangDetPlain(kTeststr_lt_Latn));
  EXPECT_EQ(LATVIAN, TestCompactLangDetPlain(kTeststr_lv_Latn));

  //// EXPECT_EQ(MALAGASY, TestCompactLangDetPlain(kTeststr_mg_Latn));
  //// EXPECT_EQ(MAORI, TestCompactLangDetPlain(kTeststr_mi_Latn));
  EXPECT_EQ(MACEDONIAN, TestCompactLangDetPlain(kTeststr_mk_Cyrl));
  EXPECT_EQ(MALAYALAM, TestCompactLangDetPlain(kTeststr_ml_Mlym));
  //// EXPECT_EQ(MONGOLIAN, TestCompactLangDetPlain(kTeststr_mn_Cyrl));
  //// EXPECT_EQ(MOLDAVIAN, TestCompactLangDetPlain(kTeststr_mo_Cyrl));
  //// EXPECT_EQ(MARATHI, TestCompactLangDetPlain(kTeststr_mr_Deva));
  EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn));
  // EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn2));
  EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn3));
  //// EXPECT_EQ(MALTESE, TestCompactLangDetPlain(kTeststr_mt_Latn));
  //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Latn));
  //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Mymr));

  //// EXPECT_EQ(NAURU, TestCompactLangDetPlain(kTeststr_na_Latn));
  //// EXPECT_EQ(NEPALI, TestCompactLangDetPlain(kTeststr_ne_Deva));
  EXPECT_EQ(DUTCH, TestCompactLangDetPlain(kTeststr_nl_Latn));
  //// EXPECT_EQ(NORWEGIAN_N, TestCompactLangDetPlain(kTeststr_nn_Latn));
  EXPECT_EQ(NORWEGIAN, TestCompactLangDetPlain(kTeststr_no_Latn));

  //// EXPECT_EQ(OCCITAN, TestCompactLangDetPlain(kTeststr_oc_Latn));
  //// EXPECT_EQ(OROMO, TestCompactLangDetPlain(kTeststr_om_Latn));
  EXPECT_EQ(ORIYA, TestCompactLangDetPlain(kTeststr_or_Orya));

  EXPECT_EQ(PUNJABI, TestCompactLangDetPlain(kTeststr_pa_Guru));
  EXPECT_EQ(POLISH, TestCompactLangDetPlain(kTeststr_pl_Latn));
  //// EXPECT_EQ(PASHTO, TestCompactLangDetPlain(kTeststr_ps_Arab));
  EXPECT_EQ(PORTUGUESE, TestCompactLangDetPlain(kTeststr_pt_BR));     // NOTE: not PORTUGUESE_B
                                                                      // nor PORTUGUESE_P

  //// EXPECT_EQ(QUECHUA, TestCompactLangDetPlain(kTeststr_qu_Latn));

  //// EXPECT_EQ(RHAETO_ROMANCE, TestCompactLangDetPlain(kTeststr_rm_Latn));
  //// EXPECT_EQ(RUNDI, TestCompactLangDetPlain(kTeststr_rn_Latn));
  EXPECT_EQ(ROMANIAN, TestCompactLangDetPlain(kTeststr_ro_Latn));
  EXPECT_EQ(RUSSIAN, TestCompactLangDetPlain(kTeststr_ru_Cyrl));
  //// EXPECT_EQ(KINYARWANDA, TestCompactLangDetPlain(kTeststr_rw_Latn));

  //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Deva));
  //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Latn));
  //// EXPECT_EQ(SCOTS, TestCompactLangDetPlain(kTeststr_sco_Latn));
  //// EXPECT_EQ(SINDHI, TestCompactLangDetPlain(kTeststr_sd_Arab));
  //// EXPECT_EQ(SANGO, TestCompactLangDetPlain(kTeststr_sg_Latn));
    // No SERBO_CROATIAN (sh)
  EXPECT_EQ(SINHALESE, TestCompactLangDetPlain(kTeststr_si_Sinh));
  //// EXPECT_EQ(LIMBU, TestCompactLangDetPlain(kTeststr_sit_NP));
  EXPECT_EQ(SLOVAK, TestCompactLangDetPlain(kTeststr_sk_Latn));
  EXPECT_EQ(SLOVENIAN, TestCompactLangDetPlain(kTeststr_sl_Latn));
  //// EXPECT_EQ(SAMOAN, TestCompactLangDetPlain(kTeststr_sm_Latn));
  //// EXPECT_EQ(SHONA, TestCompactLangDetPlain(kTeststr_sn_Latn));
  //// EXPECT_EQ(SOMALI, TestCompactLangDetPlain(kTeststr_so_Latn));
  //// EXPECT_EQ(ALBANIAN, TestCompactLangDetPlain(kTeststr_sq_Latn));
  EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_sr_Cyrl));    // NOTE: now SERBIAN
  EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_Latn));   // NOTE: Not SERBIAN
  EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_ME_Latn));   // NOTE: not SERBIAN nor MONTENEGRIN
  //// EXPECT_EQ(SISWANT, TestCompactLangDetPlain(kTeststr_ss_Latn));
  //// EXPECT_EQ(SESOTHO, TestCompactLangDetPlain(kTeststr_st_Latn));
  //// EXPECT_EQ(SUNDANESE, TestCompactLangDetPlain(kTeststr_su_Latn));
  EXPECT_EQ(SWEDISH, TestCompactLangDetPlain(kTeststr_sv_Latn));
  EXPECT_EQ(SWAHILI, TestCompactLangDetPlain(kTeststr_sw_Latn));
  EXPECT_EQ(SYRIAC, TestCompactLangDetPlain(kTeststr_syr_Syrc));

  EXPECT_EQ(TAMIL, TestCompactLangDetPlain(kTeststr_ta_Taml));
  EXPECT_EQ(TELUGU, TestCompactLangDetPlain(kTeststr_te_Telu));
  // Tajik Arab removed 2008.05.27. Just Tajik Cyrl left
  //  EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Arab));
  //// EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Cyrl));
  EXPECT_EQ(THAI, TestCompactLangDetPlain(kTeststr_th_Thai));
  //// EXPECT_EQ(TIGRINYA, TestCompactLangDetPlain(kTeststr_ti_Ethi));
  //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Cyrl));
  //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Latn));
  EXPECT_EQ(TAGALOG, TestCompactLangDetPlain(kTeststr_tl_Latn));
  //// EXPECT_EQ(TSWANA, TestCompactLangDetPlain(kTeststr_tn_Latn));
  //// EXPECT_EQ(TONGA, TestCompactLangDetPlain(kTeststr_to_Latn));
  EXPECT_EQ(TURKISH, TestCompactLangDetPlain(kTeststr_tr_Latn));
  //// EXPECT_EQ(TSONGA, TestCompactLangDetPlain(kTeststr_ts_Latn));
  //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Cyrl));
  //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Latn));
  //// EXPECT_EQ(TWI, TestCompactLangDetPlain(kTeststr_tw_Latn));

  //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Arab));
  //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Cyrl));
  //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Latn));
  EXPECT_EQ(UKRAINIAN, TestCompactLangDetPlain(kTeststr_uk_Cyrl));
  //// EXPECT_EQ(URDU, TestCompactLangDetPlain(kTeststr_ur_Arab));
  //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Arab));
  //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Cyrl));
  //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Latn));

  EXPECT_EQ(VIETNAMESE, TestCompactLangDetPlain(kTeststr_vi_Latn));
  //// EXPECT_EQ(VOLAPUK, TestCompactLangDetPlain(kTeststr_vo_Latn));

  //// EXPECT_EQ(WOLOF, TestCompactLangDetPlain(kTeststr_wo_Latn));

  //// EXPECT_EQ(XHOSA, TestCompactLangDetPlain(kTeststr_xh_Latn));

  EXPECT_EQ(YIDDISH, TestCompactLangDetPlain(kTeststr_yi_Hebr));
  //// EXPECT_EQ(YORUBA, TestCompactLangDetPlain(kTeststr_yo_Latn));

  // Zhuang Hani removed 2008.05.13. Just Zhuang Latn left
  //  EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Hani));
  //// EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Latn));
  EXPECT_EQ(CHINESE, TestCompactLangDetPlain(kTeststr_zh_Hani));
  EXPECT_EQ(CHINESE_T, TestCompactLangDetPlain(kTeststr_zh_TW));
  //// EXPECT_EQ(ZULU, TestCompactLangDetPlain(kTeststr_zu_Latn));
  // No TG_UNKNOWN_LANGUAGE
  // No UNKNOWN_LANGUAGE

  // This test should be executed with ASAN (AddressSanitizer).
  // NOTE(review): presumably the input exists to expose a memory error that
  // the KOREAN expectation alone would not catch -- confirm with the data file.
  EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_kr_repetitions));
}
667
668
// Checks inputs that are not ordinary natural-language text, running each one
// through both the basic helper (TestCompactLangDetPlain) and the extended
// helper (TestExtCompactLangDetPlain).
// Every expectation that is still active requires ENGLISH from both helpers;
// lines prefixed with //// are expectations that are currently switched off.
TEST_F(CompactLangDetTest, ExtendedTests) {
  // Do the extended languages, with them not-allowed then allowed
  // These turn out to be extraordinarily sensitive forms of garbage bytes
  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_tlh_Latn));
  //// EXPECT_EQ(X_KLINGON, TestExtCompactLangDetPlain(kTeststr_tlh_Latn));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzp_Latn));
  //// EXPECT_EQ(X_PIG_LATIN, TestExtCompactLangDetPlain(kTeststr_zzp_Latn));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Bugi));
  //// EXPECT_EQ(X_BUGINESE, TestExtCompactLangDetPlain(kTeststr_xx_Bugi));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Goth));
  //// EXPECT_EQ(X_GOTHIC, TestExtCompactLangDetPlain(kTeststr_xx_Goth));

  // Next three now removed permanently from probability tables (May 2008)
  //  (used to be X_BORK_BORK_BORK, X_ELMER_FUDD, X_HACKER).
  //
  // Small changes in probability tables may cause these non-texts to
  // change detection result. If that happens, cross-check that
  // the new result is not because of a bug, then change the expected values.
  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzb_Latn));
  EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzb_Latn));

  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zze_Latn));
  EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zze_Latn));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzh_Latn));
  //// EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzh_Latn));
}
699
700
701}  // End namespace
702
703#if !defined(CLD_WINDOWS)
704int main(int argc, char** argv) {
705  FLAGS_logtostderr = true;
706  InitGoogle("Unit test for CLD small", &argc, &argv, false);
707  return RUN_ALL_TESTS();
708}
709#endif
710