1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (C) 2001-2009, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   05/23/00    aliu        Creation.
10**********************************************************************
11*/
12
13#include <algorithm>
14#include <vector>
15#include "unicode/utypes.h"
16#include "unicode/edits.h"
17#include "unicode/unistr.h"
18#include "unicode/utf16.h"
19#include "cmemory.h"
20#include "testutil.h"
21#include "intltest.h"
22
23static const UChar HEX[] = u"0123456789ABCDEF";
24
25UnicodeString &TestUtility::appendHex(UnicodeString &buf, UChar32 ch) {
26    if (ch >= 0x10000) {
27        if (ch >= 0x100000) {
28            buf.append(HEX[0xF&(ch>>20)]);
29        }
30        buf.append(HEX[0xF&(ch>>16)]);
31    }
32    buf.append(HEX[0xF&(ch>>12)]);
33    buf.append(HEX[0xF&(ch>>8)]);
34    buf.append(HEX[0xF&(ch>>4)]);
35    buf.append(HEX[0xF&ch]);
36    return buf;
37}
38
39UnicodeString TestUtility::hex(UChar32 ch) {
40    UnicodeString buf;
41    appendHex(buf, ch);
42    return buf;
43}
44
45UnicodeString TestUtility::hex(const UnicodeString& s) {
46    return hex(s, u',');
47}
48
49UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) {
50    UnicodeString result;
51    if (s.isEmpty()) return result;
52    UChar32 c;
53    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(c)) {
54        c = s.char32At(i);
55        if (i > 0) {
56            result.append(sep);
57        }
58        appendHex(result, c);
59    }
60    return result;
61}
62
63UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
64    UnicodeString buf;
65    for (int32_t i = 0; i < len; ++i) {
66        buf.append(HEX[0x0F & (bytes[i] >> 4)]);
67        buf.append(HEX[0x0F & bytes[i]]);
68    }
69    return buf;
70}
71
72namespace {
73
74UnicodeString printOneEdit(const Edits::Iterator &ei) {
75    if (ei.hasChange()) {
76        return UnicodeString() + ei.oldLength() + u"->" + ei.newLength();
77    } else {
78        return UnicodeString() + ei.oldLength() + u"=" + ei.newLength();
79    }
80}
81
82/**
83 * Maps indexes according to the expected edits.
84 * A destination index can occur multiple times when there are source deletions.
85 * Map according to the last occurrence, normally in a non-empty destination span.
86 * Simplest is to search from the back.
87 */
88int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength,
89                         int32_t srcLength, int32_t destLength, int32_t index) {
90    int32_t srcIndex = srcLength;
91    int32_t destIndex = destLength;
92    int32_t i = expLength;
93    while (index < destIndex && i > 0) {
94        --i;
95        int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
96        int32_t prevDestIndex = destIndex - expected[i].newLength;
97        if (index == prevDestIndex) {
98            return prevSrcIndex;
99        } else if (index > prevDestIndex) {
100            if (expected[i].change) {
101                // In a change span, map to its end.
102                return srcIndex;
103            } else {
104                // In an unchanged span, offset within it.
105                return prevSrcIndex + (index - prevDestIndex);
106            }
107        }
108        srcIndex = prevSrcIndex;
109        destIndex = prevDestIndex;
110    }
111    // index is outside the string.
112    return srcIndex;
113}
114
115int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength,
116                         int32_t srcLength, int32_t destLength, int32_t index) {
117    int32_t srcIndex = srcLength;
118    int32_t destIndex = destLength;
119    int32_t i = expLength;
120    while (index < srcIndex && i > 0) {
121        --i;
122        int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
123        int32_t prevDestIndex = destIndex - expected[i].newLength;
124        if (index == prevSrcIndex) {
125            return prevDestIndex;
126        } else if (index > prevSrcIndex) {
127            if (expected[i].change) {
128                // In a change span, map to its end.
129                return destIndex;
130            } else {
131                // In an unchanged span, offset within it.
132                return prevDestIndex + (index - prevSrcIndex);
133            }
134        }
135        srcIndex = prevSrcIndex;
136        destIndex = prevDestIndex;
137    }
138    // index is outside the string.
139    return destIndex;
140}
141
142}  // namespace
143
144// For debugging, set -v to see matching edits up to a failure.
145UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name,
146                                   const Edits &e1, const Edits &e2, UErrorCode &errorCode) {
147    Edits::Iterator ei1 = e1.getFineIterator();
148    Edits::Iterator ei2 = e2.getFineIterator();
149    UBool ok = TRUE;
150    for (int32_t i = 0; ok; ++i) {
151        UBool ei1HasNext = ei1.next(errorCode);
152        UBool ei2HasNext = ei2.next(errorCode);
153        ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__,
154                                ei1HasNext, ei2HasNext);
155        ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode);
156        ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__,
157                                printOneEdit(ei1), printOneEdit(ei2));
158        if (!ei1HasNext || !ei2HasNext) {
159            break;
160        }
161        test.logln();
162    }
163    return ok;
164}
165
166void TestUtility::checkEditsIter(
167        IntlTest &test,
168        const UnicodeString &name,
169        Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators
170        const EditChange expected[], int32_t expLength, UBool withUnchanged,
171        UErrorCode &errorCode) {
172    test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode));
173    test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode));
174
175    int32_t expSrcIndex = 0;
176    int32_t expDestIndex = 0;
177    int32_t expReplIndex = 0;
178    for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
179        const EditChange &expect = expected[expIndex];
180        UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
181        if (withUnchanged || expect.change) {
182            test.assertTrue(msg + u":" + __LINE__, ei1.next(errorCode));
183            test.assertEquals(msg + u":" + __LINE__, expect.change, ei1.hasChange());
184            test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei1.oldLength());
185            test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei1.newLength());
186            test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
187            test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
188            test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
189        }
190
191        if (expect.oldLength > 0) {
192            test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
193            test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
194            test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
195            test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
196            test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
197            test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
198            test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
199            if (!withUnchanged) {
200                // For some iterators, move past the current range
201                // so that findSourceIndex() has to look before the current index.
202                ei2.next(errorCode);
203                ei2.next(errorCode);
204            }
205        }
206
207        if (expect.newLength > 0) {
208            test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
209            test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
210            test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
211            test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
212            test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
213            test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
214            test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
215            if (!withUnchanged) {
216                // For some iterators, move past the current range
217                // so that findSourceIndex() has to look before the current index.
218                ei2.next(errorCode);
219                ei2.next(errorCode);
220            }
221        }
222
223        expSrcIndex += expect.oldLength;
224        expDestIndex += expect.newLength;
225        if (expect.change) {
226            expReplIndex += expect.newLength;
227        }
228    }
229    UnicodeString msg = UnicodeString(name).append(u" end");
230    test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
231    test.assertFalse(msg + u":" + __LINE__, ei1.hasChange());
232    test.assertEquals(msg + u":" + __LINE__, 0, ei1.oldLength());
233    test.assertEquals(msg + u":" + __LINE__, 0, ei1.newLength());
234    test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
235    test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
236    test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
237
238    test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
239    test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
240
241    // Check mapping of all indexes against a simple implementation
242    // that works on the expected changes.
243    // Iterate once forward, once backward, to cover more runtime conditions.
244    int32_t srcLength = expSrcIndex;
245    int32_t destLength = expDestIndex;
246    std::vector<int32_t> srcIndexes;
247    std::vector<int32_t> destIndexes;
248    srcIndexes.push_back(-1);
249    destIndexes.push_back(-1);
250    int32_t srcIndex = 0;
251    int32_t destIndex = 0;
252    for (int32_t i = 0; i < expLength; ++i) {
253        if (expected[i].oldLength > 0) {
254            srcIndexes.push_back(srcIndex);
255            if (expected[i].oldLength > 1) {
256                srcIndexes.push_back(srcIndex + 1);
257                if (expected[i].oldLength > 2) {
258                    srcIndexes.push_back(srcIndex + expected[i].oldLength - 1);
259                }
260            }
261        }
262        if (expected[i].newLength > 0) {
263            destIndexes.push_back(destIndex);
264            if (expected[i].newLength > 1) {
265                destIndexes.push_back(destIndex + 1);
266                if (expected[i].newLength > 2) {
267                    destIndexes.push_back(destIndex + expected[i].newLength - 1);
268                }
269            }
270        }
271        srcIndex += expected[i].oldLength;
272        destIndex += expected[i].newLength;
273    }
274    srcIndexes.push_back(srcLength);
275    destIndexes.push_back(destLength);
276    srcIndexes.push_back(srcLength + 1);
277    destIndexes.push_back(destLength + 1);
278    std::reverse(destIndexes.begin(), destIndexes.end());
279    // Zig-zag across the indexes to stress next() <-> previous().
280    static const int32_t ZIG_ZAG[] = { 0, 1, 2, 3, 2, 1 };
281    for (auto i = 0; i < (int32_t)srcIndexes.size(); ++i) {
282        for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) {
283            int32_t j = ZIG_ZAG[ij];
284            if ((i + j) < (int32_t)srcIndexes.size()) {
285                int32_t si = srcIndexes[i + j];
286                test.assertEquals(name + u" destIndexFromSrc(" + si + u"):" + __LINE__,
287                                  destIndexFromSrc(expected, expLength, srcLength, destLength, si),
288                                  ei2.destinationIndexFromSourceIndex(si, errorCode));
289            }
290        }
291    }
292    for (auto i = 0; i < (int32_t)destIndexes.size(); ++i) {
293        for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) {
294            int32_t j = ZIG_ZAG[ij];
295            if ((i + j) < (int32_t)destIndexes.size()) {
296                int32_t di = destIndexes[i + j];
297                test.assertEquals(name + u" srcIndexFromDest(" + di + u"):" + __LINE__,
298                                  srcIndexFromDest(expected, expLength, srcLength, destLength, di),
299                                  ei2.sourceIndexFromDestinationIndex(di, errorCode));
300            }
301        }
302    }
303}
304