1/*
2*******************************************************************************
3*
4*   Copyright (C) 2003-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  sorttest.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2003aug04
14*   created by: Markus W. Scherer
15*
16*   Test internal sorting functions.
17*/
18
19#include <stdio.h>
20
21#include "unicode/utypes.h"
22#include "unicode/ucol.h"
23#include "unicode/ustring.h"
24#include "cmemory.h"
25#include "cintltst.h"
26#include "uarrsort.h"
27
28#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
29
30static void
31SortTest() {
32    uint16_t small[]={ 8, 1, 2, 5, 4, 3, 7, 6 };
33    int32_t medium[]={ 10, 8, 1, 2, 5, 5, -1, 6, 4, 3, 9, 7, 5 };
34    uint32_t large[]={ 21, 10, 20, 19, 11, 12, 13, 10, 10, 10, 10,
35                       8, 1, 2, 5, 10, 10, 4, 17, 18, 3, 9, 10, 7, 6, 14, 15, 16 };
36
37    int32_t i;
38    UErrorCode errorCode;
39
40    /* sort small array (stable) */
41    errorCode=U_ZERO_ERROR;
42    uprv_sortArray(small, LENGTHOF(small), sizeof(small[0]), uprv_uint16Comparator, NULL, TRUE, &errorCode);
43    if(U_FAILURE(errorCode)) {
44        log_err("uprv_sortArray(small) failed - %s\n", u_errorName(errorCode));
45        return;
46    }
47    for(i=1; i<LENGTHOF(small); ++i) {
48        if(small[i-1]>small[i]) {
49            log_err("uprv_sortArray(small) mis-sorted [%d]=%u > [%d]=%u\n", i-1, small[i-1], i, small[i]);
50            return;
51        }
52    }
53
54    /* for medium, add bits that will not be compared, to test stability */
55    for(i=0; i<LENGTHOF(medium); ++i) {
56        medium[i]=(medium[i]<<4)|i;
57    }
58
59    /* sort medium array (stable) */
60    uprv_sortArray(medium, LENGTHOF(medium), sizeof(medium[0]), uprv_int32Comparator, NULL, TRUE, &errorCode);
61    if(U_FAILURE(errorCode)) {
62        log_err("uprv_sortArray(medium) failed - %s\n", u_errorName(errorCode));
63        return;
64    }
65    for(i=1; i<LENGTHOF(medium); ++i) {
66        if(medium[i-1]>=medium[i]) {
67            log_err("uprv_sortArray(medium) mis-sorted [%d]=%u > [%d]=%u\n", i-1, medium[i-1], i, medium[i]);
68            return;
69        }
70    }
71
72    /* sort large array (not stable) */
73    errorCode=U_ZERO_ERROR;
74    uprv_sortArray(large, LENGTHOF(large), sizeof(large[0]), uprv_uint32Comparator, NULL, FALSE, &errorCode);
75    if(U_FAILURE(errorCode)) {
76        log_err("uprv_sortArray(large) failed - %s\n", u_errorName(errorCode));
77        return;
78    }
79    for(i=1; i<LENGTHOF(large); ++i) {
80        if(large[i-1]>large[i]) {
81            log_err("uprv_sortArray(large) mis-sorted [%d]=%u > [%d]=%u\n", i-1, large[i-1], i, large[i]);
82            return;
83        }
84    }
85}
86
87#if !UCONFIG_NO_COLLATION
88
89/*
90 * Fill an array with semi-random short strings.
91 * Vary them enough to be interesting, but create duplicates.
92 * With CYCLE=10 characters per STR_LEN=3 string positions there are only 1000 unique strings.
93 * NUM_LINES should be larger than this.
94 */
95#define NUM_LINES 10000
96#define STR_LEN 3
97#define CYCLE 10
98
99/*
100 * Use characters beyond the Latin Extended A block to avoid a collator fastpath.
101 * They should sort unique, so that we can later use a binary comparison for string equality.
102 */
103#define BASE_CHAR 0x200
104
105typedef struct Line {
106    UChar s[STR_LEN];
107    int32_t recordNumber;
108} Line;
109
110static void
111printLines(const Line *lines) {
112#if 0
113    int32_t i, j;
114    for(i=0; i<NUM_LINES; ++i) {
115        const Line *line=lines+i;
116        for(j=0; j<STR_LEN; ++j) {
117            printf("%04x ", line->s[j]);
118        }
119        printf(" #%5d\n", line->recordNumber);
120    }
121#endif
122}
123
124/* Use a collator so that the comparisons are not essentially free, for simple benchmarking. */
125static int32_t U_EXPORT2
126linesComparator(const void *context, const void *left, const void *right) {
127    const UCollator *coll=(const UCollator *)context;
128    const Line *leftLine=(const Line *)left;
129    const Line *rightLine=(const Line *)right;
130    /* compare the strings but not the record number */
131    return ucol_strcoll(coll, leftLine->s, STR_LEN, rightLine->s, STR_LEN);
132}
133
134static void StableSortTest() {
135    UErrorCode errorCode=U_ZERO_ERROR;
136    UCollator *coll;
137    Line *lines, *p;
138    UChar s[STR_LEN];
139    int32_t i, j;
140
141    coll=ucol_open("root", &errorCode);
142    if(U_FAILURE(errorCode)) {
143        log_data_err("ucol_open(root) failed - %s\n", u_errorName(errorCode));
144        return;
145    }
146
147    lines=p=(Line *)uprv_malloc(NUM_LINES*sizeof(Line));
148    uprv_memset(lines, 0, NUM_LINES*sizeof(Line));  /* avoid uninitialized memory */
149
150    for(j=0; j<STR_LEN; ++j) { s[j]=BASE_CHAR; }
151    j=0;
152    for(i=0; i<NUM_LINES; ++i) {
153        UChar c;
154        u_memcpy(p->s, s, STR_LEN);
155        p->recordNumber=i;
156        /* Modify the string for the next line. */
157        c=s[j]+1;
158        if(c==BASE_CHAR+CYCLE) { c=BASE_CHAR; }
159        s[j]=c;
160        if(++j==STR_LEN) { j=0; }
161        ++p;
162    }
163    puts("\n* lines before sorting");
164    printLines(lines);
165
166    uprv_sortArray(lines, NUM_LINES, (int32_t)sizeof(Line),
167                   linesComparator, coll, TRUE, &errorCode);
168    if(U_FAILURE(errorCode)) {
169        log_err("uprv_sortArray() failed - %s\n", u_errorName(errorCode));
170        return;
171    }
172    puts("* lines after sorting");
173    printLines(lines);
174
175    /* Verify that the array is sorted correctly. */
176    p=lines;
177    for(i=1; i<NUM_LINES; ++i) {
178        Line *q=p+1;  /* =lines+i */
179        /* Binary comparison first, for speed. In this case, equal strings must be identical. */
180        int32_t diff=u_strCompare(p->s, STR_LEN, q->s, STR_LEN, FALSE);
181        if(diff==0) {
182            if(p->recordNumber>=q->recordNumber) {
183                log_err("equal strings %d and %d out of order at sorted index %d\n",
184                        (int)p->recordNumber, (int)q->recordNumber, (int)i);
185                break;
186            }
187        } else {
188            /* Compare unequal strings with the collator. */
189            diff=ucol_strcoll(coll, p->s, STR_LEN, q->s, STR_LEN);
190            if(diff>=0) {
191                log_err("unequal strings %d and %d out of order at sorted index %d\n",
192                        (int)p->recordNumber, (int)q->recordNumber, (int)i);
193                break;
194            }
195        }
196        p=q;
197    }
198
199    uprv_free(lines);
200    ucol_close(coll);
201}
202
203#endif  /* !UCONFIG_NO_COLLATION */
204
205void
206addSortTest(TestNode** root);
207
208void
209addSortTest(TestNode** root) {
210    addTest(root, &SortTest, "tsutil/sorttest/SortTest");
211#if !UCONFIG_NO_COLLATION
212    addTest(root, &StableSortTest, "tsutil/sorttest/StableSortTest");
213#endif
214}
215