1#include <stdint.h>
2#include <inttypes.h>
3#include <stdlib.h>
4#include <string.h>
5#include <stdio.h>
6#include "opcodes.h"
7
/* M3 is handed to the CU14() opcode macro as its first argument.  The
   build may predefine it; when it does not, the value defaults to 0. */
#if !defined(M3)
# define M3 0
#endif
11
/* Snapshot of the operand registers and the condition code as they are
   left behind by one CU14 insn. */
typedef struct cu14 {
   uint64_t addr1;  /* target address after the insn */
   uint64_t len1;   /* target length after the insn */
   uint64_t addr2;  /* source address after the insn */
   uint64_t len2;   /* source length after the insn */
   uint32_t cc;     /* condition code set by the insn */
} cu14_t;
20
/* ---- Input buffers shared by the tests ---- */

/* Four 1-byte UTF-8 characters */
uint8_t pattern1[] = { 0x00, 0x01, 0x02, 0x03 };

/* Four 2-byte UTF-8 characters */
uint8_t pattern2[] = {
   0xc2, 0x80,   0xc2, 0x81,
   0xc2, 0x82,   0xc2, 0x83
};

/* Four 3-byte UTF-8 characters */
uint8_t pattern3[] = {
   0xe1, 0x80, 0x80,   0xe1, 0x80, 0x81,
   0xe1, 0x80, 0x82,   0xe1, 0x80, 0x83
};

/* Four 4-byte UTF-8 characters */
uint8_t pattern4[] = {
   0xf4, 0x80, 0x80, 0x80,   0xf4, 0x80, 0x80, 0x81,
   0xf4, 0x80, 0x80, 0x82,   0xf4, 0x80, 0x80, 0x83
};

/* Characters of all four encoded lengths, interleaved.  The leading
   comment on each row gives the encoded length of that character. */
uint8_t mixed[] = {
   /* 1 */ 0x01,
   /* 2 */ 0xc3, 0x80,
   /* 1 */ 0x12,
   /* 3 */ 0xe1, 0x90, 0x93,
   /* 1 */ 0x23,
   /* 4 */ 0xf4, 0x80, 0x90, 0x8a,
   /* 1 */ 0x34,
   /* 2 */ 0xc4, 0x8c,
   /* 3 */ 0xe1, 0x91, 0x94,
   /* 2 */ 0xc5, 0x8a,
   /* 4 */ 0xf4, 0x80, 0x90, 0x8a,
   /* 2 */ 0xc5, 0x8a,
   /* 3 */ 0xe1, 0x91, 0x94,
   /* 4 */ 0xf4, 0x80, 0x90, 0x8a,
   /* 3 */ 0xe1, 0x91, 0x94
};

/* Output area for the converted characters.  It is deliberately large so
   that we don't have to worry about running out of room. */
uint32_t buff[500];
74
75
76static cu14_t
77do_cu14(uint32_t *dst, uint64_t dst_len, uint8_t *src, uint64_t src_len)
78{
79   int cc = 42;
80   cu14_t regs;
81
82   /* build up the register pairs */
83   register uint8_t  *source     asm("4") = src;
84   register uint64_t  source_len asm("5") = src_len;
85   register uint32_t *dest       asm("2") = dst;
86   register uint64_t  dest_len   asm("3") = dst_len;
87
88   asm volatile(
89                CU14(M3,2,4)
90                "ipm %2\n\t"
91                "srl %2,28\n\t"
92                : "+d"(dest), "+d"(source), "=d"(cc),
93                  "+d"(source_len), "+d"(dest_len)
94                :
95                : "memory", "cc");
96
97   /* Capture register contents at end of cu14 */
98   regs.addr1 = (uint64_t)dest;
99   regs.len1  = dest_len;
100   regs.addr2 = (uint64_t)source;
101   regs.len2  = source_len;
102   regs.cc = cc;
103
104   return regs;
105}
106
107void
108run_test(uint32_t *dst, uint64_t dst_len, uint8_t *src, uint64_t src_len)
109{
110   int i;
111   cu14_t result;
112
113   printf("UTF8:  ");
114   if (src_len == 0)
115      printf(" <none>");
116   else {
117      for(i = 0; i < src_len; ++i)
118         printf(" %02x", src[i]);
119   }
120   printf("\n");
121
122   result = do_cu14(dst, dst_len, src, src_len);
123
124   // Write out the converted bytes, if any
125   printf("UTF32: ");
126   if (dst_len - result.len1 == 0)
127      printf(" <none>");
128   else {
129      uint64_t num_bytes = dst_len - result.len1;
130
131      /* The number of bytes that were written must be divisible by 4 */
132      if (num_bytes % 4 != 0)
133         fprintf(stderr, "*** number of bytes is not a multiple of 4\n");
134
135      for (i = 0; i < num_bytes / 4; i++) {
136         printf(" %08x", dst[i]);
137      }
138   }
139   printf("\n");
140
141   printf("  cc = %d\n", result.cc);
142   if (dst != NULL)
143      printf("  dst address difference: %"PRId64, result.addr1 - (uint64_t)dst);
144   printf("  dst len: %"PRId64"\n", result.len1);
145
146   if (src != NULL)
147      printf("  src address difference: %"PRId64, result.addr2 - (uint64_t)src);
148   printf("  src len: %"PRId64"\n", result.len2);
149}
150
151// Test conversion of a one-byte character
152void convert_1_byte(void)
153{
154   int i;
155
156   printf("===== Conversion of a one-byte character =====\n");
157
158   printf("\n----- Valid characters -----\n");
159   uint8_t valid[] = {
160      0x00, 0x7f,              // corner cases
161      0x01, 0x10, 0x7e, 0x5d   // misc
162   };
163   run_test(buff, sizeof buff, valid, sizeof valid);
164
165   // As conversion stops upon encountering an invalid character, we
166   // need to test each invalid character separately, to make sure it
167   // is recognized as invalid.
168
169   printf("\n----- Invalid characters -----\n");
170   uint8_t always_invalid[] = {
171      0x80, 0xbf,              // corner cases
172      0xf8, 0xff,              // corner cases
173      0x81, 0xbe, 0x95, 0xab   // misc
174   };
175   for (i = 0; i < sizeof always_invalid; ++i) {
176      uint8_t invalid_char[1];
177      invalid_char[0] = always_invalid[i];
178      run_test(buff, sizeof buff, invalid_char, sizeof invalid_char);
179   }
180
181   // In case of m3 == 0 we get cc=0 indicating exhaustion of source
182   printf("\n----- Invalid characters if m3 == 1 -----\n");
183   uint8_t invalid_if_m3[] = {  // contains all such invalid characters
184      0xc0, 0xc1,
185      0xf5, 0xf6, 0xf7
186   };
187   for (i = 0; i < sizeof invalid_if_m3; ++i) {
188      uint8_t invalid_char[1];
189      invalid_char[0] = invalid_if_m3[i];
190      run_test(buff, sizeof buff, invalid_char, sizeof invalid_char);
191   }
192
193   printf("\n----- 1st char valid, 2nd char invalid -----\n");
194   uint8_t valid_invalid[] = {
195      0x10, // valid
196      0xaa  // invalid
197   };
198   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
199}
200
201// Test conversion of a two-byte character
202void convert_2_bytes(void)
203{
204   int i;
205
206   printf("\n===== Conversion of a two-byte character =====\n");
207
208   printf("\n----- Valid characters -----\n");
209   uint8_t valid[] = {
210      0xc2, 0x80,             // corner case
211      0xc2, 0xbf,             // corner case
212      0xdf, 0x80,             // corner case
213      0xdf, 0xbf,             // corner case
214      0xc3, 0xbe, 0xda, 0xbc  // misc
215   };
216   run_test(buff, sizeof buff, valid, sizeof valid);
217
218   printf("\n----- Valid characters if m3 == 0 -----\n");
219   // First char is 0xc0 or 0xc1
220   uint8_t valid_if_not_m3[] = {
221      0xc0, 0x80,
222      0xc0, 0xbf,
223      0xc1, 0x80,
224      0xc0, 0xbf
225   };
226   run_test(buff, sizeof buff, valid_if_not_m3, sizeof valid_if_not_m3);
227
228   // Test for invalid two-byte characters where the 1st byte is valid
229   // The 2nd byte is invalid if not in range 0x80..0xbf, inclusive
230
231   // As conversion stops upon encountering an invalid character, we
232   // need to test each invalid character separately, to make sure it
233   // is recognized as invalid.
234
235   printf("\n----- Invalid characters if m3 == 1 -----\n");
236   uint8_t always_invalid[] = {
237      0xc2, 0x00,
238      0xc2, 0x7f,
239      0xc2, 0xc0,
240      0xc2, 0xff
241   };
242   for (i = 0; i < sizeof always_invalid; i += 2) {
243      uint8_t invalid_char[2];
244      invalid_char[0] = always_invalid[i];
245      invalid_char[1] = always_invalid[i+1];
246      run_test(buff, sizeof buff, invalid_char, sizeof invalid_char);
247   }
248
249   /* Nb: for a two-byte character we need not test the case where
250      invalidity of the character (cc=2) takes precedence over exhaustion
251      of the 1st operand (cc=1). Invalidity of the character has already
252      been tested when testing the 1st byte. */
253
254   printf("\n----- 1st char valid, 2nd char invalid -----\n");
255   uint8_t valid_invalid[] = {
256      0xc3, 0x81, // valid
257      0xc4, 0x00  // invalid
258   };
259   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
260}
261
262// Test conversion of a three-byte character
263void
264convert_3_bytes(void)
265{
266   int i;
267
268   printf("\n===== Conversion of a three-byte character =====\n");
269
270   /* Exhaustively test the 1st byte E0 - EF, and the interval boundaries for
271      the 2nd and 3rd bytes */
272   printf("\n----- Valid characters -----\n");
273   uint8_t e0[] = {
274      0xe0, 0xa0, 0x80,
275      0xe0, 0xbf, 0x80,
276      0xe0, 0xa0, 0xbf,
277      0xe0, 0xbf, 0xbf,
278      0xe0, 0xaa, 0xbb,   // random  e0 .. ..
279   };
280   run_test(buff, sizeof buff, e0, sizeof e0);
281
282   uint8_t ed[] = {
283      0xed, 0x80, 0x80,
284      0xed, 0x9f, 0x80,
285      0xed, 0x80, 0xbf,
286      0xed, 0x9f, 0xbf,
287      0xed, 0x8a, 0xbb,   // random  ed .. ..
288   };
289   run_test(buff, sizeof buff, ed, sizeof ed);
290
291   for (i = 0; i <= 0xf; ++i) {
292      uint8_t exxx_1[3] = { 0x0, 0x80, 0x80 };
293      uint8_t exxx_2[3] = { 0x0, 0xbf, 0x80 };
294      uint8_t exxx_3[3] = { 0x0, 0x80, 0xbf };
295      uint8_t exxx_4[3] = { 0x0, 0xbf, 0xbf };
296
297      if (i == 0x00) continue;   // special case e0
298      if (i == 0x0d) continue;   // special case ed
299
300      exxx_1[0] = 0xe0 | i;
301      exxx_2[0] = 0xe0 | i;
302      exxx_3[0] = 0xe0 | i;
303      exxx_4[0] = 0xe0 | i;
304      run_test(buff, sizeof buff, exxx_1, sizeof exxx_1);
305      run_test(buff, sizeof buff, exxx_2, sizeof exxx_2);
306      run_test(buff, sizeof buff, exxx_3, sizeof exxx_3);
307      run_test(buff, sizeof buff, exxx_4, sizeof exxx_4);
308   };
309
310   printf("\n----- Invalid characters (2nd byte is invalid) -----\n");
311   // Test for invalid three-byte characters where the 1st byte is valid
312   // The 2nd byte is invalid.
313
314   // As conversion stops upon encountering an invalid character, we
315   // need to test each invalid character separately, to make sure it
316   // is recognized as invalid.
317
318   e0[0] = 0xe0;  // valid
319   e0[1] = 0x9f;  // invalid  because outside [0xa0 .. 0xbf]
320   e0[2] = 0x80;  // valid
321   run_test(buff, sizeof buff, e0, sizeof e0);
322   e0[1] = 0xc0;  // invalid  because outside [0xa0 .. 0xbf]
323   run_test(buff, sizeof buff, e0, sizeof e0);
324
325   ed[0] = 0xed;  // valid
326   ed[1] = 0x7f;  // invalid  because outside [0x80 .. 0x9f]
327   ed[2] = 0x80;  // valid
328   run_test(buff, sizeof buff, ed, sizeof ed);
329   ed[1] = 0xa0;  // invalid  because outside [0x80 .. 0x9f]
330   run_test(buff, sizeof buff, ed, sizeof ed);
331
332   for (i = 0; i <= 0xf; ++i) {
333      uint8_t exxx_1[3] = { 0x0, 0x7f, 0x80 };
334      uint8_t exxx_2[3] = { 0x0, 0xc0, 0x80 };
335
336      if (i == 0x00) continue;   // special case e0
337      if (i == 0x0d) continue;   // special case ed
338
339      exxx_1[0] = 0xe0 | i;
340      exxx_2[0] = 0xe0 | i;
341      run_test(buff, sizeof buff, exxx_1, sizeof exxx_1);
342      run_test(buff, sizeof buff, exxx_2, sizeof exxx_2);
343   };
344
345   printf("\n----- Invalid characters (3rd byte is invalid) -----\n");
346   // For all 1st bytes 0xe0 .. 0xef the 3rd bytes must be in [0x80 .. 0xbf]
347   // No need to special case 0xe0 and 0xed
348   for (i = 0; i <= 0xf; ++i) {
349      uint8_t exxx_1[3] = { 0x0, 0xab, 0x7f };
350      uint8_t exxx_2[3] = { 0x0, 0xab, 0xc0 };
351
352      exxx_1[0] = 0xe0 | i;
353      exxx_2[0] = 0xe0 | i;
354      run_test(buff, sizeof buff, exxx_1, sizeof exxx_1);
355      run_test(buff, sizeof buff, exxx_2, sizeof exxx_2);
356   };
357
358   printf("\n----- Invalid 2nd char AND output exhausted -----\n");
359   /* The character is invalid in its 2nd byte AND the output buffer is
360      exhausted (2 bytes are needed) */
361   uint8_t pat1[] = {
362      0xe0, 0x00, 0x80
363   };
364   run_test(buff, 1, pat1, 3);
365
366   printf("\n----- Invalid 3rd char AND output exhausted -----\n");
367   /* The character is invalid in its 3rd byte AND the output buffer is
368      exhausted (2 bytes are needed) */
369   uint8_t pat2[] = {
370      0xe4, 0x84, 0x00
371   };
372   run_test(buff, 1, pat2, 3);
373
374   printf("\n----- 1st char valid, 2nd char invalid -----\n");
375   uint8_t valid_invalid[] = {
376      0xe1, 0x90, 0x90, // valid
377      0xe1, 0x00, 0x90  // invalid
378   };
379   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
380}
381
382// Test conversion of a four-byte character
383void
384convert_4_bytes(void)
385{
386   int i, j;
387
388   printf("\n===== Conversion of a four-byte character =====\n");
389
390   printf("\n----- Valid characters -----\n");
391   for (i = 0; i <= 4; ++i) {
392      uint8_t valid[4];
393
394      valid[0] = 0xf0 | i;
395
396      for (j = 0; j <= 1; ++j) {
397         // Byte 2
398         if (i == 0) {
399            valid[1] = j == 0 ? 0x90 : 0xbf;    // 0xf0
400         } else if (i == 4) {
401            valid[1] = j == 0 ? 0x80 : 0x8f;    // 0xf4
402         } else {
403            valid[1] = j == 0 ? 0x80 : 0xbf;    // 0xf1 .. 0xf3
404         }
405         // Byte 3 and byte 4 have same interval 0x80 .. 0xbf
406         valid[2] = 0x80;
407         valid[3] = 0x80;
408         run_test(buff, sizeof buff, valid, sizeof valid);
409         valid[2] = 0x80;
410         valid[3] = 0xbf;
411         run_test(buff, sizeof buff, valid, sizeof valid);
412         valid[2] = 0xbf;
413         valid[3] = 0x80;
414         run_test(buff, sizeof buff, valid, sizeof valid);
415         valid[2] = 0xbf;
416         valid[3] = 0xbf;
417         run_test(buff, sizeof buff, valid, sizeof valid);
418      }
419   }
420
421   printf("\n----- Valid characters if m3 == 0 -----\n");
422   // First char is 0xf5 .. 0xf7
423   uint8_t valid_if_not_m3[] = {
424      0xf5, 0x00, 0x00, 0x00,
425      0xf6, 0x11, 0x22, 0x33,
426      0xf7, 0x44, 0x55, 0x66,
427   };
428   run_test(buff, sizeof buff, valid_if_not_m3, sizeof valid_if_not_m3);
429
430   // As conversion stops upon encountering an invalid character, we
431   // need to test each invalid character separately, to make sure it
432   // is recognized as invalid.
433
434   printf("\n----- Invalid characters (2nd byte is invalid) -----\n");
435   // Test for invalid four-byte characters where the 2nd byte is invalid.
436   // All other bytes are valid
437   uint8_t f0[4], f4[4];
438
439   f0[0] = 0xf0;  // valid
440   f0[1] = 0x8f;  // invalid  because outside [0x90 .. 0xbf]
441   f0[2] = 0x80;  // valid
442   f0[3] = 0x80;  // valid
443   run_test(buff, sizeof buff, f0, sizeof f0);
444   f0[1] = 0xc0;  // invalid  because outside [0x90 .. 0xbf]
445   run_test(buff, sizeof buff, f0, sizeof f0);
446
447   f4[0] = 0xf4;  // valid
448   f4[1] = 0x7f;  // invalid  because outside [0x80 .. 0x8f]
449   f4[2] = 0x80;  // valid
450   f4[3] = 0x80;  // valid
451   run_test(buff, sizeof buff, f4, sizeof f4);
452   f4[1] = 0x90;  // invalid  because outside [0x80 .. 0x9f]
453   run_test(buff, sizeof buff, f4, sizeof f4);
454
455   for (i = 0; i <= 0x4; ++i) {
456      uint8_t fxxx_1[4] = { 0x0, 0x7f, 0x80, 0x80 };
457      uint8_t fxxx_2[4] = { 0x0, 0xc0, 0x80, 0x80 };
458
459      if (i == 0) continue;   // special case f0
460      if (i == 4) continue;   // special case f4
461
462      fxxx_1[0] = 0xf0 | i;
463      fxxx_2[0] = 0xf0 | i;
464      run_test(buff, sizeof buff, fxxx_1, sizeof fxxx_1);
465      run_test(buff, sizeof buff, fxxx_2, sizeof fxxx_2);
466   };
467
468   printf("\n----- Invalid characters (3rd byte is invalid) -----\n");
469   // Test for invalid four-byte characters where the 3rd byte is invalid.
470   // All other bytes are valid
471   for (i = 0; i <= 0x4; ++i) {
472      uint8_t fxxx[4] = { 0x0, 0x0, 0x0, 0x80 };
473
474      fxxx[0] = 0xf0 | i;
475      fxxx[1] = (i == 0) ? 0x94 : 0x84;
476      fxxx[2] = 0x7f;
477      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
478      fxxx[2] = 0xc0;
479      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
480   };
481
482   printf("\n----- Invalid characters (4th byte is invalid) -----\n");
483   // Test for invalid four-byte characters where the 3rd byte is invalid.
484   // All other bytes are valid
485   for (i = 0; i <= 0x4; ++i) {
486      uint8_t fxxx[4] = { 0x0, 0x0, 0x80, 0x0 };
487
488      fxxx[0] = 0xf0 | i;
489      fxxx[1] = (i == 0) ? 0x94 : 0x84;
490      fxxx[3] = 0x7f;
491      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
492      fxxx[3] = 0xc0;
493      run_test(buff, sizeof buff, fxxx, sizeof fxxx);
494   };
495
496   printf("\n----- Invalid 2nd char AND output exhausted -----\n");
497   /* The character is invalid in its 2nd byte AND the output buffer is
498      exhausted (4 bytes are needed) */
499   uint8_t pat1[] = {
500      0xf0, 0x00, 0x80, 0x80
501   };
502   run_test(buff, 1, pat1, 4);
503
504   printf("\n----- Invalid 3rd char AND output exhausted -----\n");
505   /* The character is invalid in its 3rd byte AND the output buffer is
506      exhausted (4 bytes are needed) */
507   uint8_t pat2[] = {
508      0xf0, 0xaa, 0x00, 0x80
509   };
510   run_test(buff, 3, pat2, 4);
511
512   printf("\n----- Invalid 4th char AND output exhausted -----\n");
513   /* The character is invalid in its 4th byte AND the output buffer is
514      exhausted (4 bytes are needed) */
515   uint8_t pat3[] = {
516      0xf0, 0xaa, 0xaa, 0x00
517   };
518   run_test(buff, 3, pat3, 4);
519
520   printf("\n----- 1st char valid, 2nd char invalid -----\n");
521   uint8_t valid_invalid[] = {
522      0xf0, 0xaa, 0xaa, 0xaa, // valid
523      0xf0, 0x00, 0x00, 0x00  // invalid
524   };
525   run_test(buff, sizeof buff, valid_invalid, sizeof valid_invalid);
526}
527
528
529int main()
530{
531   convert_1_byte();
532   convert_2_bytes();
533   convert_3_bytes();
534   convert_4_bytes();
535
536   /* Length == 0, no memory should be read or written */
537   printf("\n------------- test1 ----------------\n");
538   run_test(NULL, 0, NULL, 0);
539
540   /* Test exhaustion of source length (source bytes are valid) */
541   printf("\n------------- test2.1 ----------------\n");
542
543   /* No character will be written to BUFF, i.e. loop in jitted code
544      is not iterated */
545   run_test(buff, sizeof buff, NULL,     0);
546   run_test(buff, sizeof buff, pattern1, 0);
547   run_test(buff, sizeof buff, pattern2, 0);
548   run_test(buff, sizeof buff, pattern2, 1);
549   run_test(buff, sizeof buff, pattern3, 0);
550   run_test(buff, sizeof buff, pattern3, 1);
551   run_test(buff, sizeof buff, pattern3, 2);
552   run_test(buff, sizeof buff, pattern4, 0);
553   run_test(buff, sizeof buff, pattern4, 1);
554   run_test(buff, sizeof buff, pattern4, 2);
555   run_test(buff, sizeof buff, pattern4, 3);
556
557   printf("\n------------- test2.2 ----------------\n");
558   /* At least one character will be written to BUFF, i.e. loop in jitted
559      code is iterated */
560   run_test(buff, sizeof buff, pattern1, 2);
561   run_test(buff, sizeof buff, pattern2, 5);
562   run_test(buff, sizeof buff, pattern3, 6);
563   run_test(buff, sizeof buff, pattern4, 9);
564
565   /* Test exhaustion of destination length (source bytes are valid) */
566   printf("\n------------- test3.1 ----------------\n");
567
568   /* No character will be written to BUFF, i.e. loop in jitted code
569      is not iterated */
570
571   /* Want to write 2 or 4 bytes at a time */
572   run_test(NULL, 0, pattern1, sizeof pattern1);  // 2-byte result
573   run_test(NULL, 0, pattern2, sizeof pattern2);  // 2-byte result
574   run_test(NULL, 1, pattern2, sizeof pattern2);  // 2-byte result
575   run_test(NULL, 0, pattern3, sizeof pattern3);  // 2-byte result
576   run_test(NULL, 1, pattern3, sizeof pattern3);  // 2-byte result
577   run_test(NULL, 0, pattern4, sizeof pattern4);  // 4-byte result
578   run_test(NULL, 1, pattern4, sizeof pattern4);  // 4-byte result
579   run_test(NULL, 2, pattern4, sizeof pattern4);  // 4-byte result
580   run_test(NULL, 3, pattern4, sizeof pattern4);  // 4-byte result
581
582   printf("\n------------- test3.2 ----------------\n");
583   /* At least one character will be written to BUFF, i.e. loop in jitted
584      code is iterated */
585   run_test(buff, 4, pattern1, sizeof pattern1);
586   run_test(buff, 5, pattern1, sizeof pattern2);
587   run_test(buff, 6, pattern1, sizeof pattern3);
588   run_test(buff, 7, pattern1, sizeof pattern4);
589
590   /* Convert buffer with mixed characters */
591   printf("\n------------- test4 ----------------\n");
592   run_test(buff, sizeof buff, mixed, sizeof mixed);
593
594   return 0;
595}
596