1 2/* 3gcc -o v8crypto v8crypto.c -march=armv8-a -mfpu=crypto-neon-fp-armv8 4gcc -o v8crypto v8crypto.c -mfpu=crypto-neon-fp-armv8 5*/ 6 7#include <stdio.h> 8#include <assert.h> 9#include <malloc.h> // memalign 10#include <string.h> // memset 11#include "tests/malloc.h" 12#include <math.h> // isnormal 13 14typedef unsigned char UChar; 15typedef unsigned short int UShort; 16typedef unsigned int UInt; 17typedef signed int Int; 18typedef unsigned char UChar; 19typedef unsigned long long int ULong; 20typedef signed long long int Long; 21typedef double Double; 22typedef float Float; 23 24typedef unsigned char Bool; 25#define False ((Bool)0) 26#define True ((Bool)1) 27 28 29#define ITERS 1 30 31typedef 32 enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE } 33 LaneTy; 34 35union _V128 { 36 UChar u8[16]; 37 UShort u16[8]; 38 UInt u32[4]; 39 ULong u64[2]; 40 Float f32[4]; 41 Double f64[2]; 42}; 43typedef union _V128 V128; 44 45static inline UChar randUChar ( void ) 46{ 47 static UInt seed = 80021; 48 seed = 1103515245 * seed + 12345; 49 return (seed >> 17) & 0xFF; 50} 51 52//static ULong randULong ( LaneTy ty ) 53//{ 54// Int i; 55// ULong r = 0; 56// for (i = 0; i < 8; i++) { 57// r = (r << 8) | (ULong)(0xFF & randUChar()); 58// } 59// return r; 60//} 61 62/* Generates a random V128. Ensures that that it contains normalised 63 FP numbers when viewed as either F32x4 or F64x2, so that it is 64 reasonable to use in FP test cases. */ 65static void randV128 ( /*OUT*/V128* v, LaneTy ty ) 66{ 67 static UInt nCalls = 0, nIters = 0; 68 Int i; 69 nCalls++; 70 while (1) { 71 nIters++; 72 for (i = 0; i < 16; i++) { 73 v->u8[i] = randUChar(); 74 } 75 if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2]) 76 && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1])) 77 break; 78 } 79 if (0 == (nCalls & 0xFF)) 80 printf("randV128: %u calls, %u iters\n", nCalls, nIters); 81} 82 83static void showV128 ( V128* v ) 84{ 85 Int i; 86 for (i = 15; i >= 0; i--) 87 printf("%02x", (Int)v->u8[i]); 88} 89 90//static void showBlock ( const char* msg, V128* block, Int nBlock ) 91//{ 92// Int i; 93// printf("%s\n", msg); 94// for (i = 0; i < nBlock; i++) { 95// printf(" "); 96// showV128(&block[i]); 97// printf("\n"); 98// } 99//} 100 101 102/* ---------------------------------------------------------------- */ 103/* -- Parameterisable test macros -- */ 104/* ---------------------------------------------------------------- */ 105 106#define DO50(_action) \ 107 do { \ 108 Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \ 109 } while (0) 110 111 112/* Generate a test that involves two vector regs, 113 with no bias as towards which is input or output. 114 It's OK to use r8 as scratch.*/ 115#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \ 116 __attribute__((noinline)) \ 117 static void test_##TESTNAME ( LaneTy ty ) { \ 118 Int i; \ 119 for (i = 0; i < ITERS; i++) { \ 120 V128 block[4+1]; \ 121 memset(block, 0x55, sizeof(block)); \ 122 randV128(&block[0], ty); \ 123 randV128(&block[1], ty); \ 124 randV128(&block[2], ty); \ 125 randV128(&block[3], ty); \ 126 __asm__ __volatile__( \ 127 "mov r9, #0 ; vmsr fpscr, r9 ; " \ 128 "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \ 129 "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \ 130 INSN " ; " \ 131 "add r9, %0, #32 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \ 132 "add r9, %0, #48 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \ 133 "vmrs r9, fpscr ; str r9, [%0, #64] " \ 134 : : "r"(&block[0]) \ 135 : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "r8", "r9" \ 136 ); \ 137 printf(INSN " "); \ 138 UInt fpscr = 0xFFFFFFFF & block[4].u32[0]; \ 139 showV128(&block[0]); printf(" "); \ 140 showV128(&block[1]); printf(" "); \ 141 showV128(&block[2]); printf(" "); \ 142 showV128(&block[3]); printf(" fpscr=%08x\n", fpscr); \ 143 } \ 144 } 145 146 147/* Generate a test that involves three vector regs, 148 with no bias as towards which is input or output. It's also OK 149 to use r8 scratch. */ 150#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \ 151 __attribute__((noinline)) \ 152 static void test_##TESTNAME ( LaneTy ty ) { \ 153 Int i; \ 154 for (i = 0; i < ITERS; i++) { \ 155 V128 block[6+1]; \ 156 memset(block, 0x55, sizeof(block)); \ 157 randV128(&block[0], ty); \ 158 randV128(&block[1], ty); \ 159 randV128(&block[2], ty); \ 160 randV128(&block[3], ty); \ 161 randV128(&block[4], ty); \ 162 randV128(&block[5], ty); \ 163 __asm__ __volatile__( \ 164 "mov r9, #0 ; vmsr fpscr, r9 ; " \ 165 "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \ 166 "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \ 167 "add r9, %0, #32 ; vld1.8 { q"#VECREG3NO" }, [r9] ; " \ 168 INSN " ; " \ 169 "add r9, %0, #48 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \ 170 "add r9, %0, #64 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \ 171 "add r9, %0, #80 ; vst1.8 { q"#VECREG3NO" }, [r9] ; " \ 172 "vmrs r9, fpscr ; str r9, [%0, #96] " \ 173 : : "r"(&block[0]) \ 174 : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "q"#VECREG3NO, \ 175 "r8", "r9" \ 176 ); \ 177 printf(INSN " "); \ 178 UInt fpscr = 0xFFFFFFFF & block[6].u32[0]; \ 179 showV128(&block[0]); printf(" "); \ 180 showV128(&block[1]); printf(" "); \ 181 showV128(&block[2]); printf(" "); \ 182 showV128(&block[3]); printf(" "); \ 183 showV128(&block[4]); printf(" "); \ 184 showV128(&block[5]); printf(" fpscr=%08x\n", fpscr); \ 185 } \ 186 } 187 188// ======================== CRYPTO ======================== 189 190GEN_TWOVEC_TEST(aesd_q_q, "aesd.8 q3, q4", 3, 4) 191GEN_TWOVEC_TEST(aese_q_q, "aese.8 q12, q13", 12, 13) 192GEN_TWOVEC_TEST(aesimc_q_q, "aesimc.8 q15, q0", 15, 0) 193GEN_TWOVEC_TEST(aesmc_q_q, "aesmc.8 q1, q9", 1, 9) 194 195GEN_THREEVEC_TEST(sha1c_q_q_q, "sha1c.32 q11, q10, q2", 11, 10, 2) 196GEN_TWOVEC_TEST(sha1h_q_q, "sha1h.32 q6, q7", 6, 7) 197GEN_THREEVEC_TEST(sha1m_q_q_q, "sha1m.32 q2, q8, q13", 2, 8, 13) 198GEN_THREEVEC_TEST(sha1p_q_q_q, "sha1p.32 q3, q9, q14", 3, 9, 14) 199GEN_THREEVEC_TEST(sha1su0_q_q_q, "sha1su0.32 q4, q10, q15", 4, 10, 15) 200GEN_TWOVEC_TEST(sha1su1_q_q, "sha1su1.32 q11, q2", 11, 2) 201 202GEN_THREEVEC_TEST(sha256h2_q_q_q, "sha256h2.32 q9, q8, q7", 9, 8, 7) 203GEN_THREEVEC_TEST(sha256h_q_q_q, "sha256h.32 q10, q9, q8", 10, 9, 8) 204GEN_TWOVEC_TEST(sha256su0_q_q, "sha256su0.32 q11, q10", 11, 10) 205GEN_THREEVEC_TEST(sha256su1_q_q_q, "sha256su1.32 q12, q11, q10", 12, 11, 10) 206 207// This is a bit complex. This really mentions three registers, so it 208// should really be a THREEVEC variant. But the two source registers 209// are D registers. So we say it is just a TWOVEC insn, producing a Q 210// and taking a single Q (q7); q7 is the d14-d15 register pair, which 211// is why the insn itself is mentions d14 and d15 whereas the 212// numbers that follow mention q7. The result (q7) is 128 bits wide and 213// so is unaffected by these shenanigans. 214GEN_TWOVEC_TEST(pmull_q_d_d, "vmull.p64 q13, d14, d15", 13, 7) 215 216int main ( void ) 217{ 218 // ======================== CRYPTO ======================== 219 220 // aesd.8 q_q (aes single round decryption) 221 // aese.8 q_q (aes single round encryption) 222 // aesimc.8 q_q (aes inverse mix columns) 223 // aesmc.8 q_q (aes mix columns) 224 if (1) DO50( test_aesd_q_q(TyNONE) ); 225 if (1) DO50( test_aese_q_q(TyNONE) ); 226 if (1) DO50( test_aesimc_q_q(TyNONE) ); 227 if (1) DO50( test_aesmc_q_q(TyNONE) ); 228 229 // sha1c.32 q_q_q 230 // sha1h.32 q_q 231 // sha1m.32 q_q_q 232 // sha1p.32 q_q_q 233 // sha1su0.32 q_q_q 234 // sha1su1.32 q_q 235 if (1) DO50( test_sha1c_q_q_q(TyNONE) ); 236 if (1) DO50( test_sha1h_q_q(TyNONE) ); 237 if (1) DO50( test_sha1m_q_q_q(TyNONE) ); 238 if (1) DO50( test_sha1p_q_q_q(TyNONE) ); 239 if (1) DO50( test_sha1su0_q_q_q(TyNONE) ); 240 if (1) DO50( test_sha1su1_q_q(TyNONE) ); 241 242 // sha256h2.32 q_q_q 243 // sha256h.32 q_q_q 244 // sha256su0.32 q_q 245 // sha256su1.32 q_q_q 246 if (1) DO50( test_sha256h2_q_q_q(TyNONE) ); 247 if (1) DO50( test_sha256h_q_q_q(TyNONE) ); 248 if (1) DO50( test_sha256su0_q_q(TyNONE) ); 249 if (1) DO50( test_sha256su1_q_q_q(TyNONE) ); 250 251 // vmull.64 q_d_d 252 if (1) DO50( test_pmull_q_d_d(TyD) ); 253 254 return 0; 255} 256