#!/usr/bin/env perl

# Copyright (c) 2017, Shay Gueron.
# Copyright (c) 2017, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use warnings FATAL => 'all';

# Command line: [flavour] output
#
# Each sub below appends x86-64 assembly text to the package variable $code.
# STDOUT is redirected into a pipe to x86_64-xlate.pl, which receives the
# flavour and the output file name as its arguments.
# NOTE(review): the statement that finally prints $code is not in this part of
# the file - presumably it follows the last sub; confirm.

$flavour = shift;
$output = shift;
# A lone argument containing a dot is the output file name, not a flavour.
# The defined() guards matter: warnings are FATAL here, so matching against an
# undefined $flavour would abort the script.
if (defined $flavour && $flavour =~ /\./) { $output = $flavour; undef $flavour; }

$win64=0;
$win64=1 if ((defined $flavour && $flavour =~ /[nm]asm|mingw64/) ||
             $output =~ /\.asm$/);

# Directory part of the script path, including its trailing separator. When
# the script is invoked without any directory component the match fails, so
# fall back to "" instead of interpolating an undefined $1 (fatal here).
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# An undefined flavour is passed to the translator as "no flavour argument".
my $xlate_flavour = defined $flavour ? $flavour : "";
open OUT,"| \"$^X\" \"$xlate\" $xlate_flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Constant pool referenced by the routines below via RIP-relative addressing.
$code.=<<___;
.data

.align 16
one:
.quad 1,0
two:
.quad 2,0
three:
.quad 3,0
four:
.quad 4,0
five:
.quad 5,0
six:
.quad 6,0
seven:
.quad 7,0
eight:
.quad 8,0

OR_MASK:
.long 0x00000000,0x00000000,0x00000000,0x80000000
poly:
.quad 0x1, 0xc200000000000000
mask:
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
con1:
.long 1,1,1,1
con2:
.long 0x1b,0x1b,0x1b,0x1b
con3:
.byte -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
and_mask:
.long 0,0xffffffff, 0xffffffff, 0xffffffff
___

$code.=<<___;
.text
___

# gfmul appends the file-local GFMUL routine to $code.
#
# GFMUL uses a private register convention (declared \@abi-omnipotent, not
# exported with .globl): it multiplies %xmm0 by %xmm1 and leaves the reduced
# product in %xmm0. The four 64x64-bit carry-less partial products are
# combined into a 256-bit value (low half in TMP1, high half in TMP4); two
# folding steps against the constant |poly| are applied to the low half and
# the high half is XORed in at the end.
sub gfmul {
  #########################
  # a = T
  # b = TMP0 - remains unchanged
  # res = T
  # uses also TMP1,TMP2,TMP3,TMP4
  # __m128i GFMUL(__m128i A, __m128i B);

  my $T = "%xmm0";
  my $TMP0 = "%xmm1";
  my $TMP1 = "%xmm2";
  my $TMP2 = "%xmm3";
  my $TMP3 = "%xmm4";
  my $TMP4 = "%xmm5";

  $code.=<<___;
.type GFMUL,\@abi-omnipotent
.align 16
GFMUL:
.cfi_startproc
    vpclmulqdq \$0x00, $TMP0, $T, $TMP1
    vpclmulqdq \$0x11, $TMP0, $T, $TMP4
    vpclmulqdq \$0x10, $TMP0, $T, $TMP2
    vpclmulqdq \$0x01, $TMP0, $T, $TMP3
    vpxor $TMP3, $TMP2, $TMP2
    vpslldq \$8, $TMP2, $TMP3
    vpsrldq \$8, $TMP2, $TMP2
    vpxor $TMP3, $TMP1, $TMP1
    vpxor $TMP2, $TMP4, $TMP4

    vpclmulqdq \$0x10, poly(%rip), $TMP1, $TMP2
    vpshufd \$78, $TMP1, $TMP3
    vpxor $TMP3, $TMP2, $TMP1

    vpclmulqdq \$0x10, poly(%rip), $TMP1, $TMP2
    vpshufd \$78, $TMP1, $TMP3
    vpxor $TMP3, $TMP2, $TMP1

    vpxor $TMP4, $TMP1, $T
    ret
.cfi_endproc
.size GFMUL, .-GFMUL
___
}
gfmul();

# aesgcmsiv_htable_init appends the exported routine of the same name.
#
# The generated code loads H with an aligned load, stores it as table entry 0
# and then calls GFMUL seven times. GFMUL leaves %xmm1 (= H) untouched, so
# each call multiplies the running value in %xmm0 by H, producing H^2..H^8 at
# consecutive 16-byte offsets. All loads and stores are vmovdqa, so both
# pointers must be 16-byte aligned.
sub aesgcmsiv_htable_init {
  # aesgcmsiv_htable_init writes an eight-entry table of powers of |H| to
  # |out_htable|.
  # void aesgcmsiv_htable_init(uint8_t out_htable[16*8], uint8_t *H);

  my $Htbl = "%rdi";
  my $H = "%rsi";
  my $T = "%xmm0";
  my $TMP0 = "%xmm1";

  $code.=<<___;
.globl aesgcmsiv_htable_init
.type aesgcmsiv_htable_init,\@function,2
.align 16
aesgcmsiv_htable_init:
.cfi_startproc
    vmovdqa ($H), $T
    vmovdqa $T, $TMP0
    vmovdqa $T, ($Htbl)      # H
    call GFMUL
    vmovdqa $T, 16($Htbl)    # H^2
    call GFMUL
    vmovdqa $T, 32($Htbl)    # H^3
    call GFMUL
    vmovdqa $T, 48($Htbl)    # H^4
    call GFMUL
    vmovdqa $T, 64($Htbl)    # H^5
    call GFMUL
    vmovdqa $T, 80($Htbl)    # H^6
    call GFMUL
    vmovdqa $T, 96($Htbl)    # H^7
    call GFMUL
    vmovdqa $T, 112($Htbl)   # H^8
    ret
.cfi_endproc
.size aesgcmsiv_htable_init, .-aesgcmsiv_htable_init
___
}
aesgcmsiv_htable_init();

# aesgcmsiv_htable6_init appends the exported routine of the same name: the
# same construction as aesgcmsiv_htable_init, stopping after H^6.
sub aesgcmsiv_htable6_init {
  # aesgcmsiv_htable6_init writes a six-entry table of powers of |H| to
  # |out_htable|.
  # void aesgcmsiv_htable6_init(uint8_t out_htable[16*6], uint8_t *H);
  #
  my $Htbl = "%rdi";
  my $H = "%rsi";
  my $T = "%xmm0";
  my $TMP0 = "%xmm1";

  $code.=<<___;
.globl aesgcmsiv_htable6_init
.type aesgcmsiv_htable6_init,\@function,2
.align 16
aesgcmsiv_htable6_init:
.cfi_startproc
    vmovdqa ($H), $T
    vmovdqa $T, $TMP0
    vmovdqa $T, ($Htbl)      # H
    call GFMUL
    vmovdqa $T, 16($Htbl)    # H^2
    call GFMUL
    vmovdqa $T, 32($Htbl)    # H^3
    call GFMUL
    vmovdqa $T, 48($Htbl)    # H^4
    call GFMUL
    vmovdqa $T, 64($Htbl)    # H^5
    call GFMUL
    vmovdqa $T, 80($Htbl)    # H^6
    ret
.cfi_endproc
.size aesgcmsiv_htable6_init, .-aesgcmsiv_htable6_init
___
}
aesgcmsiv_htable6_init();

# aesgcmsiv_htable_polyval appends the exported routine of the same name.
#
# Shape of the generated code:
#   * returns immediately when LEN is zero;
#   * LEN & 127 bytes (the leading blocks that do not fill a group of eight)
#     are hashed first, indexing the table by the number of prefix bytes still
#     to come, so the first block meets the highest power of H;
#   * the main loop then consumes 128 bytes per iteration, interleaving the
#     two-stage reduction of the previous accumulator with the partial
#     products of the current eight blocks;
#   * a final two-stage reduction writes the 16-byte result back to |T|.
# The running value at |T| is read with vmovdqa, so |T| must be 16-byte
# aligned; message blocks are read with unaligned loads.
sub aesgcmsiv_htable_polyval {
  # void aesgcmsiv_htable_polyval(uint8_t Htbl[16*8], uint8_t *MSG, uint64_t LEN, uint8_t *T);
  # parameter 1: %rdi     Htable  - pointer to Htable
  # parameter 2: %rsi     INp     - pointer to input
  # parameter 3: %rdx     LEN     - length of BUFFER in bytes
  # parameter 4: %rcx     T       - pointer to POLYVAL output

  my $DATA = "%xmm0";
  my $hlp0 = "%r11";
  my $Htbl = "%rdi";
  my $inp = "%rsi";
  my $len = "%rdx";
  my $TMP0 = "%xmm3";
  my $TMP1 = "%xmm4";
  my $TMP2 = "%xmm5";
  my $TMP3 = "%xmm6";
  my $TMP4 = "%xmm7";
  my $Tp = "%rcx";
  my $T = "%xmm1";
  my $Xhi = "%xmm9";

  # Returns the assembly that multiplies $DATA by table entry $i and
  # accumulates the partial products: low into $TMP0, high into $TMP1 and
  # both cross terms into $TMP2. $TMP3 is scratch.
  my $SCHOOLBOOK_AAD = sub {
    my ($i)=@_;
    return <<___;
    vpclmulqdq \$0x01, ${\eval(16*$i)}($Htbl), $DATA, $TMP3
    vpxor $TMP3, $TMP2, $TMP2
    vpclmulqdq \$0x00, ${\eval(16*$i)}($Htbl), $DATA, $TMP3
    vpxor $TMP3, $TMP0, $TMP0
    vpclmulqdq \$0x11, ${\eval(16*$i)}($Htbl), $DATA, $TMP3
    vpxor $TMP3, $TMP1, $TMP1
    vpclmulqdq \$0x10, ${\eval(16*$i)}($Htbl), $DATA, $TMP3
    vpxor $TMP3, $TMP2, $TMP2
___
  };

  $code.=<<___;
.globl aesgcmsiv_htable_polyval
.type aesgcmsiv_htable_polyval,\@function,4
.align 16
aesgcmsiv_htable_polyval:
.cfi_startproc
    test $len, $len
    jnz .Lhtable_polyval_start
    ret

.Lhtable_polyval_start:
    vzeroall

    # We hash 8 blocks each iteration. If the total number of blocks is not a
    # multiple of 8, we first hash the leading n%8 blocks.
    movq $len, $hlp0
    andq \$127, $hlp0

    jz .Lhtable_polyval_no_prefix

    vpxor $Xhi, $Xhi, $Xhi
    vmovdqa ($Tp), $T
    sub $hlp0, $len

    sub \$16, $hlp0

    # hash first prefix block
    vmovdqu ($inp), $DATA
    vpxor $T, $DATA, $DATA

    vpclmulqdq \$0x01, ($Htbl,$hlp0), $DATA, $TMP2
    vpclmulqdq \$0x00, ($Htbl,$hlp0), $DATA, $TMP0
    vpclmulqdq \$0x11, ($Htbl,$hlp0), $DATA, $TMP1
    vpclmulqdq \$0x10, ($Htbl,$hlp0), $DATA, $TMP3
    vpxor $TMP3, $TMP2, $TMP2

    lea 16($inp), $inp
    test $hlp0, $hlp0
    jnz .Lhtable_polyval_prefix_loop
    jmp .Lhtable_polyval_prefix_complete

    # hash remaining prefix bocks (up to 7 total prefix blocks)
.align 64
.Lhtable_polyval_prefix_loop:
    sub \$16, $hlp0

    vmovdqu ($inp), $DATA           # next data block

    vpclmulqdq \$0x00, ($Htbl,$hlp0), $DATA, $TMP3
    vpxor $TMP3, $TMP0, $TMP0
    vpclmulqdq \$0x11, ($Htbl,$hlp0), $DATA, $TMP3
    vpxor $TMP3, $TMP1, $TMP1
    vpclmulqdq \$0x01, ($Htbl,$hlp0), $DATA, $TMP3
    vpxor $TMP3, $TMP2, $TMP2
    vpclmulqdq \$0x10, ($Htbl,$hlp0), $DATA, $TMP3
    vpxor $TMP3, $TMP2, $TMP2

    test $hlp0, $hlp0

    lea 16($inp), $inp

    jnz .Lhtable_polyval_prefix_loop

.Lhtable_polyval_prefix_complete:
    vpsrldq \$8, $TMP2, $TMP3
    vpslldq \$8, $TMP2, $TMP2

    vpxor $TMP3, $TMP1, $Xhi
    vpxor $TMP2, $TMP0, $T

    jmp .Lhtable_polyval_main_loop

.Lhtable_polyval_no_prefix:
    # At this point we know the number of blocks is a multiple of 8. However,
    # the reduction in the main loop includes a multiplication by x^(-128). In
    # order to counter this, the existing tag needs to be multipled by x^128.
    # In practice, this just means that it is loaded into $Xhi, not $T.
    vpxor $T, $T, $T
    vmovdqa ($Tp), $Xhi

.align 64
.Lhtable_polyval_main_loop:
    sub \$0x80, $len
    jb .Lhtable_polyval_out

    vmovdqu 16*7($inp), $DATA      # Ii

    vpclmulqdq \$0x01, ($Htbl), $DATA, $TMP2
    vpclmulqdq \$0x00, ($Htbl), $DATA, $TMP0
    vpclmulqdq \$0x11, ($Htbl), $DATA, $TMP1
    vpclmulqdq \$0x10, ($Htbl), $DATA, $TMP3
    vpxor $TMP3, $TMP2, $TMP2

    #########################################################
    vmovdqu 16*6($inp), $DATA
    ${\$SCHOOLBOOK_AAD->(1)}

    #########################################################
    vmovdqu 16*5($inp), $DATA

    vpclmulqdq \$0x10, poly(%rip), $T, $TMP4         # reduction stage 1a
    vpalignr \$8, $T, $T, $T

    ${\$SCHOOLBOOK_AAD->(2)}

    vpxor $TMP4, $T, $T                              # reduction stage 1b
    #########################################################
    vmovdqu 16*4($inp), $DATA

    ${\$SCHOOLBOOK_AAD->(3)}
    #########################################################
    vmovdqu 16*3($inp), $DATA

    vpclmulqdq \$0x10, poly(%rip), $T, $TMP4         # reduction stage 2a
    vpalignr \$8, $T, $T, $T

    ${\$SCHOOLBOOK_AAD->(4)}

    vpxor $TMP4, $T, $T                              # reduction stage 2b
    #########################################################
    vmovdqu 16*2($inp), $DATA

    ${\$SCHOOLBOOK_AAD->(5)}

    vpxor $Xhi, $T, $T                               # reduction finalize
    #########################################################
    vmovdqu 16*1($inp), $DATA

    ${\$SCHOOLBOOK_AAD->(6)}
    #########################################################
    vmovdqu 16*0($inp), $DATA
    vpxor $T, $DATA, $DATA

    ${\$SCHOOLBOOK_AAD->(7)}
    #########################################################
    vpsrldq \$8, $TMP2, $TMP3
    vpslldq \$8, $TMP2, $TMP2

    vpxor $TMP3, $TMP1, $Xhi
    vpxor $TMP2, $TMP0, $T

    lea 16*8($inp), $inp
    jmp .Lhtable_polyval_main_loop

    #########################################################

.Lhtable_polyval_out:
    vpclmulqdq \$0x10, poly(%rip), $T, $TMP3
    vpalignr \$8, $T, $T, $T
    vpxor $TMP3, $T, $T

    vpclmulqdq \$0x10, poly(%rip), $T, $TMP3
    vpalignr \$8, $T, $T, $T
    vpxor $TMP3, $T, $T
    vpxor $Xhi, $T, $T

    vmovdqu $T, ($Tp)
    vzeroupper
    ret
.cfi_endproc
.size aesgcmsiv_htable_polyval,.-aesgcmsiv_htable_polyval
___
}
aesgcmsiv_htable_polyval();

sub aesgcmsiv_polyval_horner {
  #void aesgcmsiv_polyval_horner(unsigned char T[16],  // output
  #      const unsigned char* H, // H
  #      unsigned char* BUF,  // Buffer
  #      unsigned int blocks);  // Len2
  #
  # parameter 1: %rdi T - pointers to POLYVAL output
  # parameter 2: %rsi Hp - pointer to H (user key)
  # parameter 3: %rdx INp - pointer to input
  # parameter 4: %rcx L - total number of blocks in input BUFFER
  #
  my $T = "%rdi";
  my $Hp = "%rsi";
  my $INp = "%rdx";
  my $L = "%rcx";
  my $LOC = "%r10";
  my $LEN = "%eax";
  my $H = "%xmm1";
  my $RES = "%xmm0";

  $code.=<<___;
.globl aesgcmsiv_polyval_horner
.type aesgcmsiv_polyval_horner,\@function,4
.align 16
aesgcmsiv_polyval_horner:
.cfi_startproc
    test $L, $L
    jnz .Lpolyval_horner_start
    ret

.Lpolyval_horner_start:
    # We will start with L GFMULS for POLYVAL(BIG_BUFFER)
    # RES = GFMUL(RES, H)

    xorq $LOC, $LOC
    shlq \$4, $L    # L contains number of bytes to process

4348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($Hp), $H 4358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($T), $RES 4368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.Lpolyval_horner_loop: 4388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($INp,$LOC), $RES, $RES # RES = RES + Xi 4398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan call GFMUL # RES = RES * H 4408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan add \$16, $LOC 4428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan cmp $LOC, $L 4438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jne .Lpolyval_horner_loop 4448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # calculation of T is complete. RES=T 4468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $RES, ($T) 4478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 4488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 4498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aesgcmsiv_polyval_horner,.-aesgcmsiv_polyval_horner 4508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 4518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 4528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaesgcmsiv_polyval_horner(); 4538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# void aes128gcmsiv_aes_ks(const uint8_t *key, uint8_t *out_expanded_key); 4558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 1: %rdi 4568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 2: %rsi 4578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 4588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes128gcmsiv_aes_ks 4598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes128gcmsiv_aes_ks,\@function,2 
4608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 4618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_aes_ks: 4628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 4638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu (%rdi), %xmm1 # xmm1 = user key 4648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, (%rsi) # rsi points to output 4658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa con1(%rip), %xmm0 4678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa mask(%rip), %xmm15 4688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan movq \$8, %rax 4708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.Lks128_loop: 4728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$16, %rsi # rsi points for next key 4738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$1, %rax 4748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm1, %xmm2 # xmm2 = shuffled user key 4758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 4768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslld \$1, %xmm0, %xmm0 4778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm1, %xmm3 4788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 4798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm3, %xmm3 4808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 4818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm3, %xmm3 4828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 4838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm1, %xmm1 4848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, (%rsi) 
4858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jne .Lks128_loop 4868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 4878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa con2(%rip), %xmm0 4888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm1, %xmm2 4898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 4908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslld \$1, %xmm0, %xmm0 4918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm1, %xmm3 4928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 4938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm3, %xmm3 4948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 4958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm3, %xmm3 4968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 4978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm1, %xmm1 4988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, 16(%rsi) 4998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm1, %xmm2 5018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 5028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm1, %xmm3 5038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 5048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm3, %xmm3 5058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 5068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm3, %xmm3 5078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm3, %xmm1, %xmm1 5088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm1, %xmm1 5098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, 
32(%rsi) 5108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 5118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 5128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes128gcmsiv_aes_ks,.-aes128gcmsiv_aes_ks 5138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 5148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# void aes256gcmsiv_aes_ks(const uint8_t *key, uint8_t *out_expanded_key); 5168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 1: %rdi 5178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 2: %rsi 5188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 5198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes256gcmsiv_aes_ks 5208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes256gcmsiv_aes_ks,\@function,2 5218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 5228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes256gcmsiv_aes_ks: 5238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 5248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu (%rdi), %xmm1 5258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 16(%rdi), %xmm3 5268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, (%rsi) 5278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm3, 16(%rsi) 5288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa con1(%rip), %xmm0 5298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa mask(%rip), %xmm15 5308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm14, %xmm14, %xmm14 5318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan mov \$6, %rax 5328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.Lks256_loop: 5348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan add \$32, %rsi 5358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$1, %rax 
5368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm3, %xmm2 5378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 5388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslld \$1, %xmm0, %xmm0 5398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpsllq \$32, %xmm1, %xmm4 5408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 5418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb con3(%rip), %xmm1, %xmm4 5428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 5438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm1, %xmm1 5448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, (%rsi) 5458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufd \$0xff, %xmm1, %xmm2 5468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm14, %xmm2, %xmm2 5478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpsllq \$32, %xmm3, %xmm4 5488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm3, %xmm3 5498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb con3(%rip), %xmm3, %xmm4 5508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm3, %xmm3 5518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm3, %xmm3 5528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm3, 16(%rsi) 5538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jne .Lks256_loop 5548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm3, %xmm2 5568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 5578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpsllq \$32, %xmm1, %xmm4 5588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 5598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb con3(%rip), %xmm1, %xmm4 
5608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 5618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm1, %xmm1 5628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, 32(%rsi) 5638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 5648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 5658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 5668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloansub aes128gcmsiv_aes_ks_enc_x1 { 5688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KS1_REGA = "%xmm1"; 5698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KS1_REGB = "%xmm2"; 5708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK1 = "%xmm4"; 5718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $AUXREG = "%xmm3"; 5728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KS_BLOCK = sub { 5748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($reg, $reg2, $auxReg) = @_; 5758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 5768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpsllq \$32, $reg, $auxReg #!!saving mov instruction to xmm3 5778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $auxReg, $reg, $reg 5788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb con3(%rip), $reg, $auxReg 5798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $auxReg, $reg, $reg 5808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $reg2, $reg, $reg 5818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 5828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 5838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $round = sub { 5858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i, $j) = @_; 
5868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 5878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm1, %xmm2 #!!saving mov instruction to xmm2 5888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 5898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslld \$1, %xmm0, %xmm0 5908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$KS_BLOCK->($KS1_REGA, $KS1_REGB, $AUXREG)} 5918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc %xmm1, $BLOCK1, $BLOCK1 5928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, ${\eval(16*$i)}($j) 5938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 5948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 5958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 5968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $roundlast = sub { 5978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i, $j) = @_; 5988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 5998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm1, %xmm2 #!!saving mov instruction to xmm2 6008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 6018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$KS_BLOCK->($KS1_REGA, $KS1_REGB, $AUXREG)} 6028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm1, $BLOCK1, $BLOCK1 6038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, ${\eval(16*$i)}($j) 6048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 6058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 6068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 1: %rdi Pointer to PT 6088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 2: %rsi Pointer to CT 6098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 4: %rdx Pointer to keys 
6108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 5: %rcx Pointer to initial key 6118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 6128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes128gcmsiv_aes_ks_enc_x1 6138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes128gcmsiv_aes_ks_enc_x1,\@function,4 6148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 6158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_aes_ks_enc_x1: 6168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 6178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa (%rcx), %xmm1 # xmm1 = first 16 bytes of random key 6188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa 0*16(%rdi), $BLOCK1 6198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa %xmm1, (%rdx) # KEY[0] = first 16 bytes of random key 6218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm1, $BLOCK1, $BLOCK1 6228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa con1(%rip), %xmm0 # xmm0 = 1,1,1,1 6248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa mask(%rip), %xmm15 # xmm15 = mask 6258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(1, "%rdx")} 6278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(2, "%rdx")} 6288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(3, "%rdx")} 6298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(4, "%rdx")} 6308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(5, "%rdx")} 6318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(6, "%rdx")} 6328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(7, "%rdx")} 6338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(8, "%rdx")} 
6348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa con2(%rip), %xmm0 6368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round->(9, "%rdx")} 6388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$roundlast->(10, "%rdx")} 6398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $BLOCK1, 0*16(%rsi) 6418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 6428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 6438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes128gcmsiv_aes_ks_enc_x1,.-aes128gcmsiv_aes_ks_enc_x1 6448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 6458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 6468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_aes_ks_enc_x1(); 6478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloansub aes128gcmsiv_kdf { 6498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK1 = "%xmm9"; 6508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK2 = "%xmm10"; 6518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK3 = "%xmm11"; 6528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK4 = "%xmm12"; 6538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK5 = "%xmm13"; 6548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK6 = "%xmm14"; 6558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $ONE = "%xmm13"; 6568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KSp = "%rdx"; 6578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE_1 = "%xmm1"; 6588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $enc_roundx4 = sub { 6608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i, $j) = @_; 
6618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 6628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ${\eval($i*16)}(%rdx), $j 6638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $j, $BLOCK1, $BLOCK1 6648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $j, $BLOCK2, $BLOCK2 6658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $j, $BLOCK3, $BLOCK3 6668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $j, $BLOCK4, $BLOCK4 6678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 6688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 6698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $enc_roundlastx4 = sub { 6718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i, $j) = @_; 6728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 6738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ${\eval($i*16)}(%rdx), $j 6748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $j, $BLOCK1, $BLOCK1 6758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $j, $BLOCK2, $BLOCK2 6768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $j, $BLOCK3, $BLOCK3 6778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $j, $BLOCK4, $BLOCK4 6788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 6798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 6808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# void aes128gcmsiv_kdf(const uint8_t nonce[16], 6828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# uint8_t *out_key_material, 6838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# const uint8_t *key_schedule); 6848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 6858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes128gcmsiv_kdf 6868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert 
Sloan.type aes128gcmsiv_kdf,\@function,3 6878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 6888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_kdf: 6898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 6908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 1: %rdi Pointer to NONCE 6918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 2: %rsi Pointer to CT 6928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 4: %rdx Pointer to keys 6938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 6948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa (%rdx), %xmm1 # xmm1 = first 16 bytes of random key 6958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa 0*16(%rdi), $BLOCK1 6968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa and_mask(%rip), $BLOCK4 6978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa one(%rip), $ONE 6988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufd \$0x90, $BLOCK1, $BLOCK1 6998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpand $BLOCK4, $BLOCK1, $BLOCK1 7008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd $ONE, $BLOCK1, $BLOCK2 7018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd $ONE, $BLOCK2, $BLOCK3 7028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd $ONE, $BLOCK3, $BLOCK4 7038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm1, $BLOCK1, $BLOCK1 7058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm1, $BLOCK2, $BLOCK2 7068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm1, $BLOCK3, $BLOCK3 7078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm1, $BLOCK4, $BLOCK4 7088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(1, "%xmm1")} 7108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(2, 
"%xmm2")} 7118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(3, "%xmm1")} 7128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(4, "%xmm2")} 7138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(5, "%xmm1")} 7148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(6, "%xmm2")} 7158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(7, "%xmm1")} 7168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(8, "%xmm2")} 7178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundx4->(9, "%xmm1")} 7188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$enc_roundlastx4->(10, "%xmm2")} 7198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $BLOCK1, 0*16(%rsi) 7218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $BLOCK2, 1*16(%rsi) 7228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $BLOCK3, 2*16(%rsi) 7238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $BLOCK4, 3*16(%rsi) 7248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 7258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 7268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes128gcmsiv_kdf,.-aes128gcmsiv_kdf 7278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 7288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 7298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_kdf(); 7308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloansub aes128gcmsiv_enc_msg_x4 { 7328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR1 = "%xmm0"; 7338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR2 = "%xmm1"; 7348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR3 = "%xmm2"; 7358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR4 = "%xmm3"; 
7368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $ADDER = "%xmm4"; 7378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE1 = "%xmm5"; 7398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE2 = "%xmm6"; 7408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE3 = "%xmm7"; 7418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE4 = "%xmm8"; 7428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP = "%xmm12"; 7448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP2 = "%xmm13"; 7458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP3 = "%xmm14"; 7468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $IV = "%xmm15"; 7478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $PT = "%rdi"; 7498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CT = "%rsi"; 7508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TAG = "%rdx"; 7518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KS = "%rcx"; 7528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $LEN = "%r8"; 7538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $aes_round = sub { 7558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 7568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 7578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16)}($KS), $TMP 7588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP, $STATE1, $STATE1 7598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP, $STATE2, $STATE2 7608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP, $STATE3, $STATE3 7618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP, $STATE4, $STATE4 7628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 
7638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 7648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $aes_lastround = sub { 7668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 7678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 7688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16)}($KS), $TMP 7698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP, $STATE1, $STATE1 7708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP, $STATE2, $STATE2 7718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP, $STATE3, $STATE3 7728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP, $STATE4, $STATE4 7738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 7748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 7758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# void aes128gcmsiv_enc_msg_x4(unsigned char* PT, unsigned char* CT, 7778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# unsigned char* TAG, unsigned char* KS, 7788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# size_t byte_len); 7798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 1: %rdi #PT 7808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 2: %rsi #CT 7818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 3: %rdx #TAG [127 126 ... 
0] IV=[127...32] 7828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 4: %rcx #KS 7838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# parameter 5: %r8 #LEN MSG_length in bytes 7848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 7858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes128gcmsiv_enc_msg_x4 7868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes128gcmsiv_enc_msg_x4,\@function,5 7878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 7888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_enc_msg_x4: 7898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 7908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan test $LEN, $LEN 7918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jnz .L128_enc_msg_x4_start 7928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 7938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 7948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L128_enc_msg_x4_start: 7958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan pushq %r12 7968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_push %r12 7978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan pushq %r13 7988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_push %r13 7998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shrq \$4, $LEN # LEN = num of blocks 8018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan movq $LEN, %r10 8028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shlq \$62, %r10 8038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shrq \$62, %r10 8048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # make IV from TAG 8068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($TAG), $IV 8078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpor OR_MASK(%rip), $IV, $IV #IV = [1]TAG[126...32][00..00] 
8088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu four(%rip), $ADDER # Register to increment counters 8108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $IV, $CTR1 # CTR1 = TAG[1][127...32][00..00] 8118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $IV, $CTR2 # CTR2 = TAG[1][127...32][00..01] 8128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $IV, $CTR3 # CTR3 = TAG[1][127...32][00..02] 8138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd three(%rip), $IV, $CTR4 # CTR4 = TAG[1][127...32][00..03] 8148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shrq \$2, $LEN 8168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan je .L128_enc_msg_x4_check_remainder 8178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$64, $CT 8198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$64, $PT 8208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L128_enc_msg_x4_loop1: 8228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$64, $CT 8238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$64, $PT 8248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR1, $STATE1 8268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR2, $STATE2 8278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR3, $STATE3 8288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR4, $STATE4 8298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE1, $STATE1 8318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE2, $STATE2 8328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 
($KS), $STATE3, $STATE3 8338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE4, $STATE4 8348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(1)} 8368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd $ADDER, $CTR1, $CTR1 8378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(2)} 8388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd $ADDER, $CTR2, $CTR2 8398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(3)} 8408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd $ADDER, $CTR3, $CTR3 8418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(4)} 8428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd $ADDER, $CTR4, $CTR4 8438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(5)} 8458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(6)} 8468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(7)} 8478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(8)} 8488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round->(9)} 8498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_lastround->(10)} 8508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # XOR with Plaintext 8528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 0*16($PT), $STATE1, $STATE1 8538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 1*16($PT), $STATE2, $STATE2 8548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 2*16($PT), $STATE3, $STATE3 8558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 3*16($PT), $STATE4, $STATE4 8568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$1, $LEN 8588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 
8598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE1, 0*16($CT) 8608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE2, 1*16($CT) 8618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE3, 2*16($CT) 8628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE4, 3*16($CT) 8638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jne .L128_enc_msg_x4_loop1 8658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$64,$CT 8678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$64,$PT 8688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L128_enc_msg_x4_check_remainder: 8708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan cmpq \$0, %r10 8718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan je .L128_enc_msg_x4_out 8728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L128_enc_msg_x4_loop2: 8748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # enc each block separately 8758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # CTR1 is the highest counter (even if no LOOP done) 8768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR1, $STATE1 8778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR1, $CTR1 # inc counter 8788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE1, $STATE1 8808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 16($KS), $STATE1, $STATE1 8818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 32($KS), $STATE1, $STATE1 8828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 48($KS), $STATE1, $STATE1 8838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 64($KS), $STATE1, $STATE1 
8848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 80($KS), $STATE1, $STATE1 8858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 96($KS), $STATE1, $STATE1 8868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 112($KS), $STATE1, $STATE1 8878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 128($KS), $STATE1, $STATE1 8888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 144($KS), $STATE1, $STATE1 8898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast 160($KS), $STATE1, $STATE1 8908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # XOR with plaintext 8928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($PT), $STATE1, $STATE1 8938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE1, ($CT) 8948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$16, $PT 8968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$16, $CT 8978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 8988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$1, %r10 8998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jne .L128_enc_msg_x4_loop2 9008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 9018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L128_enc_msg_x4_out: 9028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan popq %r13 9038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_pop %r13 9048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan popq %r12 9058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_pop %r12 9068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 9078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 9088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes128gcmsiv_enc_msg_x4,.-aes128gcmsiv_enc_msg_x4 9098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 
}
aes128gcmsiv_enc_msg_x4();

# aes128gcmsiv_enc_msg_x8 appends to $code the assembly for the routine
# aes128gcmsiv_enc_msg_x8, an AES-128 counter-mode encryption loop that
# processes eight 16-byte blocks per iteration and then finishes any
# remaining whole blocks one at a time.
#
# Overview of the emitted routine:
#   * Returns immediately when LEN is zero.
#   * LEN is shifted right by 4 to get a block count, so any trailing
#     partial block (LEN mod 16 bytes) is not processed here.
#   * %r10 keeps the low three bits of the block count (blocks mod 8) for
#     the single-block tail loop; LEN is then shifted right by 3 to count
#     eight-block iterations.
#   * The initial counter block is the 16 bytes at TAG OR-ed with OR_MASK.
#     OR_MASK and the constants one..eight are referenced %rip-relative and
#     are defined outside this sub.
#   * Counters are advanced with vpaddd/vpsubd, i.e. per 32-bit lane; with
#     constants of the form ".quad N,0" only the lowest 32-bit lane changes.
#   * Round keys are read from KS at offsets 0, 16, ..., 160 (eleven 16-byte
#     entries).
#
# Register use: every xmm register from %xmm0 to %xmm15 is assigned below, so
# the eighth counter has no register of its own and lives in the 16 bytes at
# (%rsp). The stack area is obtained by subtracting 128 from %rsp and
# aligning it down to 64 bytes; %rbp holds the original %rsp for the
# epilogue.
sub aes128gcmsiv_enc_msg_x8 {
  # Working state for the eight blocks being encrypted in parallel.
  my $STATE1 = "%xmm1";
  my $STATE2 = "%xmm2";
  my $STATE3 = "%xmm3";
  my $STATE4 = "%xmm4";
  my $STATE5 = "%xmm5";
  my $STATE6 = "%xmm6";
  my $STATE7 = "%xmm7";
  my $STATE8 = "%xmm8";

  # Counter blocks 1..7; the eighth counter is kept at (%rsp).
  my $CTR1 = "%xmm0";
  my $CTR2 = "%xmm9";
  my $CTR3 = "%xmm10";
  my $CTR4 = "%xmm11";
  my $CTR5 = "%xmm12";
  my $CTR6 = "%xmm13";
  my $CTR7 = "%xmm14";
  # Holds the round key currently being applied to all eight states.
  my $SCHED = "%xmm15";

  # $TMP1 names the same register as $STATE1; it is only used while the
  # counters are being set up, before $STATE1 is first written.
  my $TMP1 = "%xmm1";
  # NOTE(review): $TMP2 is not referenced anywhere in this sub.
  my $TMP2 = "%xmm2";

  # Argument registers, matching the parameter list documented below.
  my $PT = "%rdi";
  my $CT = "%rsi";
  my $TAG = "%rdx";
  my $KS = "%rcx";
  my $LEN = "%r8";

  # Returns the assembly for AES round $i: load the round key at byte offset
  # $i*16 from KS into $SCHED and apply vaesenc to all eight states.
  my $aes_round8 = sub {
    my ($i) = @_;
    return <<___;
    vmovdqu ${\eval($i*16)}($KS), $SCHED
    vaesenc $SCHED, $STATE1, $STATE1
    vaesenc $SCHED, $STATE2, $STATE2
    vaesenc $SCHED, $STATE3, $STATE3
    vaesenc $SCHED, $STATE4, $STATE4
    vaesenc $SCHED, $STATE5, $STATE5
    vaesenc $SCHED, $STATE6, $STATE6
    vaesenc $SCHED, $STATE7, $STATE7
    vaesenc $SCHED, $STATE8, $STATE8
___
  };

  # Same as $aes_round8 but emits vaesenclast, for the final round.
  my $aes_lastround8 = sub {
    my ($i) = @_;
    return <<___;
    vmovdqu ${\eval($i*16)}($KS), $SCHED
    vaesenclast $SCHED, $STATE1, $STATE1
    vaesenclast $SCHED, $STATE2, $STATE2
    vaesenclast $SCHED, $STATE3, $STATE3
    vaesenclast $SCHED, $STATE4, $STATE4
    vaesenclast $SCHED, $STATE5, $STATE5
    vaesenclast $SCHED, $STATE6, $STATE6
    vaesenclast $SCHED, $STATE7, $STATE7
    vaesenclast $SCHED, $STATE8, $STATE8
___
  };

# void ENC_MSG_x8(unsigned char* PT,
#                 unsigned char* CT,
#                 unsigned char* TAG,
#                 unsigned char* KS,
#                 size_t byte_len);
# parameter 1: %rdi     #PT
# parameter 2: %rsi     #CT
# parameter 3: %rdx     #TAG        [127 126 ... 0]  IV=[127...32]
# parameter 4: %rcx     #KS
# parameter 5: %r8      #LEN MSG_length in bytes
#
# Notes on the emitted code below:
#   * In the eight-block loop the counter updates are interleaved between AES
#     rounds. After round 1 the stack counter (counter 8) is loaded into
#     $CTR7, advanced by eight and stored back; after round 2 one is
#     subtracted from that register, which leaves $CTR7 holding its own next
#     value (old counter 8 + 7 == old counter 7 + 8).
#   * "dec $LEN" is issued before the ciphertext stores and the "jne" after
#     them; the loop branch relies on the stores (vmovdqu) leaving the flags
#     untouched. Likewise "je" after "shrq \$3" tests the flags of the shift.
#   * CT and PT are pre-decremented by 128 before the loop because the loop
#     body starts by adding 128 to each; 128 is added once more after the
#     loop so both point at the first unprocessed block.
#   * The tail loop encrypts %r10 single blocks using $CTR1, incrementing it
#     by one per block.
#   * NOTE(review): %r12 and %r13 are pushed and popped but are not otherwise
#     referenced by this routine.
  $code.=<<___;
.globl aes128gcmsiv_enc_msg_x8
.type aes128gcmsiv_enc_msg_x8,\@function,5
.align 16
aes128gcmsiv_enc_msg_x8:
.cfi_startproc
    test $LEN, $LEN
    jnz .L128_enc_msg_x8_start
    ret

.L128_enc_msg_x8_start:
    pushq %r12
.cfi_push %r12
    pushq %r13
.cfi_push %r13
    pushq %rbp
.cfi_push %rbp
    movq %rsp, %rbp
.cfi_def_cfa_register rbp

    # Place in stack
    subq \$128, %rsp
    andq \$-64, %rsp

    shrq \$4, $LEN  # LEN = num of blocks
    movq $LEN, %r10
    shlq \$61, %r10
    shrq \$61, %r10

    # make IV from TAG
    vmovdqu ($TAG), $TMP1
    vpor OR_MASK(%rip), $TMP1, $TMP1  # TMP1= IV = [1]TAG[126...32][00..00]

    # store counter8 in the stack
    vpaddd seven(%rip), $TMP1, $CTR1
    vmovdqu $CTR1, (%rsp)             # CTR8 = TAG[127...32][00..07]
    vpaddd one(%rip), $TMP1, $CTR2    # CTR2 = TAG[127...32][00..01]
    vpaddd two(%rip), $TMP1, $CTR3    # CTR3 = TAG[127...32][00..02]
    vpaddd three(%rip), $TMP1, $CTR4  # CTR4 = TAG[127...32][00..03]
    vpaddd four(%rip), $TMP1, $CTR5   # CTR5 = TAG[127...32][00..04]
    vpaddd five(%rip), $TMP1, $CTR6   # CTR6 = TAG[127...32][00..05]
    vpaddd six(%rip), $TMP1, $CTR7    # CTR7 = TAG[127...32][00..06]
    vmovdqa $TMP1, $CTR1              # CTR1 = TAG[127...32][00..00]

    shrq \$3, $LEN
    je .L128_enc_msg_x8_check_remainder

    subq \$128, $CT
    subq \$128, $PT

.L128_enc_msg_x8_loop1:
    addq \$128, $CT
    addq \$128, $PT

    vmovdqa $CTR1, $STATE1
    vmovdqa $CTR2, $STATE2
    vmovdqa $CTR3, $STATE3
    vmovdqa $CTR4, $STATE4
    vmovdqa $CTR5, $STATE5
    vmovdqa $CTR6, $STATE6
    vmovdqa $CTR7, $STATE7
    # move from stack
    vmovdqu (%rsp), $STATE8

    vpxor ($KS), $STATE1, $STATE1
    vpxor ($KS), $STATE2, $STATE2
    vpxor ($KS), $STATE3, $STATE3
    vpxor ($KS), $STATE4, $STATE4
    vpxor ($KS), $STATE5, $STATE5
    vpxor ($KS), $STATE6, $STATE6
    vpxor ($KS), $STATE7, $STATE7
    vpxor ($KS), $STATE8, $STATE8

    ${\$aes_round8->(1)}
    vmovdqu (%rsp), $CTR7  # deal with CTR8
    vpaddd eight(%rip), $CTR7, $CTR7
    vmovdqu $CTR7, (%rsp)
    ${\$aes_round8->(2)}
    vpsubd one(%rip), $CTR7, $CTR7
    ${\$aes_round8->(3)}
    vpaddd eight(%rip), $CTR1, $CTR1
    ${\$aes_round8->(4)}
    vpaddd eight(%rip), $CTR2, $CTR2
    ${\$aes_round8->(5)}
    vpaddd eight(%rip), $CTR3, $CTR3
    ${\$aes_round8->(6)}
    vpaddd eight(%rip), $CTR4, $CTR4
    ${\$aes_round8->(7)}
    vpaddd eight(%rip), $CTR5, $CTR5
    ${\$aes_round8->(8)}
    vpaddd eight(%rip), $CTR6, $CTR6
    ${\$aes_round8->(9)}
    ${\$aes_lastround8->(10)}

    # XOR with Plaintext
    vpxor 0*16($PT), $STATE1, $STATE1
    vpxor 1*16($PT), $STATE2, $STATE2
    vpxor 2*16($PT), $STATE3, $STATE3
    vpxor 3*16($PT), $STATE4, $STATE4
    vpxor 4*16($PT), $STATE5, $STATE5
    vpxor 5*16($PT), $STATE6, $STATE6
    vpxor 6*16($PT), $STATE7, $STATE7
    vpxor 7*16($PT), $STATE8, $STATE8

    dec $LEN

    vmovdqu $STATE1, 0*16($CT)
    vmovdqu $STATE2, 1*16($CT)
    vmovdqu $STATE3, 2*16($CT)
    vmovdqu $STATE4, 3*16($CT)
    vmovdqu $STATE5, 4*16($CT)
    vmovdqu $STATE6, 5*16($CT)
    vmovdqu $STATE7, 6*16($CT)
    vmovdqu $STATE8, 7*16($CT)

    jne .L128_enc_msg_x8_loop1

    addq \$128, $CT
    addq \$128, $PT

.L128_enc_msg_x8_check_remainder:
    cmpq \$0, %r10
    je .L128_enc_msg_x8_out

.L128_enc_msg_x8_loop2:
    # enc each block separately
    # CTR1 is the highest counter (even if no LOOP done)
    vmovdqa $CTR1, $STATE1
    vpaddd one(%rip), $CTR1, $CTR1  # inc counter

    vpxor ($KS), $STATE1, $STATE1
    vaesenc 16($KS), $STATE1, $STATE1
    vaesenc 32($KS), $STATE1, $STATE1
    vaesenc 48($KS), $STATE1, $STATE1
    vaesenc 64($KS), $STATE1, $STATE1
    vaesenc 80($KS), $STATE1, $STATE1
    vaesenc 96($KS), $STATE1, $STATE1
    vaesenc 112($KS), $STATE1, $STATE1
    vaesenc 128($KS), $STATE1, $STATE1
    vaesenc 144($KS), $STATE1, $STATE1
    vaesenclast 160($KS), $STATE1, $STATE1

    # XOR with Plaintext
    vpxor ($PT), $STATE1, $STATE1

    vmovdqu $STATE1, ($CT)

    addq \$16, $PT
    addq \$16, $CT

    decq %r10
    jne .L128_enc_msg_x8_loop2

.L128_enc_msg_x8_out:
    movq %rbp, %rsp
.cfi_def_cfa_register %rsp
    popq %rbp
.cfi_pop %rbp
    popq %r13
.cfi_pop %r13
    popq %r12
.cfi_pop %r12
    ret
.cfi_endproc
.size aes128gcmsiv_enc_msg_x8,.-aes128gcmsiv_enc_msg_x8
___
}
aes128gcmsiv_enc_msg_x8();

sub aesgcmsiv_dec {
  my ($aes256) = @_;

11538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $T = "%xmm0"; 11548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP0 = "%xmm1"; 11558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP1 = "%xmm2"; 11568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP2 = "%xmm3"; 11578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP3 = "%xmm4"; 11588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP4 = "%xmm5"; 11598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP5 = "%xmm6"; 11608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR1 = "%xmm7"; 11618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR2 = "%xmm8"; 11628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR3 = "%xmm9"; 11638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR4 = "%xmm10"; 11648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR5 = "%xmm11"; 11658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR6 = "%xmm12"; 11668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR = "%xmm15"; 11678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CT = "%rdi"; 11688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $PT = "%rsi"; 11698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $POL = "%rdx"; 11708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $Htbl = "%rcx"; 11718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KS = "%r8"; 11728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $LEN = "%r9"; 11738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $secureBuffer = "%rax"; 11748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $HTABLE_ROUNDS = "%xmm13"; 11758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 11768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $labelPrefix = "128"; 11778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan if ($aes256) { 11788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $labelPrefix = "256"; 
11798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan } 11808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 11818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $aes_round_dec = sub { 11828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 11838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 11848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16)}($KS), $TMP3 11858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP3, $CTR1, $CTR1 11868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP3, $CTR2, $CTR2 11878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP3, $CTR3, $CTR3 11888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP3, $CTR4, $CTR4 11898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP3, $CTR5, $CTR5 11908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $TMP3, $CTR6, $CTR6 11918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 11928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 11938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 11948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $aes_lastround_dec = sub { 11958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 11968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 11978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16)}($KS), $TMP3 11988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR1, $CTR1 11998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR2, $CTR2 12008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR3, $CTR3 12018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR4, $CTR4 12028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR5, $CTR5 12038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR6, $CTR6 
12048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 12058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 12068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $schoolbook = sub { 12088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 12098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 12108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16-32)}($secureBuffer), $TMP5 12118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16-32)}($Htbl), $HTABLE_ROUNDS 12128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $HTABLE_ROUNDS, $TMP5, $TMP3 12148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 12158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x11, $HTABLE_ROUNDS, $TMP5, $TMP3 12168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP1, $TMP1 12178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x00, $HTABLE_ROUNDS, $TMP5, $TMP3 12188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP2, $TMP2 12198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x01, $HTABLE_ROUNDS, $TMP5, $TMP3 12208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 12218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 12228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 12238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan if ($aes256) { 12258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 12268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes256gcmsiv_dec 12278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes256gcmsiv_dec,\@function,6 12288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 
12298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes256gcmsiv_dec: 12308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 12318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan } else { 12328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 12338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes128gcmsiv_dec 12348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes128gcmsiv_dec,\@function,6 12358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 12368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_dec: 12378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 12388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan } 12398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 12418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 12428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan test \$~15, $LEN 12438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jnz .L${labelPrefix}_dec_start 12448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 12458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L${labelPrefix}_dec_start: 12478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vzeroupper 12488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($POL), $T 12498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan movq $POL, $secureBuffer 12508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan leaq 32($secureBuffer), $secureBuffer 12528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan leaq 32($Htbl), $Htbl 12538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # make CTRBLKs from given tag. 
12558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ($CT,$LEN), $CTR 12568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpor OR_MASK(%rip), $CTR, $CTR # CTR = [1]TAG[126...32][00..00] 12578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan andq \$~15, $LEN 12588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # If less then 6 blocks, make singles 12608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan cmp \$96, $LEN 12618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jb .L${labelPrefix}_dec_loop2 12628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # Decrypt the first six blocks 12648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan sub \$96, $LEN 12658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR, $CTR1 12668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR1, $CTR2 12678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $CTR1, $CTR3 12688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR3, $CTR4 12698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $CTR3, $CTR5 12708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR5, $CTR6 12718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $CTR5, $CTR 12728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $CTR1, $CTR1 12748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $CTR2, $CTR2 12758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $CTR3, $CTR3 12768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $CTR4, $CTR4 12778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $CTR5, $CTR5 12788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $CTR6, $CTR6 
12798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(1)} 12818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(2)} 12828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(3)} 12838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(4)} 12848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(5)} 12858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(6)} 12868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(7)} 12878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(8)} 12888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(9)} 12898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 12908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 12918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanif ($aes256) { 12928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 12938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(10)} 12948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(11)} 12958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(12)} 12968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(13)} 12978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_lastround_dec->(14)} 12988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 12998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} else { 13008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 13018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_lastround_dec->(10)} 13028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 13038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 13048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 
13068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # XOR with CT 13078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 0*16($CT), $CTR1, $CTR1 13088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 1*16($CT), $CTR2, $CTR2 13098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 2*16($CT), $CTR3, $CTR3 13108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 3*16($CT), $CTR4, $CTR4 13118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 4*16($CT), $CTR5, $CTR5 13128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 5*16($CT), $CTR6, $CTR6 13138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR1, 0*16($PT) 13158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR2, 1*16($PT) 13168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR3, 2*16($PT) 13178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR4, 3*16($PT) 13188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR5, 4*16($PT) 13198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR6, 5*16($PT) 13208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$96, $CT 13228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$96, $PT 13238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jmp .L${labelPrefix}_dec_loop1 13248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# Decrypt 6 blocks each time while hashing previous 6 blocks 13268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 64 13278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L${labelPrefix}_dec_loop1: 13288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan cmp \$96, $LEN 13298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jb .L${labelPrefix}_dec_finish_96 13308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan sub \$96, $LEN 
13318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR6, $TMP5 13338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR5, 1*16-32($secureBuffer) 13348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR4, 2*16-32($secureBuffer) 13358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR3, 3*16-32($secureBuffer) 13368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR2, 4*16-32($secureBuffer) 13378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR1, 5*16-32($secureBuffer) 13388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR, $CTR1 13408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR1, $CTR2 13418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $CTR1, $CTR3 13428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR3, $CTR4 13438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $CTR3, $CTR5 13448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR5, $CTR6 13458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $CTR5, $CTR 13468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($KS), $TMP3 13488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $CTR1, $CTR1 13498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $CTR2, $CTR2 13508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $CTR3, $CTR3 13518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $CTR4, $CTR4 13528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $CTR5, $CTR5 13538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $CTR6, $CTR6 13548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 
13558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 0*16-32($Htbl), $TMP3 13568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x11, $TMP3, $TMP5, $TMP1 13578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x00, $TMP3, $TMP5, $TMP2 13588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x01, $TMP3, $TMP5, $TMP0 13598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $TMP3, $TMP5, $TMP3 13608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 13618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(1)} 13638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(1)} 13648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(2)} 13668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(2)} 13678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(3)} 13698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(3)} 13708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(4)} 13728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(4)} 13738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(5)} 13758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(6)} 13768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(7)} 13778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa 5*16-32($secureBuffer), $TMP5 13798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $T, $TMP5, $TMP5 
13808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 5*16-32($Htbl), $TMP4 13818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x01, $TMP4, $TMP5, $TMP3 13838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 13848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x11, $TMP4, $TMP5, $TMP3 13858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP1, $TMP1 13868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x00, $TMP4, $TMP5, $TMP3 13878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP2, $TMP2 13888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $TMP4, $TMP5, $TMP3 13898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 13908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(8)} 13928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpsrldq \$8, $TMP0, $TMP3 13948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP1, $TMP4 13958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$8, $TMP0, $TMP3 13968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP2, $T 13978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 13988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa poly(%rip), $TMP2 13998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(9)} 14018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 14028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanif ($aes256) { 14048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 14058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 
${\$aes_round_dec->(10)} 14068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(11)} 14078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(12)} 14088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round_dec->(13)} 14098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 14*16($KS), $TMP5 14108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 14118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} else { 14128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 14138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 10*16($KS), $TMP5 14148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 14158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 14168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 14188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpalignr \$8, $T, $T, $TMP1 14198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $TMP2, $T, $T 14208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $T, $TMP1, $T 14218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 0*16($CT), $TMP5, $TMP3 14238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR1, $CTR1 14248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 1*16($CT), $TMP5, $TMP3 14258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR2, $CTR2 14268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 2*16($CT), $TMP5, $TMP3 14278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR3, $CTR3 14288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 3*16($CT), $TMP5, $TMP3 14298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR4, $CTR4 14308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 4*16($CT), $TMP5, $TMP3 
14318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR5, $CTR5 14328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 5*16($CT), $TMP5, $TMP3 14338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $TMP3, $CTR6, $CTR6 14348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpalignr \$8, $T, $T, $TMP1 14368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $TMP2, $T, $T 14378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $T, $TMP1, $T 14388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR1, 0*16($PT) 14408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR2, 1*16($PT) 14418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR3, 2*16($PT) 14428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR4, 3*16($PT) 14438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR5, 4*16($PT) 14448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $CTR6, 5*16($PT) 14458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP4, $T, $T 14478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan lea 96($CT), $CT 14498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan lea 96($PT), $PT 14508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jmp .L${labelPrefix}_dec_loop1 14518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L${labelPrefix}_dec_finish_96: 14538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR6, $TMP5 14548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR5, 1*16-32($secureBuffer) 14558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR4, 2*16-32($secureBuffer) 
14568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR3, 3*16-32($secureBuffer) 14578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR2, 4*16-32($secureBuffer) 14588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR1, 5*16-32($secureBuffer) 14598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 0*16-32($Htbl), $TMP3 14618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $TMP3, $TMP5, $TMP0 14628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x11, $TMP3, $TMP5, $TMP1 14638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x00, $TMP3, $TMP5, $TMP2 14648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x01, $TMP3, $TMP5, $TMP3 14658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 14668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(1)} 14688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(2)} 14698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(3)} 14708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$schoolbook->(4)} 14718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 5*16-32($secureBuffer), $TMP5 14738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $T, $TMP5, $TMP5 14748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu 5*16-32($Htbl), $TMP4 14758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x11, $TMP4, $TMP5, $TMP3 14768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP1, $TMP1 14778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x00, $TMP4, $TMP5, $TMP3 14788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP2, $TMP2 14798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 
vpclmulqdq \$0x10, $TMP4, $TMP5, $TMP3 14808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 14818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x01, $TMP4, $TMP5, $TMP3 14828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP0, $TMP0 14838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpsrldq \$8, $TMP0, $TMP3 14858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP1, $TMP4 14868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$8, $TMP0, $TMP3 14878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP3, $TMP2, $T 14888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa poly(%rip), $TMP2 14908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpalignr \$8, $T, $T, $TMP1 14928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $TMP2, $T, $T 14938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $T, $TMP1, $T 14948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpalignr \$8, $T, $T, $TMP1 14968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpclmulqdq \$0x10, $TMP2, $T, $T 14978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $T, $TMP1, $T 14988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 14998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP4, $T, $T 15008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L${labelPrefix}_dec_loop2: 15028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # Here we encrypt any remaining whole block 15038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # if there are no whole blocks 
15058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan cmp \$16, $LEN 15068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jb .L${labelPrefix}_dec_out 15078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan sub \$16, $LEN 15088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR, $TMP1 15108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR, $CTR 15118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 0*16($KS), $TMP1, $TMP1 15138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 1*16($KS), $TMP1, $TMP1 15148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 2*16($KS), $TMP1, $TMP1 15158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 3*16($KS), $TMP1, $TMP1 15168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 4*16($KS), $TMP1, $TMP1 15178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 5*16($KS), $TMP1, $TMP1 15188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 6*16($KS), $TMP1, $TMP1 15198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 7*16($KS), $TMP1, $TMP1 15208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 8*16($KS), $TMP1, $TMP1 15218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 9*16($KS), $TMP1, $TMP1 15228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 15238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanif ($aes256) { 15248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 15258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 10*16($KS), $TMP1, $TMP1 15268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 11*16($KS), $TMP1, $TMP1 15278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 12*16($KS), $TMP1, $TMP1 15288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 13*16($KS), $TMP1, $TMP1 
15298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast 14*16($KS), $TMP1, $TMP1 15308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 15318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} else { 15328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 15338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast 10*16($KS), $TMP1, $TMP1 15348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 15358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 15368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan$code.=<<___; 15388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($CT), $TMP1, $TMP1 15398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $TMP1, ($PT) 15408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$16, $CT 15418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$16, $PT 15428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $TMP1, $T, $T 15448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa -32($Htbl), $TMP0 15458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan call GFMUL 15468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jmp .L${labelPrefix}_dec_loop2 15488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L${labelPrefix}_dec_out: 15508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $T, ($POL) 15518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 15528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 15538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 15548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan if ($aes256) { 15568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 
15578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes256gcmsiv_dec, .-aes256gcmsiv_dec 15588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 15598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan } else { 15608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 15618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes128gcmsiv_dec, .-aes128gcmsiv_dec 15628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 15638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan } 15648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 15658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaesgcmsiv_dec(0); # emit 128-bit version 15678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloansub aes128gcmsiv_ecb_enc_block { 15698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE_1 = "%xmm1"; 15708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KSp = "%rdx"; 15718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 1: PT %rdi (pointer to 128 bit) 15738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 2: CT %rsi (pointer to 128 bit) 15748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 3: ks %rdx (pointer to ks) 15758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 15768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes128gcmsiv_ecb_enc_block 15778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes128gcmsiv_ecb_enc_block,\@function,3 15788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 15798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_ecb_enc_block: 15808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 15818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa (%rdi), $STATE_1 15828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert 
Sloan 15838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KSp), $STATE_1, $STATE_1 15848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 1*16($KSp), $STATE_1, $STATE_1 15858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 2*16($KSp), $STATE_1, $STATE_1 15868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 3*16($KSp), $STATE_1, $STATE_1 15878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 4*16($KSp), $STATE_1, $STATE_1 15888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 5*16($KSp), $STATE_1, $STATE_1 15898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 6*16($KSp), $STATE_1, $STATE_1 15908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 7*16($KSp), $STATE_1, $STATE_1 15918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 8*16($KSp), $STATE_1, $STATE_1 15928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 9*16($KSp), $STATE_1, $STATE_1 15938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast 10*16($KSp), $STATE_1, $STATE_1 # STATE_1 == IV 15948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $STATE_1, (%rsi) 15968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 15978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 15988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 15998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes128gcmsiv_ecb_enc_block,.-aes128gcmsiv_ecb_enc_block 16008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 16018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 16028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes128gcmsiv_ecb_enc_block(); 16038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 16048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloansub aes256gcmsiv_aes_ks_enc_x1 { 16058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KS = "%rdx"; 16068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert 
Sloan my $KEYp = "%rcx"; 16078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CON_MASK = "%xmm0"; 16088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $MASK_256 = "%xmm15"; 16098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KEY_1 = "%xmm1"; 16108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KEY_2 = "%xmm3"; 16118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $BLOCK1 = "%xmm8"; 16128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $AUX_REG = "%xmm14"; 16138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $PT = "%rdi"; 16148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CT = "%rsi"; 16158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 16168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $round_double = sub { 16178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i, $j) = @_; 16188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 16198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm3, %xmm2 16208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 16218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslld \$1, %xmm0, %xmm0 16228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm1, %xmm4 16238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 16248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm4, %xmm4 16258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 16268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm4, %xmm4 16278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 16288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm1, %xmm1 16298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc %xmm1, $BLOCK1, $BLOCK1 16308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu %xmm1, ${\eval(16*$i)}($KS) 
16318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 16328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufd \$0xff, %xmm1, %xmm2 16338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm14, %xmm2, %xmm2 16348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm3, %xmm4 16358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm3, %xmm3 16368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm4, %xmm4 16378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm3, %xmm3 16388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm4, %xmm4 16398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm3, %xmm3 16408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm3, %xmm3 16418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc %xmm3, $BLOCK1, $BLOCK1 16428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu %xmm3, ${\eval(16*$j)}($KS) 16438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 16448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 16458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 16468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $round_last = sub { 16478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 16488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 16498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpshufb %xmm15, %xmm3, %xmm2 16508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm0, %xmm2, %xmm2 16518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm1, %xmm4 16528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 16538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm4, %xmm4 16548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 16558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpslldq \$4, %xmm4, %xmm4 
16568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm4, %xmm1, %xmm1 16578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor %xmm2, %xmm1, %xmm1 16588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast %xmm1, $BLOCK1, $BLOCK1 16598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu %xmm1, ${\eval(16*$i)}($KS) 16608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 16618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 16628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 16638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 1: %rdi Pointer to PT1 16648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 2: %rsi Pointer to CT1 16658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 3: %rdx Pointer to KS 16668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 4: %rcx Pointer to initial key 16678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 16688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes256gcmsiv_aes_ks_enc_x1 16698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes256gcmsiv_aes_ks_enc_x1,\@function,4 16708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 16718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes256gcmsiv_aes_ks_enc_x1: 16728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 16738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa con1(%rip), $CON_MASK # CON_MASK = 1,1,1,1 16748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa mask(%rip), $MASK_256 # MASK_256 16758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($PT), $BLOCK1 16768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($KEYp), $KEY_1 # KEY_1 || KEY_2 [0..7] = user key 16778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa 16($KEYp), $KEY_2 16788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $KEY_1, $BLOCK1, $BLOCK1 
16798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $KEY_2, $BLOCK1, $BLOCK1 16808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $KEY_1, ($KS) # First round key 16818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $KEY_2, 16($KS) 16828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor $AUX_REG, $AUX_REG, $AUX_REG 16838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 16848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round_double->(2, 3)} 16858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round_double->(4, 5)} 16868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round_double->(6, 7)} 16878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round_double->(8, 9)} 16888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round_double->(10, 11)} 16898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round_double->(12, 13)} 16908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$round_last->(14)} 16918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $BLOCK1, ($CT) 16928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 16938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_endproc 16948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.size aes256gcmsiv_aes_ks_enc_x1,.-aes256gcmsiv_aes_ks_enc_x1 16958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 16968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan} 16978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes256gcmsiv_aes_ks_enc_x1(); 16988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan

# Appends the assembly for aes256gcmsiv_ecb_enc_block to $code.
#
# The generated routine encrypts a single 128-bit block: it XORs the input
# with the round key at ks+0, applies vaesenc with the round keys at
# ks+1*16 .. ks+13*16, and finishes with vaesenclast using ks+14*16.
#
# Both the load from PT and the store to CT use vmovdqa.
# NOTE(review): vmovdqa is the aligned move form, so callers presumably pass
# 16-byte-aligned PT/CT buffers -- confirm against the C call sites.
sub aes256gcmsiv_ecb_enc_block {
  my $STATE_1 = "%xmm1";
  my $PT = "%rdi";
  my $CT = "%rsi";
  my $KSp = "%rdx";

  # Rounds 1..13 are identical apart from the round-key offset, so they are
  # generated instead of being written out by hand.
  my $middle_rounds = join "\n", map {
    my $round = $_;
    "    vaesenc ${round}*16($KSp), $STATE_1, $STATE_1"
  } 1 .. 13;

  # parameter 1: PT            %rdi    (pointer to 128 bit)
  # parameter 2: CT            %rsi    (pointer to 128 bit)
  # parameter 3: ks            %rdx    (pointer to ks)
  $code.=<<___;
.globl aes256gcmsiv_ecb_enc_block
.type aes256gcmsiv_ecb_enc_block,\@function,3
.align 16
aes256gcmsiv_ecb_enc_block:
.cfi_startproc
    vmovdqa ($PT), $STATE_1
    vpxor ($KSp), $STATE_1, $STATE_1
$middle_rounds
    vaesenclast 14*16($KSp), $STATE_1, $STATE_1 # $STATE_1 == IV
    vmovdqa $STATE_1, ($CT)
    ret
.cfi_endproc
.size aes256gcmsiv_ecb_enc_block,.-aes256gcmsiv_ecb_enc_block
___
}
aes256gcmsiv_ecb_enc_block();

# Appends the assembly for aes256gcmsiv_enc_msg_x4 to $code.
#
# The generated routine produces CT = PT xor keystream, where the keystream
# is the AES-256 encryption (key schedule at KS) of successive counter
# blocks.  The initial counter block is the 16 bytes at TAG ORed with
# OR_MASK; each later counter adds to it with vpaddd.
#
# Flow of the generated code:
#   * LEN == 0 returns immediately.
#   * LEN (bytes) is converted to a block count, rounding up when the low
#     four bits of the byte length are non-zero.
#   * %r10 keeps (block count mod 4); LEN becomes (block count / 4).
#   * loop1 handles four blocks per iteration with four parallel states.
#   * loop2 handles the remaining %r10 blocks one at a time.
#
# loop2 always reads and writes a full 16 bytes per block, including for a
# block that only exists because of the round-up above.
# NOTE(review): presumably callers only pass lengths that are a multiple of
# 16 (or suitably padded buffers) -- confirm against the C call sites.
sub aes256gcmsiv_enc_msg_x4 {
  my $CTR1 = "%xmm0";
  my $CTR2 = "%xmm1";
  my $CTR3 = "%xmm2";
  my $CTR4 = "%xmm3";
  my $ADDER = "%xmm4";

  my $STATE1 = "%xmm5";
  my $STATE2 = "%xmm6";
  my $STATE3 = "%xmm7";
  my $STATE4 = "%xmm8";

  my $TMP = "%xmm12";
  my $IV = "%xmm15";

  my $PT = "%rdi";
  my $CT = "%rsi";
  my $TAG = "%rdx";
  my $KS = "%rcx";
  my $LEN = "%r8";

  # Returns the text for one AES round applied to all four states: load
  # round key $i into $TMP, then issue $mnemonic (vaesenc or vaesenclast)
  # once per state.  The result carries no trailing newline.
  my $round_x4 = sub {
    my ($mnemonic, $i) = @_;
    my $offset = 16 * $i;
    return join "\n",
      "    vmovdqu ${offset}($KS), $TMP",
      map { "    $mnemonic $TMP, $_, $_" }
        ($STATE1, $STATE2, $STATE3, $STATE4);
  };

  # $round_asm[$i] is the four-state text for round $i (index 0 is unused
  # because round 0 is the plain vpxor with the first round key).
  my @round_asm = (undef, map { $round_x4->("vaesenc", $_) } 1 .. 13);
  my $rounds_5_to_13 = join "\n", @round_asm[5 .. 13];
  my $last_round = $round_x4->("vaesenclast", 14);

  # Rounds 1..13 for the single-state tail loop, reading the round keys
  # straight from memory.
  my $single_rounds = join "\n", map {
    my $offset = 16 * $_;
    "    vaesenc ${offset}($KS), $STATE1, $STATE1"
  } 1 .. 13;

  # void aes256gcmsiv_enc_msg_x4(unsigned char* PT, unsigned char* CT,
  #                              unsigned char* TAG, unsigned char* KS,
  #                              size_t byte_len);
  # parameter 1: %rdi     #PT
  # parameter 2: %rsi     #CT
  # parameter 3: %rdx     #TAG  [127 126 ... 0]  IV=[127...32]
  # parameter 4: %rcx     #KS
  # parameter 5: %r8      #LEN MSG_length in bytes
  $code.=<<___;
.globl aes256gcmsiv_enc_msg_x4
.type aes256gcmsiv_enc_msg_x4,\@function,5
.align 16
aes256gcmsiv_enc_msg_x4:
.cfi_startproc
    test $LEN, $LEN
    jnz .L256_enc_msg_x4_start
    ret

.L256_enc_msg_x4_start:
    movq $LEN, %r10
    shrq \$4, $LEN # LEN = num of blocks
    shlq \$60, %r10
    jz .L256_enc_msg_x4_start2
    addq \$1, $LEN

.L256_enc_msg_x4_start2:
    movq $LEN, %r10
    shlq \$62, %r10
    shrq \$62, %r10

    # make IV from TAG
    vmovdqa ($TAG), $IV
    vpor OR_MASK(%rip), $IV, $IV # IV = [1]TAG[126...32][00..00]

    vmovdqa four(%rip), $ADDER # Register to increment counters
    vmovdqa $IV, $CTR1 # CTR1 = TAG[1][127...32][00..00]
    vpaddd one(%rip), $IV, $CTR2 # CTR2 = TAG[1][127...32][00..01]
    vpaddd two(%rip), $IV, $CTR3 # CTR3 = TAG[1][127...32][00..02]
    vpaddd three(%rip), $IV, $CTR4 # CTR4 = TAG[1][127...32][00..03]

    shrq \$2, $LEN
    je .L256_enc_msg_x4_check_remainder

    subq \$64, $CT
    subq \$64, $PT

.L256_enc_msg_x4_loop1:
    addq \$64, $CT
    addq \$64, $PT

    vmovdqa $CTR1, $STATE1
    vmovdqa $CTR2, $STATE2
    vmovdqa $CTR3, $STATE3
    vmovdqa $CTR4, $STATE4

    vpxor ($KS), $STATE1, $STATE1
    vpxor ($KS), $STATE2, $STATE2
    vpxor ($KS), $STATE3, $STATE3
    vpxor ($KS), $STATE4, $STATE4

$round_asm[1]
    vpaddd $ADDER, $CTR1, $CTR1
$round_asm[2]
    vpaddd $ADDER, $CTR2, $CTR2
$round_asm[3]
    vpaddd $ADDER, $CTR3, $CTR3
$round_asm[4]
    vpaddd $ADDER, $CTR4, $CTR4

$rounds_5_to_13
$last_round

    # XOR with Plaintext
    vpxor 0*16($PT), $STATE1, $STATE1
    vpxor 1*16($PT), $STATE2, $STATE2
    vpxor 2*16($PT), $STATE3, $STATE3
    vpxor 3*16($PT), $STATE4, $STATE4

    subq \$1, $LEN

    vmovdqu $STATE1, 0*16($CT)
    vmovdqu $STATE2, 1*16($CT)
    vmovdqu $STATE3, 2*16($CT)
    vmovdqu $STATE4, 3*16($CT)

    jne .L256_enc_msg_x4_loop1

    addq \$64, $CT
    addq \$64, $PT

.L256_enc_msg_x4_check_remainder:
    cmpq \$0, %r10
    je .L256_enc_msg_x4_out

.L256_enc_msg_x4_loop2:
    # encrypt each block separately
    # CTR1 is the highest counter (even if no LOOP done)

    vmovdqa $CTR1, $STATE1
    vpaddd one(%rip), $CTR1, $CTR1 # inc counter
    vpxor ($KS), $STATE1, $STATE1
$single_rounds
    vaesenclast 224($KS), $STATE1, $STATE1

    # XOR with Plaintext
    vpxor ($PT), $STATE1, $STATE1

    vmovdqu $STATE1, ($CT)

    addq \$16, $PT
    addq \$16, $CT

    subq \$1, %r10
    jne .L256_enc_msg_x4_loop2

.L256_enc_msg_x4_out:
    ret
.cfi_endproc
.size aes256gcmsiv_enc_msg_x4,.-aes256gcmsiv_enc_msg_x4
___
}
aes256gcmsiv_enc_msg_x4();

19268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloansub aes256gcmsiv_enc_msg_x8() { 19278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE1 = "%xmm1"; 19288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE2 = "%xmm2"; 19298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE3 = "%xmm3"; 19308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE4 = "%xmm4"; 19318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE5 = "%xmm5"; 19328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE6 = "%xmm6"; 19338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE7 = "%xmm7"; 19348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $STATE8 = "%xmm8"; 19358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR1 = "%xmm0"; 19368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR2 = "%xmm9"; 19378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR3 = "%xmm10"; 19388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR4 = "%xmm11"; 19398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR5 = "%xmm12"; 
19408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR6 = "%xmm13"; 19418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CTR7 = "%xmm14"; 19428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP1 = "%xmm1"; 19438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TMP2 = "%xmm2"; 19448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $KS = "%rcx"; 19458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $LEN = "%r8"; 19468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $PT = "%rdi"; 19478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $CT = "%rsi"; 19488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $TAG = "%rdx"; 19498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $SCHED = "%xmm15"; 19508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 19518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $aes_round8 = sub { 19528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 19538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 19548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16)}($KS), $SCHED 19558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE1, $STATE1 19568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE2, $STATE2 19578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE3, $STATE3 19588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE4, $STATE4 19598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE5, $STATE5 19608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE6, $STATE6 19618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE7, $STATE7 19628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc $SCHED, $STATE8, $STATE8 19638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 19648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 
19658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 19668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my $aes_lastround8 = sub { 19678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan my ($i) = @_; 19688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan return <<___; 19698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu ${\eval($i*16)}($KS), $SCHED 19708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE1, $STATE1 19718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE2, $STATE2 19728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE3, $STATE3 19738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE4, $STATE4 19748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE5, $STATE5 19758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE6, $STATE6 19768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE7, $STATE7 19778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenclast $SCHED, $STATE8, $STATE8 19788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan___ 19798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan }; 19808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 19818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # void ENC_MSG_x8(unsigned char* PT, 19828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # unsigned char* CT, 19838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # unsigned char* TAG, 19848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # unsigned char* KS, 19858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # size_t byte_len); 19868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 1: %rdi #PT 19878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 2: %rsi #CT 19888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 3: %rdx #TAG [127 126 ... 
0] IV=[127...32] 19898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 4: %rcx #KS 19908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # parameter 5: %r8 #LEN MSG_length in bytes 19918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan $code.=<<___; 19928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl aes256gcmsiv_enc_msg_x8 19938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.type aes256gcmsiv_enc_msg_x8,\@function,5 19948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align 16 19958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloanaes256gcmsiv_enc_msg_x8: 19968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.cfi_startproc 19978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan test $LEN, $LEN 19988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jnz .L256_enc_msg_x8_start 19998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ret 20008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L256_enc_msg_x8_start: 20028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # Place in stack 20038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan movq %rsp, %r11 20048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$16, %r11 20058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan andq \$-64, %r11 20068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan movq $LEN, %r10 20088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shrq \$4, $LEN # LEN = num of blocks 20098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shlq \$60, %r10 20108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jz .L256_enc_msg_x8_start2 20118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$1, $LEN 20128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L256_enc_msg_x8_start2: 20148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan movq $LEN, %r10 
20158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shlq \$61, %r10 20168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shrq \$61, %r10 20178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # Make IV from TAG 20198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa ($TAG), $TMP1 20208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpor OR_MASK(%rip), $TMP1, $TMP1 # TMP1= IV = [1]TAG[126...32][00..00] 20218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # store counter8 on the stack 20238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd seven(%rip), $TMP1, $CTR1 20248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR1, (%r11) # CTR8 = TAG[127...32][00..07] 20258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $TMP1, $CTR2 # CTR2 = TAG[127...32][00..01] 20268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd two(%rip), $TMP1, $CTR3 # CTR3 = TAG[127...32][00..02] 20278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd three(%rip), $TMP1, $CTR4 # CTR4 = TAG[127...32][00..03] 20288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd four(%rip), $TMP1, $CTR5 # CTR5 = TAG[127...32][00..04] 20298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd five(%rip), $TMP1, $CTR6 # CTR6 = TAG[127...32][00..05] 20308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd six(%rip), $TMP1, $CTR7 # CTR7 = TAG[127...32][00..06] 20318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $TMP1, $CTR1 # CTR1 = TAG[127...32][00..00] 20328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan shrq \$3, $LEN 20348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jz .L256_enc_msg_x8_check_remainder 20358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq 
\$128, $CT 20378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$128, $PT 20388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L256_enc_msg_x8_loop1: 20408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$128, $CT 20418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$128, $PT 20428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR1, $STATE1 20448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR2, $STATE2 20458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR3, $STATE3 20468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR4, $STATE4 20478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR5, $STATE5 20488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR6, $STATE6 20498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR7, $STATE7 20508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # move from stack 20518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa (%r11), $STATE8 20528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE1, $STATE1 20548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE2, $STATE2 20558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE3, $STATE3 20568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE4, $STATE4 20578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE5, $STATE5 20588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE6, $STATE6 20598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE7, $STATE7 20608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE8, $STATE8 20618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 
20628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(1)} 20638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa (%r11), $CTR7 # deal with CTR8 20648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd eight(%rip), $CTR7, $CTR7 20658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR7, (%r11) 20668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(2)} 20678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpsubd one(%rip), $CTR7, $CTR7 20688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(3)} 20698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd eight(%rip), $CTR1, $CTR1 20708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(4)} 20718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd eight(%rip), $CTR2, $CTR2 20728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(5)} 20738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd eight(%rip), $CTR3, $CTR3 20748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(6)} 20758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd eight(%rip), $CTR4, $CTR4 20768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(7)} 20778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd eight(%rip), $CTR5, $CTR5 20788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(8)} 20798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd eight(%rip), $CTR6, $CTR6 20808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(9)} 20818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(10)} 20828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(11)} 20838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(12)} 20848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_round8->(13)} 20858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan ${\$aes_lastround8->(14)} 
20868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # XOR with Plaintext 20888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 0*16($PT), $STATE1, $STATE1 20898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 1*16($PT), $STATE2, $STATE2 20908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 2*16($PT), $STATE3, $STATE3 20918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 3*16($PT), $STATE4, $STATE4 20928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 4*16($PT), $STATE5, $STATE5 20938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 5*16($PT), $STATE6, $STATE6 20948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 6*16($PT), $STATE7, $STATE7 20958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor 7*16($PT), $STATE8, $STATE8 20968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan subq \$1, $LEN 20988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 20998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE1, 0*16($CT) 21008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE2, 1*16($CT) 21018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE3, 2*16($CT) 21028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE4, 3*16($CT) 21038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE5, 4*16($CT) 21048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE6, 5*16($CT) 21058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE7, 6*16($CT) 21068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqu $STATE8, 7*16($CT) 21078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 21088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan jne .L256_enc_msg_x8_loop1 21098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 21108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$128, $CT 
21118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan addq \$128, $PT 21128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 21138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L256_enc_msg_x8_check_remainder: 21148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan cmpq \$0, %r10 21158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan je .L256_enc_msg_x8_out 21168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 21178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.L256_enc_msg_x8_loop2: 21188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # encrypt each block separately 21198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan # CTR1 is the highest counter (even if no LOOP done) 21208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vmovdqa $CTR1, $STATE1 21218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpaddd one(%rip), $CTR1, $CTR1 21228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan 21238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vpxor ($KS), $STATE1, $STATE1 21248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 16($KS), $STATE1, $STATE1 21258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 32($KS), $STATE1, $STATE1 21268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 48($KS), $STATE1, $STATE1 21278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 64($KS), $STATE1, $STATE1 21288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 80($KS), $STATE1, $STATE1 21298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 96($KS), $STATE1, $STATE1 21308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 112($KS), $STATE1, $STATE1 21318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 128($KS), $STATE1, $STATE1 21328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 144($KS), $STATE1, $STATE1 21338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 160($KS), $STATE1, $STATE1 21348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan vaesenc 
176($KS), $STATE1, $STATE1
    vaesenc 192($KS), $STATE1, $STATE1
    vaesenc 208($KS), $STATE1, $STATE1
    vaesenclast 224($KS), $STATE1, $STATE1

    # XOR with Plaintext
    vpxor ($PT), $STATE1, $STATE1

    vmovdqu $STATE1, ($CT)

    addq \$16, $PT
    addq \$16, $CT
    subq \$1, %r10
    jnz .L256_enc_msg_x8_loop2

.L256_enc_msg_x8_out:
    ret

.cfi_endproc
.size aes256gcmsiv_enc_msg_x8,.-aes256gcmsiv_enc_msg_x8
___
}
aes256gcmsiv_enc_msg_x8();
aesgcmsiv_dec(1);

# Appends the assembly for aes256gcmsiv_kdf to $code.
#
# The generated routine builds six blocks from the 16-byte nonce, encrypts
# all six in parallel with a 15-entry (AES-256) key schedule, and stores the
# six resulting blocks (96 bytes) to the output buffer.
#
# Every memory access in the routine uses vmovdqa, so the nonce, the output
# buffer and the key schedule must all be 16-byte aligned.
sub aes256gcmsiv_kdf {
  my $ONE = "%xmm8";
  my $BLOCK1 = "%xmm4";
  my $BLOCK2 = "%xmm6";
  my $BLOCK3 = "%xmm7";
  my $BLOCK4 = "%xmm11";
  my $BLOCK5 = "%xmm12";
  my $BLOCK6 = "%xmm13";

  # Returns the assembly for one middle AES round applied to all six blocks.
  #   $i - round number; the round key is read from $i*16(%rdx)
  #   $j - scratch xmm register that receives the round key
  my $enc_roundx6 = sub {
    my ($i, $j) = @_;
    # Plain arithmetic; no string eval is needed to interpolate the offset.
    my $offset = $i * 16;
    return <<___;
    vmovdqa ${offset}(%rdx), $j
    vaesenc $j, $BLOCK1, $BLOCK1
    vaesenc $j, $BLOCK2, $BLOCK2
    vaesenc $j, $BLOCK3, $BLOCK3
    vaesenc $j, $BLOCK4, $BLOCK4
    vaesenc $j, $BLOCK5, $BLOCK5
    vaesenc $j, $BLOCK6, $BLOCK6
___
  };

  # Same as $enc_roundx6, but emits the final round (vaesenclast).
  my $enc_roundlastx6 = sub {
    my ($i, $j) = @_;
    my $offset = $i * 16;
    return <<___;
    vmovdqa ${offset}(%rdx), $j
    vaesenclast $j, $BLOCK1, $BLOCK1
    vaesenclast $j, $BLOCK2, $BLOCK2
    vaesenclast $j, $BLOCK3, $BLOCK3
    vaesenclast $j, $BLOCK4, $BLOCK4
    vaesenclast $j, $BLOCK5, $BLOCK5
    vaesenclast $j, $BLOCK6, $BLOCK6
___
  };

  # void aes256gcmsiv_kdf(const uint8_t nonce[16],
  #                       uint8_t *out_key_material,
  #                       const uint8_t *key_schedule);
  #
  # Flow of the generated code:
  #   1. Load round key 0 into %xmm1 and the nonce block into BLOCK1.
  #   2. vpshufd 0x90 moves dwords 0..2 of the nonce up to dwords 1..3
  #      (dword 0 is duplicated), then the result is ANDed with and_mask.
  #      NOTE(review): and_mask is defined outside this block; presumably it
  #      clears dword 0 so that lane can act as the block counter - confirm.
  #   3. BLOCK2..BLOCK6 are formed by repeatedly adding the constant `one`
  #      (1 in the low dword) to the previous block.
  #   4. All six blocks are XORed with round key 0, run through rounds 1..13
  #      (round keys alternate between %xmm1 and %xmm2 as scratch), and
  #      finished with round 14.
  #   5. The six blocks are stored at 0*16 .. 5*16 from %rsi.
  $code.=<<___;
.globl aes256gcmsiv_kdf
.type aes256gcmsiv_kdf,\@function,3
.align 16
aes256gcmsiv_kdf:
.cfi_startproc
# parameter 1: %rdi Pointer to NONCE
# parameter 2: %rsi Pointer to output key material
# parameter 3: %rdx Pointer to keys

    vmovdqa (%rdx), %xmm1 # xmm1 = first 16 bytes of random key
    vmovdqa 0*16(%rdi), $BLOCK1
    vmovdqa and_mask(%rip), $BLOCK4
    vmovdqa one(%rip), $ONE
    vpshufd \$0x90, $BLOCK1, $BLOCK1
    vpand $BLOCK4, $BLOCK1, $BLOCK1
    vpaddd $ONE, $BLOCK1, $BLOCK2
    vpaddd $ONE, $BLOCK2, $BLOCK3
    vpaddd $ONE, $BLOCK3, $BLOCK4
    vpaddd $ONE, $BLOCK4, $BLOCK5
    vpaddd $ONE, $BLOCK5, $BLOCK6

    vpxor %xmm1, $BLOCK1, $BLOCK1
    vpxor %xmm1, $BLOCK2, $BLOCK2
    vpxor %xmm1, $BLOCK3, $BLOCK3
    vpxor %xmm1, $BLOCK4, $BLOCK4
    vpxor %xmm1, $BLOCK5, $BLOCK5
    vpxor %xmm1, $BLOCK6, $BLOCK6

    ${\$enc_roundx6->(1, "%xmm1")}
    ${\$enc_roundx6->(2, "%xmm2")}
    ${\$enc_roundx6->(3, "%xmm1")}
    ${\$enc_roundx6->(4, "%xmm2")}
    ${\$enc_roundx6->(5, "%xmm1")}
    ${\$enc_roundx6->(6, "%xmm2")}
    ${\$enc_roundx6->(7, "%xmm1")}
    ${\$enc_roundx6->(8, "%xmm2")}
    ${\$enc_roundx6->(9, "%xmm1")}
    ${\$enc_roundx6->(10, "%xmm2")}
    ${\$enc_roundx6->(11, "%xmm1")}
    ${\$enc_roundx6->(12, "%xmm2")}
    ${\$enc_roundx6->(13, "%xmm1")}
    ${\$enc_roundlastx6->(14, "%xmm2")}

    vmovdqa $BLOCK1, 0*16(%rsi)
    vmovdqa $BLOCK2, 1*16(%rsi)
    vmovdqa $BLOCK3, 2*16(%rsi)
    vmovdqa $BLOCK4, 3*16(%rsi)
    vmovdqa $BLOCK5, 4*16(%rsi)
    vmovdqa $BLOCK6, 5*16(%rsi)
    ret
.cfi_endproc
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
___
}
aes256gcmsiv_kdf();

print $code;

# STDOUT is a pipe into x86_64-xlate.pl; close() reports both buffered write
# errors and a non-zero exit of the translator, so a failure here must abort
# instead of silently leaving a truncated or missing output file.
close STDOUT or die "error closing STDOUT: $!";