#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.

# May 2016
#
# 2x aggregated reduction improves performance by 50% (resulting
# performance on POWER8 is 1 cycle per processed byte), and 4x
# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).

# Command line: <flavour> <output>. Both values are forwarded verbatim
# to ppc-xlate.pl further down.
$flavour=shift;
$output =shift;

# Choose the word size and the matching store-with-update, load, store,
# unsigned-compare and shift-right-immediate mnemonics from the flavour
# string. A flavour that mentions neither 64 nor 32 is rejected.
if ($flavour =~ /64/) {
	$SIZE_T=8;
	$LRSAVE=2*$SIZE_T;
	$STU="stdu";
	$POP="ld";
	$PUSH="std";
	$UCMP="cmpld";
	$SHRI="srdi";
} elsif ($flavour =~ /32/) {
	$SIZE_T=4;
	$LRSAVE=$SIZE_T;
	$STU="stwu";
	$POP="lwz";
	$PUSH="stw";
	$UCMP="cmplw";
	$SHRI="srwi";
} else { die "nonsense $flavour"; }

$sp="r1";
$FRAME=6*$SIZE_T+13*16;	# 13*16 is for v20-v31 offload

# Look for ppc-xlate.pl next to this script first, then under
# ../../../perlasm/ relative to this script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT is piped through ppc-xlate.pl.
# Low-precedence "or" is required here: with "||" the die would attach
# to the command string (always true) and a failed open would never be
# reported.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Symbolic names for the general-purpose and vector registers used by
# the generated code.
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
my $vrsave="r12";

# .gcm_init_p8, first part: emits the code that stores the 0xc2 constant,
# the split H value and H^2 at offsets 0x00..0x60 from r3.
$code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p8
.align	5
.gcm_init_p8:
	li		r0,-4096
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$IN,$H,$t1		# twisted H

	vsldoi		$H,$IN,$IN,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	li		r8,0x40
	stvx_u		$H, r9,r3
	li		r9,0x50
	stvx_u		$Hh,r10,r3
	li		r10,0x60

	vpmsumd		$Xl,$IN,$Hl		# H.lo·H.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·H.lo+H.lo·H.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·H.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$IN1,$Xl,$t1

	vsldoi		$H2,$IN1,$IN1,8
	vsldoi		$H2l,$zero,$H2,8
	vsldoi		$H2h,$H2,$zero,8

	stvx_u		$H2l,r8,r3		# save H^2
	li		r8,0x70
	stvx_u		$H2,r9,r3
	li		r9,0x80
	stvx_u		$H2h,r10,r3
	li		r10,0x90
___
{
# .gcm_init_p8, second part: H^3 and H^4 are computed side by side.
# The registers behind $Hl/$H/$Hh double as temporaries $t4/$t5/$t6
# until they are overwritten with the H^3 split further down.
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
$code.=<<___;
	vpmsumd		$Xl,$IN,$H2l		# H.lo·H^2.lo
	vpmsumd		$Xl1,$IN1,$H2l		# H^2.lo·H^2.lo
	vpmsumd		$Xm,$IN,$H2		# H.hi·H^2.lo+H.lo·H^2.hi
	vpmsumd		$Xm1,$IN1,$H2		# H^2.hi·H^2.lo+H^2.lo·H^2.hi
	vpmsumd		$Xh,$IN,$H2h		# H.hi·H^2.hi
	vpmsumd		$Xh1,$IN1,$H2h		# H^2.hi·H^2.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
	vpmsumd		$t6,$Xl1,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vsldoi		$t4,$Xm1,$zero,8
	vsldoi		$t5,$zero,$Xm1,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1
	vxor		$Xl1,$Xl1,$t4
	vxor		$Xh1,$Xh1,$t5

	vsldoi		$Xl,$Xl,$Xl,8
	vsldoi		$Xl1,$Xl1,$Xl1,8
	vxor		$Xl,$Xl,$t2
	vxor		$Xl1,$Xl1,$t6

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vsldoi		$t5,$Xl1,$Xl1,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vpmsumd		$Xl1,$Xl1,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$t5,$t5,$Xh1
	vxor		$Xl,$Xl,$t1
	vxor		$Xl1,$Xl1,$t5

	vsldoi		$H,$Xl,$Xl,8
	vsldoi		$H2,$Xl1,$Xl1,8
	vsldoi		$Hl,$zero,$H,8
	vsldoi		$Hh,$H,$zero,8
	vsldoi		$H2l,$zero,$H2,8
	vsldoi		$H2h,$H2,$zero,8

	stvx_u		$Hl,r8,r3		# save H^3
	li		r8,0xa0
	stvx_u		$H,r9,r3
	li		r9,0xb0
	stvx_u		$Hh,r10,r3
	li		r10,0xc0
	stvx_u		$H2l,r8,r3		# save H^4
	stvx_u		$H2,r9,r3
	stvx_u		$H2h,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_p8,.-.gcm_init_p8
___
}
# .gcm_gmult_p8 (single multiplication of Xi by H) and .gcm_ghash_p8
# (one-block and 2x-aggregated paths; inputs of 64 bytes or more branch
# to Lgcm_ghash_p8_4x).
$code.=<<___;
.globl	.gcm_gmult_p8
.align	5
.gcm_gmult_p8:
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$IN,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$zero,$zero,$zero

	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_gmult_p8,.-.gcm_gmult_p8

.globl	.gcm_ghash_p8
.align	5
.gcm_ghash_p8:
	li		r0,-4096
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$Xl,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	li		r8,0x40
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	li		r9,0x50
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	li		r10,0x60
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor		$zero,$zero,$zero

	${UCMP}i	$len,64
	bge		Lgcm_ghash_p8_4x

	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,16
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$IN,$IN,$Xl
	beq		Lshort

	lvx_u		$H2l,r8,$Htbl		# load H^2
	li		r8,16
	lvx_u		$H2, r9,$Htbl
	add		r9,$inp,$len		# end of input
	lvx_u		$H2h,r10,$Htbl
	be?b		Loop_2x

.align	5
Loop_2x:
	lvx_u		$IN1,0,$inp
	le?vperm	$IN1,$IN1,$IN1,$lemask

	 subic		$len,$len,32
	vpmsumd		$Xl,$IN,$H2l		# H^2.lo·Xi.lo
	 vpmsumd	$Xl1,$IN1,$Hl		# H.lo·Xi+1.lo
	 subfe		r0,r0,r0		# borrow?-1:0
	vpmsumd		$Xm,$IN,$H2		# H^2.hi·Xi.lo+H^2.lo·Xi.hi
	 vpmsumd	$Xm1,$IN1,$H		# H.hi·Xi+1.lo+H.lo·Xi+1.hi
	 and		r0,r0,$len
	vpmsumd		$Xh,$IN,$H2h		# H^2.hi·Xi.hi
	 vpmsumd	$Xh1,$IN1,$Hh		# H.hi·Xi+1.hi
	 add		$inp,$inp,r0

	vxor		$Xl,$Xl,$Xl1
	vxor		$Xm,$Xm,$Xm1

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	 vxor		$Xh,$Xh,$Xh1
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2
	 lvx_u		$IN,r8,$inp
	 addi		$inp,$inp,32

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$t1,$t1,$Xh
	vxor		$IN,$IN,$t1
	vxor		$IN,$IN,$Xl
	$UCMP		r9,$inp
	bgt		Loop_2x			# done yet?

	cmplwi		$len,0
	bne		Leven

Lshort:
	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh

Leven:
	vxor		$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,4,0
	.long		0
___
{
# 4x-aggregated path. v20..v31 get symbolic names here; the generated
# prologue below stores them to the stack frame before use.
my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
    $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
my $IN0=$IN;
# $H21l/$H21h and the two permutation masks reuse the registers that
# the shorter paths call $Hl/$Hh and $H2l/$H2h.
my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);

$code.=<<___;
.align	5
.gcm_ghash_p8_4x:
Lgcm_ghash_p8_4x:
	$STU		$sp,-$FRAME($sp)
	li		r10,`15+6*$SIZE_T`
	li		r11,`31+6*$SIZE_T`
	stvx		v20,r10,$sp
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	li		r10,0x60
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME-4`($sp)	# save vrsave
	mtspr		256,r0			# preserve all AltiVec registers

	lvsl		$t0,0,r8		# 0x0001..0e0f
	#lvx_u		$H2l,r8,$Htbl		# load H^2
	li		r8,0x70
	lvx_u		$H2, r9,$Htbl
	li		r9,0x80
	vspltisb	$t1,8			# 0x0808..0808
	#lvx_u		$H2h,r10,$Htbl
	li		r10,0x90
	lvx_u		$H3l,r8,$Htbl		# load H^3
	li		r8,0xa0
	lvx_u		$H3, r9,$Htbl
	li		r9,0xb0
	lvx_u		$H3h,r10,$Htbl
	li		r10,0xc0
	lvx_u		$H4l,r8,$Htbl		# load H^4
	li		r8,0x10
	lvx_u		$H4, r9,$Htbl
	li		r9,0x20
	lvx_u		$H4h,r10,$Htbl
	li		r10,0x30

	vsldoi		$t2,$zero,$t1,8		# 0x0000..0808
	vaddubm		$hiperm,$t0,$t2		# 0x0001..1617
	vaddubm		$loperm,$t1,$hiperm	# 0x0809..1e1f

	$SHRI		$len,$len,4		# this allows to use sign bit
						# as carry
	lvx_u		$IN0,0,$inp		# load input
	lvx_u		$IN1,r8,$inp
	subic.		$len,$len,8
	lvx_u		$IN2,r9,$inp
	lvx_u		$IN3,r10,$inp
	addi		$inp,$inp,0x40
	le?vperm	$IN0,$IN0,$IN0,$lemask
	le?vperm	$IN1,$IN1,$IN1,$lemask
	le?vperm	$IN2,$IN2,$IN2,$lemask
	le?vperm	$IN3,$IN3,$IN3,$lemask

	vxor		$Xh,$IN0,$Xl

	 vpmsumd	$Xl1,$IN1,$H3l
	 vpmsumd	$Xm1,$IN1,$H3
	 vpmsumd	$Xh1,$IN1,$H3h

	 vperm		$H21l,$H2,$H,$hiperm
	 vperm		$t0,$IN2,$IN3,$loperm
	 vperm		$H21h,$H2,$H,$loperm
	 vperm		$t1,$IN2,$IN3,$hiperm
	 vpmsumd	$Xm2,$IN2,$H2		# H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
	 vpmsumd	$Xl3,$t0,$H21l		# H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
	 vpmsumd	$Xm3,$IN3,$H		# H.hi·Xi+3.lo  +H.lo·Xi+3.hi
	 vpmsumd	$Xh3,$t1,$H21h		# H^2.hi·Xi+2.hi+H.hi·Xi+3.hi

	 vxor		$Xm2,$Xm2,$Xm1
468bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl3,$Xl3,$Xl1 469bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xm3,$Xm3,$Xm2 470bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh3,$Xh3,$Xh1 471bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 472bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez blt Ltail_4x 473bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 474bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven ValdezLoop_4x: 475bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx_u $IN0,0,$inp 476bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx_u $IN1,r8,$inp 477bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez subic. $len,$len,4 478bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx_u $IN2,r9,$inp 479bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx_u $IN3,r10,$inp 480bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi $inp,$inp,0x40 481bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN1,$IN1,$IN1,$lemask 482bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN2,$IN2,$IN2,$lemask 483bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN3,$IN3,$IN3,$lemask 484bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN0,$IN0,$IN0,$lemask 485bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 486bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo 487bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi 488bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi 489bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl1,$IN1,$H3l 490bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xm1,$IN1,$H3 491bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xh1,$IN1,$H3h 492bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 493bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor 
$Xl,$Xl,$Xl3 494bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xm,$Xm,$Xm3 495bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$Xh,$Xh3 496bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vperm $t0,$IN2,$IN3,$loperm 497bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vperm $t1,$IN2,$IN3,$hiperm 498bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 499bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $t2,$Xl,$xC2 # 1st reduction phase 500bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo 501bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi 502bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 503bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $t0,$Xm,$zero,8 504bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $t1,$zero,$Xm,8 505bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl,$Xl,$t0 506bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$Xh,$t1 507bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 508bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $Xl,$Xl,$Xl,8 509bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl,$Xl,$t2 510bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 511bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase 512bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi 513bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi 514bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl,$Xl,$xC2 515bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 516bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl3,$Xl3,$Xl1 517bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh3,$Xh3,$Xh1 518bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven 
Valdez vxor $Xh,$Xh,$IN0 519bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xm2,$Xm2,$Xm1 520bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$Xh,$t1 521bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xm3,$Xm3,$Xm2 522bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$Xh,$Xl 523bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez bge Loop_4x 524bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 525bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven ValdezLtail_4x: 526bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo 527bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi 528bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi 529bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 530bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl,$Xl,$Xl3 531bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xm,$Xm,$Xm3 532bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 533bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $t2,$Xl,$xC2 # 1st reduction phase 534bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 535bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $t0,$Xm,$zero,8 536bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $t1,$zero,$Xm,8 537bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$Xh,$Xh3 538bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl,$Xl,$t0 539bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$Xh,$t1 540bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 541bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $Xl,$Xl,$Xl,8 542bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl,$Xl,$t2 543bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 544bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase 
545bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl,$Xl,$xC2 546bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $t1,$t1,$Xh 547bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl,$Xl,$t1 548bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 549bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addic. $len,$len,4 550bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez beq Ldone_4x 551bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 552bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx_u $IN0,0,$inp 553bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez ${UCMP}i $len,2 554bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez li $len,-4 555bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez blt Lone 556bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx_u $IN1,r8,$inp 557bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez beq Ltwo 558bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 559bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven ValdezLthree: 560bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx_u $IN2,r9,$inp 561bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN0,$IN0,$IN0,$lemask 562bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN1,$IN1,$IN1,$lemask 563bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN2,$IN2,$IN2,$lemask 564bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 565bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$IN0,$Xl 566bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vmr $H4l,$H3l 567bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vmr $H4, $H3 568bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vmr $H4h,$H3h 569bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 570bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vperm $t0,$IN1,$IN2,$loperm 571bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vperm $t1,$IN1,$IN2,$hiperm 572bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven 
Valdez vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo 573bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi 574bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo 575bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi 576bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 577bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xm3,$Xm3,$Xm2 578bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez b Ltail_4x 579bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 580bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez.align 4 581bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven ValdezLtwo: 582bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN0,$IN0,$IN0,$lemask 583bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN1,$IN1,$IN1,$lemask 584bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 585bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$IN0,$Xl 586bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vperm $t0,$zero,$IN1,$loperm 587bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vperm $t1,$zero,$IN1,$hiperm 588bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 589bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $H4l,$zero,$H2,8 590bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vmr $H4, $H2 591bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $H4h,$H2,$zero,8 592bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 593bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo 594bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi 595bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi 596bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 
597bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez b Ltail_4x 598bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 599bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez.align 4 600bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven ValdezLone: 601bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $IN0,$IN0,$IN0,$lemask 602bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 603bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $H4l,$zero,$H,8 604bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vmr $H4, $H 605bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vsldoi $H4h,$H,$zero,8 606bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 607bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh,$IN0,$Xl 608bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xl3,$Xl3,$Xl3 609bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xm3,$Xm3,$Xm3 610bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez vxor $Xh3,$Xh3,$Xh3 611bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 612bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez b Ltail_4x 613bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 614bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven ValdezLdone_4x: 615bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez le?vperm $Xl,$Xl,$Xl,$lemask 616bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez stvx_u $Xl,0,$Xip # write out Xi 617bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez 618bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez li r10,`15+6*$SIZE_T` 619bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez li r11,`31+6*$SIZE_T` 620bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez mtspr 256,$vrsave 621bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v20,r10,$sp 622bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r10,r10,32 623bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v21,r11,$sp 624bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r11,r11,32 
625bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v22,r10,$sp 626bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r10,r10,32 627bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v23,r11,$sp 628bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r11,r11,32 629bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v24,r10,$sp 630bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r10,r10,32 631bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v25,r11,$sp 632bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r11,r11,32 633bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v26,r10,$sp 634bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r10,r10,32 635bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v27,r11,$sp 636bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r11,r11,32 637bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v28,r10,$sp 638bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r10,r10,32 639bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v29,r11,$sp 640bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi r11,r11,32 641bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v30,r10,$sp 642bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez lvx v31,r11,$sp 643bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez addi $sp,$sp,$FRAME 644bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez blr 645bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez .long 0 646bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez .byte 0,12,0x04,0,0x80,0,4,0 647bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez .long 0 648bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez___ 649bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez} 650bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez$code.=<<___; 651bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven Valdez.size .gcm_ghash_p8,.-.gcm_ghash_p8 652bb1ceac29bc7a18b94e3da78057dc41aa7071784Steven 
.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align  2
___

# Post-process the accumulated assembly text line by line and print it:
#   1. every `...` span is replaced by the result of evaluating its
#      contents as a Perl expression (used for the frame-offset arithmetic
#      in the code above);
#   2. endian-conditional prefixes are resolved against $flavour: for a
#      flavour ending in "le" a leading "le?" marker is dropped and "be?"
#      is turned into the comment marker "#be#"; otherwise the roles are
#      swapped. Only the first marker found on a line is rewritten.
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/geo;

	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o;
	} else {
	    s/le\?/#le#/o	or
	    s/be\?//o;
	}
	print $_,"\n";
}

# Closing enforces the flush. Buffered write errors only surface here, so
# the result must be checked or a truncated output would go unnoticed.
close STDOUT or die "error closing STDOUT: $!";