#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# I let hardware handle unaligned input, except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank. The module is big-endian [which is
# not a big deal as there are no little-endian targets left around].

#			sha256		|	sha512
# 			-m64	-m32	|	-m64	-m32
# --------------------------------------+-----------------------
# PPC970,gcc-4.0.0	+50%	+38%	|	+40%	+410%(*)
# Power6,xlc-7		+150%	+90%	|	+100%	+430%(*)
#
# (*)	64-bit code in 32-bit application context, which actually is
#	on TODO list. It should be noted that for safe deployment in
#	32-bit *multi-threaded* context asynchronous signals should be
#	blocked upon entry to SHA512 block routine. This is because
#	32-bit signaling procedure invalidates upper halves of GPRs.
#	Context switch procedure preserves them, but not signaling:-(

# Second version is truly multi-thread safe. Trouble with the original
# version was that it was using thread local storage pointer register.
# Well, it scrupulously preserved it, but the problem would arise the
# moment asynchronous signal was delivered and signal handler would
# dereference the TLS pointer. While it's never the case in openssl
# application or test suite, we have to respect this scenario and not
# use TLS pointer register. Alternative would be to require caller to
# block signals prior to calling this routine. For the record, in 32-bit
# context R2 serves as TLS pointer, while in 64-bit context - R13.

# Command-line arguments, in order: the target flavour string and the
# output file name.
$flavour = shift;
$output  = shift;

# Choose the pointer-width dependent parameters and instruction
# mnemonics from the flavour.  "64" is tested before "32", so a flavour
# matching both gets the 64-bit settings; anything else is fatal.
#
#   $SIZE_T       width in bytes of a pointer-sized slot
#   $LRSAVE       offset derived from $SIZE_T (presumably the link
#                 register save slot -- its use is outside this section)
#   $STU, $UCMP, $SHL, $POP, $PUSH
#                 mnemonics substituted into the generated assembly
if ($flavour =~ /64/) {
	$SIZE_T = 8;
	$LRSAVE = 2*$SIZE_T;
	($STU, $UCMP, $SHL) = ("stdu", "cmpld", "sldi");
	($POP, $PUSH)       = ("ld",   "std");
}
elsif ($flavour =~ /32/) {
	$SIZE_T = 4;
	$LRSAVE = $SIZE_T;
	($STU, $UCMP, $SHL) = ("stwu", "cmplw", "slwi");
	($POP, $PUSH)       = ("lwz",  "stw");
}
else {
	die "nonsense $flavour";
}

# Locate the ppc-xlate.pl translator: first in the directory of this
# script, then in ../../perlasm relative to it.  $dir receives the
# directory part of $0 including its trailing "/" or "\" separator.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Re-open STDOUT as a pipe into the translator, run with the same perl
# binary ($^X) and given the flavour and output name.  The low-precedence
# "or" is required: "||" would bind to the command string, which is
# always true, so a failed open() would never reach die().
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Select the hash variant from the output file name: a name containing
# "512" selects the SHA-512 parameters, anything else SHA-256.
#
#   $func              name of the generated block function
#   $SZ                size in bytes of one hash word
#   @Sigma0, @Sigma1,
#   @sigma0, @sigma1   rotate/shift amounts used by the round code
#   $rounds            number of rounds
#   $LD, $ST, $ROR, $SHR
#                      load/store/rotate/shift mnemonics for $SZ-wide words
if ($output =~ /512/) {
	$func   = "sha512_block_data_order";
	$SZ     = 8;
	$rounds = 80;
	@Sigma0 = (28,34,39);
	@Sigma1 = (14,18,41);
	@sigma0 = ( 1, 8, 7);
	@sigma1 = (19,61, 6);
	($LD, $ST, $ROR, $SHR) = ("ld", "std", "rotrdi", "srdi");
}
else {
	$func   = "sha256_block_data_order";
	$SZ     = 4;
	$rounds = 64;
	@Sigma0 = ( 2,13,22);
	@Sigma1 = ( 6,11,25);
	@sigma0 = ( 7,18, 3);
	@sigma1 = (17,19,10);
	($LD, $ST, $ROR, $SHR) = ("lwz", "stw", "rotrwi", "srwi");
}

# Sizes derived from the pointer width ($SIZE_T) and hash word size ($SZ).
# NOTE(review): presumably the stack frame size and the size of its
# local-variable area -- the consumers are outside this section.
$FRAME  = 32*$SIZE_T + 16*$SZ;
$LOCALS = 6*$SIZE_T;

# Fixed registers and the three incoming arguments.  The argument
# registers are shared with the scratch names below:
# $ctx with $a0, $inp with $a1 and $num with $t0.
($sp,  $toc)       = ("r1", "r2");
($ctx, $inp, $num) = ("r3", "r4", "r5");

# Scratch registers and the constant-table pointer.
($T, $a0, $a1, $t0, $t1, $Tbl) = ("r0", "r3", "r4", "r5", "r6", "r7");

# The eight working variables.  $F is r13 except when $SIZE_T is 8,
# where it becomes r2 (the same register as $toc) so that the TLS
# pointer register is left untouched.
($A, $B, $C, $D, $E) = ("r8", "r9", "r10", "r11", "r12");
$F                   = ($SIZE_T==8) ? "r2" : "r13";
($G, $H)             = ("r14", "r15");

@V = ($A,$B,$C,$D,$E,$F,$G,$H);

# The X vector lives in r16..r31.
@X = map { "r$_" } (16..31);

# $inp is deliberately re-pointed from r4 to r31, which is also $X[15].
$inp = "r31";

123221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromsub ROUND_00_15 {
124221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommy ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
125221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$code.=<<___;
126221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	$LD	$T,`$i*$SZ`($Tbl)
127221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	$ROR	$a0,$e,$Sigma1[0]
128221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	$ROR	$a1,$e,$Sigma1[1]
129221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	and	$t0,$f,$e
130221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	andc	$t1,$g,$e
131221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$T,$T,$h
132221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor	$a0,$a0,$a1
133