1221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#!/usr/bin/env perl
2221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
3221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# ====================================================================
4221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# project. The module is, however, dual licensed under OpenSSL and
6221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# CRYPTOGAMS licenses depending on where you obtain it. For further
7221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# details see http://www.openssl.org/~appro/cryptogams/.
8221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# ====================================================================
9221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank. The module is big-endian [which is
# not a big deal as there're no little-endian targets left around].
14221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#
15221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# (*) this means that this module is inappropriate for PPC403? Does
16221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#     anybody know if pre-POWER3 can sustain unaligned load?
17221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
18221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# 			-m64	-m32
19221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# ----------------------------------
20221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# PPC970,gcc-4.0.0	+76%	+59%
21221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# Power6,xlc-7		+68%	+33%
22221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Command line: <flavour> [<extra argument>]
# The first argument selects the word size. The second one, if present,
# is appended verbatim to the ppc-xlate.pl command line below
# (presumably the output file name -- confirm against ppc-xlate.pl).
$flavour = shift;

# Word-size dependent parameters. /64/ is tested first, so a flavour
# string matching both patterns is treated as 64-bit. Anything matching
# neither pattern aborts the script.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Look for ppc-xlate.pl in the directory this script was started from,
# then in ../../perlasm relative to that directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on everything written to STDOUT is piped through the
# translator. The low-precedence "or" is deliberate: with "||" the test
# applied to the (always true) command string instead of to the result
# of open(), so a failure to start the pipe was never reported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
47221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Sizes derived from the word size chosen above. Judging by the names
# these are the stack frame size and the size of its local area --
# TODO confirm against the prologue/epilogue, which is not in view here.
$FRAME  = 64 + 24*$SIZE_T;
$LOCALS = $SIZE_T*6;

# Symbolic names for r0..r5. $ctx/$inp/$num look like the three
# function arguments -- TODO confirm against the function entry code.
($K,$sp,$toc,$ctx,$inp,$num) = map { "r$_" } 0..5;

# Scratch registers used inside the round bodies.
($t0,$t1) = ("r15","r6");

# Working variables r7..r12, also available as the list @V.
($A,$B,$C,$D,$E,$T) = map { "r$_" } 7..12;
@V = ($A,$B,$C,$D,$E,$T);

# The 16-entry X vector is kept in registers r16..r31.
@X = map { "r$_" } 16..31;
70221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
# Append the instructions for one round to $code. The name suggests
# rounds 0..19 of a SHA-1 style compression loop; the caller is not
# visible here, so treat that as an assumption.
#
# Arguments: the round number followed by six register names. In the
# emitted code the sixth register receives
#   K + fifth + X[round] + rotl(first,5)
#     + ((third & second) | (fourth & ~second))
# the fifth register is overwritten with rotl(first,5), and the second
# register is rotated left by 30 in place.
#
# Rounds below 15 also load the next input word from $inp. Rounds from
# 15 on instead update X[(round+1)%16] in place by xor-ing in three
# other X entries and rotating the result left by one.
#
# The back-quoted offsets are emitted as-is; nothing in this sub
# evaluates them.
sub BODY_00_19 {
my ($round,$va,$vb,$vc,$vd,$ve,$vf)=@_;
my $next=$round+1;

if ($round<15) {
	# Round 0 has nobody before it to fetch its input word.
	if ($round==0) {
		$code.=<<___;
	lwz	$X[$round],`$round*4`($inp)
___
	}
	$code.=<<___;
	lwz	$X[$next],`$next*4`($inp)
	add	$vf,$K,$ve
	rotlwi	$ve,$va,5
	add	$vf,$vf,$X[$round]
	and	$t0,$vc,$vb
	add	$vf,$vf,$ve
	andc	$t1,$vd,$vb
	rotlwi	$vb,$vb,30
	or	$t0,$t0,$t1
	add	$vf,$vf,$t0
___
} else {
	# Register names are resolved up front so the template below
	# shows plain operands only.
	my $cur=$X[$round%16];
	my $upd=$X[$next%16];
	my ($x2,$x8,$x13)=@X[map { ($next+$_)%16 } (2,8,13)];
	$code.=<<___;
	add	$vf,$K,$ve
	rotlwi	$ve,$va,5
	xor	$upd,$upd,$x2
	add	$vf,$vf,$cur
	and	$t0,$vc,$vb
	xor	$upd,$upd,$x8
	add	$vf,$vf,$ve
	andc	$t1,$vd,$vb
	rotlwi	$vb,$vb,30
	or	$t0,$t0,$t1
	xor	$upd,$upd,$x13
	add	$vf,$vf,$t0
	rotlwi	$upd,$upd,1
___
}
}
105221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
106221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromsub BODY_20_39 {
107221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommy ($i,$a,$b,$c,$d,$e,$f)=@_;
108221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrommy $j=$i+1;
109221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$code.=<<___ if ($i<79);
110221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$f,$K,$e
111221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rotlwi	$e,$a,5
112221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
113221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$f,$f,@X[$i%16]
114221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor	$t0,$b,$c
115221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
116221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$f,$f,$e
117221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rotlwi	$b,$b,30
118221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor	$t0,$t0,$d
119221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
120221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$f,$f,$t0
121221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rotlwi	@X[$j%16],@X[$j%16],1
122221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom___
123221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom$code.=<<___ if ($i==79);
124221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$f,$K,$e
125221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rotlwi	$e,$a,5
126221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	lwz	r16,0($ctx)
127221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$f,$f,@X[$i%16]
128221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	xor	$t0,$b,$c
129221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	lwz	r17,4($ctx)
130221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	add	$f,$f,$e
131221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rotlwi	$b,$b,30
132221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	lwz	r18,8($ctx)
133