1c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org#!/usr/bin/env perl
2c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org#
3c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# ====================================================================
4c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# project. The module is, however, dual licensed under OpenSSL and
6c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# CRYPTOGAMS licenses depending on where you obtain it. For further
7c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# details see http://www.openssl.org/~appro/cryptogams/.
8c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# ====================================================================
9c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org#
10c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# Eternal question is what's wrong with compiler generated code? The
11c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# trick is that it's possible to reduce the number of shifts required
12c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# to perform rotations by maintaining copy of 32-bit value in upper
13c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# bits of 64-bit register. Just follow mux2 and shrp instructions...
14c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which
15c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org# is >50% better than HP C and >2x better than gcc.
16c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Start the generated assembly text: two identification strings followed by
# the .explicit assembler directive and an empty line.  Everything else the
# script produces is appended to $code.
$code = <<"END_OF_PREAMBLE";
.ident  "sha1-ia64.s, version 1.3"
.ident  "IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
.explicit

END_OF_PREAMBLE
23c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
24c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Decide which instruction the prologue uses to form the ctx/inp pointers.
#
#   $os     operating system name (normally $^O)
#   @flags  command-line arguments of the script (normally @ARGV)
#
# Returns "add" everywhere except on HP-UX, where "addp4" is the default
# (presumably for the 32-bit data model -- confirm against the HP-UX ABI)
# and "add" is used only when a 64-bit compiler flag, +DD64 or -mlp64, is
# among the arguments.
sub _select_addp {
	my ($os,@flags)=@_;

	return "add" if ($os ne "hpux");

	for my $flag (@flags) {
		# This has to be an alternation.  The former [\+DD|\-mlp]64 was a
		# character class and therefore matched any of the characters
		# + D | - m l p followed by 64 (e.g. "-m64" or "temp64.s").
		return "add" if ($flag =~ /(?:\+DD|-mlp)64/);
	}
	return "addp4";
}

$ADDP=_select_addp($^O,@ARGV);
29c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
#$human=1;
# Choose the names that get substituted into the assembly templates: the
# working variables A..E, the chaining values h0..h4, the four round
# constants and the sixteen message-schedule words @X.
if (!$human) {
	# default: register names
	($A,$B,$C,$D,$E)	= map { "loc$_" } (0..4);
	($h0,$h1,$h2,$h3,$h4)	= map { "loc$_" } (5..9);
	($K_00_19,$K_20_39)	= ("r14","r15");
	($K_40_59,$K_60_79)	= ("loc10","loc11");
	@X			= map { "r$_" } (16..31);
}
else {	# symbolic names, useful for visual code auditing...
	($A,$B,$C,$D,$E)	= qw(A B C D E);
	($h0,$h1,$h2,$h3,$h4)	= map { "h$_" } (0..4);
	($K_00_19,$K_20_39,$K_40_59,$K_60_79)
				= qw(K_00_19 K_20_39 K_40_59 K_60_79);
	@X			= map { "X$_" } (0..15);
}
47c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Append the assembly for one of rounds 0..15 to the caller's buffer.
#
# Arguments:
#   \$buffer        reference to the scalar the text is appended to
#   $i              round number, 0..15
#   $a,$b,$c,$d,$e  names holding the working variables in this round
#
# Round 0 first emits the loads that start assembling X[0] byte by byte.
# Rounds 0..14 interleave the round arithmetic with the byte loads for the
# next message word ("forward Xload"); round 15 interleaves it with the
# first schedule update ("forward Xupdate") instead.  The templates read
# tmp6, which the preceding round (or the loop head) set up with mux2.
sub BODY_00_15 {
local	*code=shift;		# $code now aliases the caller's buffer
my	($i,$a,$b,$c,$d,$e)=@_;
my	$j=$i+1;
my	$Xn=$X[$j%16];		# word prepared for the next round

$code.=<<___ if ($i==0);
{ .mmi;	ld1	$X[$i]=[inp],2		    // MSB
	ld1	tmp2=[tmp3],2		};;
{ .mmi;	ld1	tmp0=[inp],2
	ld1	tmp4=[tmp3],2		    // LSB
	dep	$X[$i]=$X[$i],tmp2,8,8	};;
___
if ($i<15) {
	$code.=<<___;
{ .mmi;	ld1	$Xn=[inp],2		    // forward Xload
	nop.m	0x0
	dep	tmp1=tmp0,tmp4,8,8	};;
{ .mmi;	ld1	tmp2=[tmp3],2		    // forward Xload
	and	tmp4=$c,$b
	dep	$X[$i]=$X[$i],tmp1,16,16} //;;
{ .mmi;	add	$e=$e,$K_00_19		    // e+=K_00_19
	andcm	tmp1=$d,$b
	dep.z	tmp5=$a,5,27		};; // a<<5
{ .mmi;	add	$e=$e,$X[$i]		    // e+=Xload
	or	tmp4=tmp4,tmp1		    // F_00_19(b,c,d)=(b&c)|(~b&d)
	extr.u	tmp1=$a,27,5		};; // a>>27
{ .mmi;	ld1	tmp0=[inp],2		    // forward Xload
	add	$e=$e,tmp4		    // e+=F_00_19(b,c,d)
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	ld1	tmp4=[tmp3],2		    // forward Xload
	or	tmp5=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp5		    // e+=ROTATE(a,5)
	dep	$Xn=$Xn,tmp2,8,8	    // forward Xload
	mux2	$X[$i]=$X[$i],0x44	} //;;

___
	}
else	{
	# round 15: no more input to load, begin the schedule update
	$code.=<<___;
{ .mii;	and	tmp3=$c,$b
	dep	tmp1=tmp0,tmp4,8,8;;
	dep	$X[$i]=$X[$i],tmp1,16,16} //;;
{ .mmi;	add	$e=$e,$K_00_19		    // e+=K_00_19
	andcm	tmp1=$d,$b
	dep.z	tmp5=$a,5,27		};; // a<<5
{ .mmi;	add	$e=$e,$X[$i]		    // e+=Xupdate
	or	tmp4=tmp3,tmp1		    // F_00_19(b,c,d)=(b&c)|(~b&d)
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mmi;	xor	$Xn=$Xn,$X[($j+2)%16]	    // forward Xupdate
	xor	tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate
	nop.i	0			};;
{ .mmi;	add	$e=$e,tmp4		    // e+=F_00_19(b,c,d)
	xor	$Xn=$Xn,tmp3		    // forward Xupdate
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi; or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	mux2	$X[$i]=$X[$i],0x44	};;

___
	}
}
113c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Append the assembly for one of rounds 16..19 to the caller's buffer.
#
# Arguments:
#   \$buffer        reference to the scalar the text is appended to
#   $i              round number
#   $a,$b,$c,$d,$e  names holding the working variables in this round
#
# Uses the same F_00_19 function and K_00_19 constant as rounds 0..15, but
# takes the message word from the schedule ($X[$i%16]) and computes the
# schedule word needed by the following round ("forward Xupdate").
sub BODY_16_19 {
local	*code=shift;		# $code now aliases the caller's buffer
my	($i,$a,$b,$c,$d,$e)=@_;
my	$j=$i+1;
my	$Xn=$X[$j%16];		# schedule word prepared for the next round

$code.=<<___;
{ .mib;	add	$e=$e,$K_00_19		    // e+=K_00_19
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	andcm	tmp1=$d,$b
	and	tmp0=$c,$b		};;
{ .mmi;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	or	tmp0=tmp0,tmp1		    // F_00_19(b,c,d)=(b&c)|(~b&d)
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mmi;	xor	$Xn=$Xn,$X[($j+2)%16]	    // forward Xupdate
	xor	tmp3=$X[($j+8)%16],$X[($j+13)%16]	// forward Xupdate
	nop.i	0			};;
{ .mmi;	add	$e=$e,tmp0		    // f+=F_00_19(b,c,d)
	xor	$Xn=$Xn,tmp3		    // forward Xupdate
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0			};;

___
}
142c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Append the assembly for one round using F(b,c,d)=b^c^d to the caller's
# buffer.  Serves rounds 20..39 directly and rounds 60..79 via BODY_60_79.
#
# Arguments:
#   \$buffer        reference to the scalar the text is appended to
#   $i              round number
#   $a,$b,$c,$d,$e  names holding the working variables in this round
#   $Konst          optional name holding the round constant; defaults to
#                   $K_20_39
#
# Every round except 79 also computes the schedule word for the following
# round.  Round 79 has no successor, so the freed slots add a, c and d into
# $h1, $h3 and $h4 ("wrap up") and recompute tmp3=inp+1 for the next block.
sub BODY_20_39 {
local	*code=shift;		# $code now aliases the caller's buffer
my	($i,$a,$b,$c,$d,$e,$Konst)=@_;
	$Konst = $K_20_39 if (!defined($Konst));
my	$j=$i+1;
my	$Xn=$X[$j%16];		# schedule word prepared for the next round

if ($i<79) {
$code.=<<___;
{ .mib;	add	$e=$e,$Konst		    // e+=K_XX_XX
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	xor	tmp0=$c,$b
	xor	$Xn=$Xn,$X[($j+2)%16]	};; // forward Xupdate
{ .mib;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mib;	xor	tmp0=tmp0,$d		    // F_20_39(b,c,d)=b^c^d
	xor	$Xn=$Xn,$X[($j+8)%16]	};; // forward Xupdate
{ .mmi;	add	$e=$e,tmp0		    // e+=F_20_39(b,c,d)
	xor	$Xn=$Xn,$X[($j+13)%16]	    // forward Xupdate
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0			};;

___
}
else {
# final round: fold part of the state into h1/h3/h4 instead of updating X
$code.=<<___;
{ .mib;	add	$e=$e,$Konst		    // e+=K_60_79
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	xor	tmp0=$c,$b
	add	$h1=$h1,$a		};; // wrap up
{ .mib;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mib;	xor	tmp0=tmp0,$d		    // F_20_39(b,c,d)=b^c^d
	add	$h3=$h3,$c		};; // wrap up
{ .mmi;	add	$e=$e,tmp0		    // e+=F_20_39(b,c,d)
	or	tmp1=tmp1,tmp5		    // ROTATE(a,5)
	shrp	$b=tmp6,tmp6,2		};; // b=ROTATE(b,30) ;;?
{ .mmi;	add	$e=$e,tmp1		    // e+=ROTATE(a,5)
	add	tmp3=1,inp		    // used in unaligned codepath
	add	$h4=$h4,$d		};; // wrap up

___
}
}
191c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Append the assembly for one of rounds 40..59 to the caller's buffer.
#
# Arguments:
#   \$buffer        reference to the scalar the text is appended to
#   $i              round number
#   $a,$b,$c,$d,$e  names holding the working variables in this round
#
# The round function is accumulated as (c&d) + (b&(c^d)): the first term is
# added to the rotated a in tmp5, the second directly to e.  The schedule
# word for the following round is computed alongside ("forward Xupdate").
sub BODY_40_59 {
local	*code=shift;		# $code now aliases the caller's buffer
my	($i,$a,$b,$c,$d,$e)=@_;
my	$j=$i+1;
my	$Xn=$X[$j%16];		# schedule word prepared for the next round

$code.=<<___;
{ .mib;	add	$e=$e,$K_40_59		    // e+=K_40_59
	dep.z	tmp5=$a,5,27		}   // a<<5
{ .mib;	and	tmp1=$c,$d
	xor	tmp0=$c,$d		};;
{ .mmi;	add	$e=$e,$X[$i%16]		    // e+=Xupdate
	add	tmp5=tmp5,tmp1		    // a<<5+(c&d)
	extr.u	tmp1=$a,27,5		}   // a>>27
{ .mmi;	and	tmp0=tmp0,$b
	xor	$Xn=$Xn,$X[($j+2)%16]	    // forward Xupdate
	xor	tmp3=$X[($j+8)%16],$X[($j+13)%16] };;	// forward Xupdate
{ .mmi;	add	$e=$e,tmp0		    // e+=b&(c^d)
	add	tmp5=tmp5,tmp1		    // ROTATE(a,5)+(c&d)
	shrp	$b=tmp6,tmp6,2		}   // b=ROTATE(b,30)
{ .mmi;	xor	$Xn=$Xn,tmp3
	mux2	tmp6=$a,0x44		};; // see b in next iteration
{ .mii;	add	$e=$e,tmp5		    // e+=ROTATE(a,5)+(c&d)
	shrp	$Xn=$Xn,$Xn,31		    // ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0x0			};;

___
}
# Rounds 60..79: same template as BODY_20_39, with $K_60_79 passed as the
# round constant after the caller's own arguments.
sub BODY_60_79 {
	my @round_args=@_;
	return BODY_20_39(@round_args,$K_60_79);
}
221c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Emit the fixed front part of sha1_block_data_order: the symbolic register
# aliases (tmp0..tmp6, ctx, inp), the prologue that allocates the register
# frame and saves ar.lc in r3 and pr in r2, the loads of h0..h4 from ctx and
# of the four round constants, the loop-count setup from in2, and the loop
# head .Ldtop, which copies h0..h4 into the working variables A..E and
# primes tmp6 from h1 for the first round.
$code.=<<___;
.text

tmp0=r8;
tmp1=r9;
tmp2=r10;
tmp3=r11;
ctx=r32;	// in0
inp=r33;	// in1

// void sha1_block_data_order(SHA_CTX *c,const void *p,size_t num);
.global	sha1_block_data_order#
.proc	sha1_block_data_order#
.align	32
sha1_block_data_order:
	.prologue
{ .mmi;	alloc	tmp1=ar.pfs,3,14,0,0
	$ADDP	tmp0=4,ctx
	.save	ar.lc,r3
	mov	r3=ar.lc		}
{ .mmi;	$ADDP	ctx=0,ctx
	$ADDP	inp=0,inp
	mov	r2=pr			};;
tmp4=in2;
tmp5=loc12;
tmp6=loc13;
	.body
{ .mlx;	ld4	$h0=[ctx],8
	movl	$K_00_19=0x5a827999	}
{ .mlx;	ld4	$h1=[tmp0],8
	movl	$K_20_39=0x6ed9eba1	};;
{ .mlx;	ld4	$h2=[ctx],8
	movl	$K_40_59=0x8f1bbcdc	}
{ .mlx;	ld4	$h3=[tmp0]
	movl	$K_60_79=0xca62c1d6	};;
{ .mmi;	ld4	$h4=[ctx],-16
	add	in2=-1,in2		    // adjust num for ar.lc
	mov	ar.ec=1			};;
{ .mmi;	nop.m	0
	add	tmp3=1,inp
	mov	ar.lc=in2		};; // brp.loop.imp: too far

.Ldtop:
{ .mmi;	mov	$A=$h0
	mov	$B=$h1
	mux2	tmp6=$h1,0x44		}
{ .mmi;	mov	$C=$h2
	mov	$D=$h3
	mov	$E=$h4			};;

___
273c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Generate the 80 rounds.  @V lists the names of the working variables in
# the order (a,b,c,d,e) expected by the BODY_* generators; after each round
# the list is rotated right by one position, so every name takes the next
# role in the following round.
{
	my @V=($A,$B,$C,$D,$E);
	# each entry: [ first round NOT handled by the generator, generator ]
	my @stages=(	[16,\&BODY_00_15],
			[20,\&BODY_16_19],
			[40,\&BODY_20_39],
			[60,\&BODY_40_59],
			[80,\&BODY_60_79]	);
	my $round=0;

	foreach my $stage (@stages) {
		my ($end,$emit)=@$stage;
		while ($round<$end) {
			$emit->(\$code,$round,@V);
			unshift(@V,pop(@V));
			$round++;
		}
	}

	# 80 rotations of a 5-element list must restore the initial order
	(($V[0] eq $A) and ($V[4] eq $E)) or die;	# double-check
}
285c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Emit the loop tail and the epilogue: add A and C into h0 and h2 (the
# round-79 template already emitted the additions into h1, h3 and h4),
# branch back to .Ldtop while blocks remain, then restore ar.lc from r3,
# store h0..h4 back through ctx, restore pr from r2 and return.  A stringz
# identification string is appended after the function.
$code.=<<___;
{ .mmb;	add	$h0=$h0,$A
	add	$h2=$h2,$C
	br.ctop.dptk.many	.Ldtop	};;
.Ldend:
{ .mmi;	add	tmp0=4,ctx
	mov	ar.lc=r3		};;
{ .mmi;	st4	[ctx]=$h0,8
	st4	[tmp0]=$h1,8		};;
{ .mmi;	st4	[ctx]=$h2,8
	st4	[tmp0]=$h3		};;
{ .mib;	st4	[ctx]=$h4,-16
	mov	pr=r2,0x1ffff
	br.ret.sptk.many	b0	};;
.endp	sha1_block_data_order#
stringz	"SHA1 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
___
303c9490d33b98b7affb729b5f1db13cb0a348471aagl@chromium.org
# Write the generated assembly to the file named by the first command-line
# argument, or to standard output when no (or an empty) argument is given.
$output=shift;
if ($output) {
	# Three-argument open: the name is never parsed for a mode.  A failed
	# open is fatal; it used to go unnoticed and the assembly was written
	# to the original standard output instead of the requested file.
	open(STDOUT,">",$output) or die "can't open $output: $!";
}
print $code;
# buffered write errors (e.g. a full disk) only surface when closing
close(STDOUT) or die "error closing STDOUT: $!";
306