1// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
2// On Win64i compile with ias.exe.
3.text
4
// OPENSSL_cpuid_setup: intentionally empty on IA-64.
// The procedure consists of a single bundle that returns to the
// caller through b0; it reads no arguments and writes no registers.
5.global	OPENSSL_cpuid_setup#
6.proc	OPENSSL_cpuid_setup#
7OPENSSL_cpuid_setup:
// Return immediately (the assembler pads the remaining slots).
8{ .mib;	br.ret.sptk.many	b0		};;
9.endp	OPENSSL_cpuid_setup#
10
// OPENSSL_rdtsc: returns the current value of ar.itc (the Interval
// Time Counter application register).
// In:   nothing
// Out:  r8 = ar.itc
11.global	OPENSSL_rdtsc#
12.proc	OPENSSL_rdtsc#
13OPENSSL_rdtsc:
// Read the counter into r8 and return in the same bundle.
14{ .mib;	mov			r8=ar.itc
15	br.ret.sptk.many	b0		};;
16.endp   OPENSSL_rdtsc#
17
// OPENSSL_atomic_add: atomically adds r33 to the 32-bit word at [r32]
// using a compare-and-exchange retry loop.
// In:   r32 = address of the 32-bit word
//       r33 = amount to add
// Out:  r8  = new value (old + amount), sign-extended from 32 bits
// Uses: r2 (value observed in memory), r3 (value expected by the
//       compare), p6, ar.ccv
// NOTE(review): presumably matches a C prototype along the lines of
// int OPENSSL_atomic_add(int *val, int amount) - confirm against the
// C declaration.
18.global	OPENSSL_atomic_add#
19.proc	OPENSSL_atomic_add#
20.align	32
21OPENSSL_atomic_add:
// Initial (non-atomic) read of the current value.
22{ .mii;	ld4		r2=[r32]
23	nop.i		0
24	nop.i		0		};;
25.Lspin:
// ar.ccv = value we expect to find, r8 = value we want to store,
// r3 = copy of the expected value for the comparison below.
26{ .mii;	mov		ar.ccv=r2
27	add		r8=r2,r33
28	mov		r3=r2		};;
// Memory fence, then store r8 only if [r32] still equals ar.ccv.
// r2 receives the value that was actually found in memory.
29{ .mmi;	mf;;
30	cmpxchg4.acq	r2=[r32],r8,ar.ccv
31	nop.i		0		};;
// If the word changed between the read and the exchange, retry with
// the freshly observed value in r2 as the new expectation.
32{ .mib;	cmp.ne		p6,p0=r2,r3
33	nop.i		0
34(p6)	br.dpnt		.Lspin		};;
// Success: sign-extend the 32-bit result and return it in r8.
35{ .mib;	nop.m		0
36	sxt4		r8=r8
37	br.ret.sptk.many	b0	};;
38.endp	OPENSSL_atomic_add#
39
40// OPENSSL_wipe_cpu: zeroes scratch general and floating-point
41// registers, a 96-register stacked frame and the backing store behind
42// it. Returns a structure comprising a pointer to the top of the stack
43// of the caller (r8 = sp) and a pointer beyond the backing storage for
44// the current register frame (r9). The latter is required because
45// wiping the backing storage for the current frame (as this procedure
46// does) might be insufficient: one might have to go further, toward
// higher addresses, to reach the whole "retroactively" saved context...
//
// In:   nothing
// Out:  r8 = sp, r9 = backing-store pointer (see above)
// Saved and restored: ar.lc (via r3), predicates selected by the
//       0x1ffff mask (via r8)
47.global	OPENSSL_wipe_cpu#
48.proc	OPENSSL_wipe_cpu#
49.align	32
50OPENSSL_wipe_cpu:
// Unwind information: no memory stack frame; ar.pfs is kept in r2 and
// ar.lc in r3.
51	.prologue
52	.fframe	0
53	.save	ar.pfs,r2
54	.save	ar.lc,r3
// Allocate a frame of 96 locals, all of them rotating, with no inputs
// and no outputs; keep copies of ar.pfs and ar.lc. The brp is only a
// branch-prediction hint for the loop below.
55{ .mib;	alloc		r2=ar.pfs,0,96,0,96
56	mov		r3=ar.lc
57	brp.loop.imp	.L_wipe_top,.L_wipe_end-16
58					};;
// r9 = backing store pointer for this frame, r8 = saved predicates,
// loop counter = 96.
59{ .mii;	mov		r9=ar.bsp
60	mov		r8=pr
61	mov		ar.lc=96	};;
62	.body
// Start storing at ar.bsp + 96*8 - 8 and walk downwards; epilogue
// count of 1 so the loop has no extra drain iterations.
63{ .mii;	add		r9=96*8-8,r9
64	mov		ar.ec=1		};;
65
66// One could sweep twice as fast, but then we could not guarantee
67// that the backing storage is wiped...
// Each iteration stores one zero doubleword to the backing store
// (post-decrementing r9 by 8) and zeroes r127 and f127; br.ctop then
// rotates the register files so that the next iteration hits the next
// rotating register.
// NOTE(review): with ar.lc=96 and ar.ec=1 this loop appears to run 97
// times, so the final st8 would land at ar.bsp-8 - confirm intended.
68.L_wipe_top:
69{ .mfi;	st8		[r9]=r0,-8
70	mov		f127=f0
71	mov		r127=r0		}
72{ .mfb;	nop.m		0
73	nop.f		0
74	br.ctop.sptk	.L_wipe_top	};;
75.L_wipe_end:
76
// Zero static general registers r11 and r14-r31 together with
// floating-point registers f6-f15.
// NOTE(review): r10 is not written anywhere in this procedure -
// confirm whether that is deliberate.
77{ .mfi;	mov		r11=r0
78	mov		f6=f0
79	mov		r14=r0		}
80{ .mfi;	mov		r15=r0
81	mov		f7=f0
82	mov		r16=r0		}
83{ .mfi;	mov		r17=r0
84	mov		f8=f0
85	mov		r18=r0		}
86{ .mfi;	mov		r19=r0
87	mov		f9=f0
88	mov		r20=r0		}
89{ .mfi;	mov		r21=r0
90	mov		f10=f0
91	mov		r22=r0		}
92{ .mfi;	mov		r23=r0
93	mov		f11=f0
94	mov		r24=r0		}
95{ .mfi;	mov		r25=r0
96	mov		f12=f0
97	mov		r26=r0		}
98{ .mfi;	mov		r27=r0
99	mov		f13=f0
100	mov		r28=r0		}
101{ .mfi;	mov		r29=r0
102	mov		f14=f0
103	mov		r30=r0		}
104{ .mfi;	mov		r31=r0
105	mov		f15=f0
106	nop.i		0		}
// Zero the remaining floating-point registers f16-f31, one per bundle.
107{ .mfi;	mov		f16=f0		}
108{ .mfi;	mov		f17=f0		}
109{ .mfi;	mov		f18=f0		}
110{ .mfi;	mov		f19=f0		}
111{ .mfi;	mov		f20=f0		}
112{ .mfi;	mov		f21=f0		}
113{ .mfi;	mov		f22=f0		}
114{ .mfi;	mov		f23=f0		}
115{ .mfi;	mov		f24=f0		}
116{ .mfi;	mov		f25=f0		}
117{ .mfi;	mov		f26=f0		}
118{ .mfi;	mov		f27=f0		}
119{ .mfi;	mov		f28=f0		}
120{ .mfi;	mov		f29=f0		}
121{ .mfi;	mov		f30=f0		}
// Advance r9 by 96*8+8 from where the loop left it (second return
// value), zero f31 and restore the predicates saved in r8 under the
// 0x1ffff mask.
122{ .mfi;	add		r9=96*8+8,r9
123	mov		f31=f0
124	mov		pr=r8,0x1ffff	}
// First return value is the current stack pointer; restore ar.lc and
// return.
125{ .mib;	mov		r8=sp
126	mov		ar.lc=r3
127	br.ret.sptk	b0		};;
128.endp	OPENSSL_wipe_cpu#
129
// OPENSSL_cleanse: overwrites len bytes starting at ptr with zeros.
// In:   r32 = ptr
//       r33 = len (bytes); len == 0 returns without storing anything
// Out:  nothing
// Uses: r2 (alignment / rounded length scratch), p6, p7;
//       r32 and r33 are advanced / decremented as the fill proceeds
// Strategy: fewer than 15 bytes are cleared one byte at a time;
// otherwise single bytes are stored until ptr is 8-byte aligned, then
// 8-byte stores are used, and any tail goes back to the byte loop.
// NOTE(review): presumably matches a C prototype along the lines of
// void OPENSSL_cleanse(void *ptr, size_t len) - confirm against the
// C declaration.
130.global	OPENSSL_cleanse#
131.proc	OPENSSL_cleanse#
132OPENSSL_cleanse:
// Nothing to do for len == 0. On 32-bit HP-UX the pointer is first
// converted with addp4 before it is used as an address.
133{ .mib;	cmp.eq		p6,p0=0,r33	    // len==0
134#if defined(_HPUX_SOURCE) && !defined(_LP64)
135	addp4		r32=0,r32
136#endif
137(p6)	br.ret.spnt	b0		};;
// r2 = ptr & 7 (misalignment); take the bulk path when len >= 15,
// otherwise fall through into the byte loop.
138{ .mib;	and		r2=7,r32
139	cmp.leu		p6,p0=15,r33	    // len>=15
140(p6)	br.cond.dptk	.Lot		};;
141
// Byte loop: store one zero byte, advance ptr, then either loop
// (more than one byte was left) or return (that was the last byte).
// The compare reads len before the add in the next bundle updates it.
142.Little:
143{ .mib;	st1		[r32]=r0,1
144	cmp.ltu		p6,p7=1,r33	}  // len>1
145{ .mbb;	add		r33=-1,r33	   // len--
146(p6)	br.cond.dptk	.Little
147(p7)	br.ret.sptk.many	b0	};;
148
// Alignment loop: while ptr is not 8-byte aligned (r2 != 0), store a
// single zero byte, recompute r2 from the advanced ptr and decrement
// len.
149.Lot:
150{ .mib;	cmp.eq		p6,p0=0,r2
151(p6)	br.cond.dptk	.Laligned	};;
152{ .mmi;	st1		[r32]=r0,1;;
153	and		r2=7,r32	}
154{ .mib;	add		r33=-1,r33
155	br		.Lot		};;
156
// Aligned loop: store 8 zero bytes per iteration. r2 is computed from
// len before the decrement in the same instruction group, so the
// loop continues while at least 8 bytes remain after this store.
157.Laligned:
158{ .mmi;	st8		[r32]=r0,8
159	and		r2=-8,r33	    // len&~7
160	add		r33=-8,r33	};; // len-=8
161{ .mib;	cmp.ltu		p6,p0=8,r2	    // ((len+8)&~7)>8
162(p6)	br.cond.dptk	.Laligned	};;
163
// Done if nothing is left; otherwise clear the remaining tail bytes
// with the byte loop.
164{ .mbb;	cmp.eq		p6,p7=r0,r33
165(p7)	br.cond.dpnt	.Little
166(p6)	br.ret.sptk.many	b0	};;
167.endp	OPENSSL_cleanse#
168