ia64-test-rbs-asm.S revision 71ab0b2a3cee677026ca27420ad8f6c03bc02749
1/* libunwind - a platform-independent unwind library
2   Copyright (C) 2003 Hewlett-Packard Co
3	Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
4
5This file is part of libunwind.
6
7Permission is hereby granted, free of charge, to any person obtaining
8a copy of this software and associated documentation files (the
9"Software"), to deal in the Software without restriction, including
10without limitation the rights to use, copy, modify, merge, publish,
11distribute, sublicense, and/or sell copies of the Software, and to
12permit persons to whom the Software is furnished to do so, subject to
13the following conditions:
14
15The above copyright notice and this permission notice shall be
16included in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
25
26#include "ia64-test-rbs.h"
27
/* Memory for the alternate stacks: NSTACKS*STACK_SIZE bytes, declared as a
   common symbol with an alignment argument of 16.  rbs_spill_N points both
   sp and ar.bspstore into the region selected by its iteration argument.  */
28	.common stackmem, NSTACKS*STACK_SIZE, 16
29
30	.text
31
/* Byte offsets of the slots in the FRAME_SIZE-byte save area that
   rbs_spill_N places at the top of its stack region.  After the stack
   switch, sp is 16 bytes below this area, which is why the unwind
   directives in SPILL() use <offset>+16.  */
32#define SAVED_SP_OFF		 0	/* caller's sp */
33#define SAVED_RP_OFF		 8	/* return pointer */
34#define SAVED_PFS_OFF		16	/* ar.pfs as read by alloc */
35#define SAVED_RNAT_OFF		24	/* ar.rnat */
36#define SAVED_BSP_OFF		32	/* ar.bsp */
37#define SAVED_BSPSTORE_OFF	40	/* ar.bspstore */
38#define FRAME_SIZE		48	/* total size of the save area */
39
/* SPILL(n) defines the global procedure rbs_spill_<n>.

   rbs_spill_<n>(iteration, next_func):
     - allocates a register frame of 2 inputs, n-2 locals and 2 outputs;
     - saves sp, rp, ar.pfs, ar.bsp, ar.bspstore and ar.rnat in a
       FRAME_SIZE-byte area at the top of the stack region at
       stackmem + (iteration << STACK_SIZE_SHIFT), emitting unwind
       directives that describe each save slot;
     - switches sp to 16 bytes below that area and ar.bspstore to the
       base of the region;
     - calls the function whose descriptor pointer is next_func[iteration]
       with arguments (iteration + 1, next_func);
     - on return, adds 1 to a non-negative return value in r8 (a negative
       value is passed through unchanged), reloads the dirty registers
       from the alternate backing store with loadrs, restores the saved
       state and returns.

   Reading ar.rnat and writing ar.bspstore are done without touching
   ar.rsc first; presumably the caller has already put the RSE into
   enforced lazy mode (loadup does so) -- confirm for other callers.
   The region size is assumed to be 1 << STACK_SIZE_SHIFT == STACK_SIZE;
   both come from ia64-test-rbs.h, which is not visible here.  */
40#define SPILL(n)							     \
41	/* int rbs_spill_#n(long iteration, int (*next_func[])()) */	     \
42	.globl rbs_spill_##n;						     \
43	.proc rbs_spill_##n;						     \
44rbs_spill_##n:								     \
45	.prologue;							     \
46	alloc r18 = ar.pfs, 2, (n)-2, 2, 0;/* r18 = ar.pfs; frame: 2 in, n-2 local, 2 out */ \
47	/* first, calculate address of new stack: */			     \
48	addl r2 = @ltoff(stackmem), gp;	/* r2 = address of stackmem's linkage-table entry */ \
49	shladd r8 = in0, 3, in1;	/* r8 = &next_func[iteration] */     \
50	;;								     \
51	ld8 r2 = [r2];			/* r2 = &stackmem */		     \
52	ld8 r8 = [r8];			/* r8 = next_func[iteration] (descriptor address) */ \
53	shl r3 = in0, STACK_SIZE_SHIFT;	/* r3 = iteration << STACK_SIZE_SHIFT */ \
54	;;								     \
55	ld8 r9 = [r8], 8;;		/* r9 = target's entry-point */	     \
56	ld8 gp = [r8];			/* gp = target's gp */		     \
57	add r2 = r2, r3;		/* r2 = base of this iteration's stack region */ \
58	;;								     \
59	mov b6 = r9;			/* b6 = call target */		     \
60	addl r3 = STACK_SIZE-FRAME_SIZE, r2; /* r3 = &stackframe (top FRAME_SIZE bytes of region) */ \
61	;;								     \
62	st8 [r3] = sp;			/* save caller's sp in the SAVED_SP_OFF slot */ \
63	.vframesp SAVED_SP_OFF+16;	/* unwind: previous sp is stored at new sp+16 */ \
64	adds sp = -16, r3;		/* switch the memory stack */	     \
65	;;								     \
66	adds r3 = (SAVED_RP_OFF - SAVED_SP_OFF), r3; /* r3 -> rp slot */     \
67	mov r16 = rp;							     \
68	;;								     \
69	.savesp rp, SAVED_RP_OFF+16;					     \
70	st8 [r3] = r16, (SAVED_PFS_OFF - SAVED_RP_OFF);	/* save rp; r3 -> pfs slot */ \
71	;;								     \
72	.savesp ar.pfs, SAVED_PFS_OFF+16;				     \
73	st8 [r3] = r18, (SAVED_BSP_OFF - SAVED_PFS_OFF); /* save pfs; r3 -> bsp slot */ \
74	mov r16 = ar.bsp;						     \
75	mov r17 = ar.bspstore;						     \
76	mov r18 = ar.rnat;		/* r18 is reused; the pfs value was stored above */ \
77	;;								     \
78	.savesp ar.bsp, SAVED_BSP_OFF+16;				     \
79	st8 [r3] = r16, (SAVED_BSPSTORE_OFF - SAVED_BSP_OFF); /* save bsp; r3 -> bspstore slot */ \
80	;;								     \
81	.savesp ar.bspstore, SAVED_BSPSTORE_OFF+16;			     \
82	st8 [r3] = r17, (SAVED_RNAT_OFF - SAVED_BSPSTORE_OFF); /* save bspstore; r3 -> rnat slot */ \
83	mov out1 = in1;			/* second argument: next_func, passed through */ \
84	;;								     \
85	.savesp ar.rnat, SAVED_RNAT_OFF+16;				     \
86	st8 [r3] = r18;			/* save rnat */			     \
87	.body;								     \
88	mov ar.bspstore = r2;		/* switch the backing store */	     \
89	adds out0 = 1, in0;		/* first argument: iteration + 1 */  \
90	;;								     \
91	br.call.sptk.many rp = b6;					     \
921:	/* switch back to stack: */					     \
93	adds r3 = SAVED_SP_OFF+16, sp;	/* r3 -> saved-sp slot */	     \
94	cmp.ge p8, p0 = r8, r0;		/* p8 = (callee's return value >= 0) */ \
95	;;								     \
96(p8)	add r8 = 1, r8;			/* non-negative result: return it plus one */ \
97	ld8 r16 = [r3], (SAVED_RP_OFF-SAVED_SP_OFF);;	/* saved sp */	     \
98	ld8 r17 = [r3], (SAVED_PFS_OFF-SAVED_RP_OFF);;	/* saved rp */	     \
99	ld8 r18 = [r3], (SAVED_RNAT_OFF-SAVED_PFS_OFF);;/* saved pfs */	     \
100	ld8 r19 = [r3], (SAVED_BSP_OFF-SAVED_RNAT_OFF);;/* saved rnat */     \
101	ld8 r20 = [r3], (SAVED_BSPSTORE_OFF-SAVED_BSP_OFF);;/* saved bsp; r20 is not used below */  \
102	ld8 r21 = [r3];;				/* saved bspstore */ \
103	mov rp = r17;							     \
104	mov ar.pfs = r18;						     \
105	shl r3 = in0, STACK_SIZE_SHIFT;					     \
106	addl r2 = @ltoff(stackmem), gp;; /* NOTE(review): gp is not reloaded after the call; assumes the callee left a gp valid for stackmem -- confirm */ \
107	ld8 r2 = [r2];;			/* r2 = &stackmem */		     \
108	add r2 = r2, r3;		/* r2 = base of this iteration's stack region */ \
109	mov r3 = ar.bsp;;						     \
110	sub r2 = r3, r2;;		/* r2 = dirty_size (ar.bsp - region base) */ \
111	shl r2 = r2, 16;;		/* shift into bits 16 and up of the ar.rsc value (loadrs field; low bits 0) */ \
112	mov ar.rsc = r2;;						     \
113	alloc r3 = ar.pfs, 0, 0, 0, 0;;	/* shrink the current frame to zero registers before loadrs */ \
114	loadrs;;			/* reload the dirty registers from the alternate backing store */ \
115	mov ar.bspstore = r21;;	/* this also restores ar.bsp */		     \
116	mov ar.rnat = r19;						     \
117	.restore sp;							     \
118	mov sp = r16;			/* back on the caller's memory stack */ \
119	br.ret.sptk.many rp;						     \
120	.endp rbs_spill_##n
121
/* Instantiate rbs_spill_2 through rbs_spill_94.  rbs_spill_N allocates
   2 inputs + (N-2) locals + 2 outputs, i.e. a register frame of N+2
   registers, so the instances cover frame sizes 4 through 96.  */
122		        SPILL(2);  SPILL(3)
123  SPILL(4);  SPILL(5);  SPILL(6);  SPILL(7)
124  SPILL(8);  SPILL(9); SPILL(10); SPILL(11)
125 SPILL(12); SPILL(13); SPILL(14); SPILL(15)
126 SPILL(16); SPILL(17); SPILL(18); SPILL(19)
127 SPILL(20); SPILL(21); SPILL(22); SPILL(23)
128 SPILL(24); SPILL(25); SPILL(26); SPILL(27)
129 SPILL(28); SPILL(29); SPILL(30); SPILL(31)
130 SPILL(32); SPILL(33); SPILL(34); SPILL(35)
131 SPILL(36); SPILL(37); SPILL(38); SPILL(39)
132 SPILL(40); SPILL(41); SPILL(42); SPILL(43)
133 SPILL(44); SPILL(45); SPILL(46); SPILL(47)
134 SPILL(48); SPILL(49); SPILL(50); SPILL(51)
135 SPILL(52); SPILL(53); SPILL(54); SPILL(55)
136 SPILL(56); SPILL(57); SPILL(58); SPILL(59)
137 SPILL(60); SPILL(61); SPILL(62); SPILL(63)
138 SPILL(64); SPILL(65); SPILL(66); SPILL(67)
139 SPILL(68); SPILL(69); SPILL(70); SPILL(71)
140 SPILL(72); SPILL(73); SPILL(74); SPILL(75)
141 SPILL(76); SPILL(77); SPILL(78); SPILL(79)
142 SPILL(80); SPILL(81); SPILL(82); SPILL(83)
143 SPILL(84); SPILL(85); SPILL(86); SPILL(87)
144 SPILL(88); SPILL(89); SPILL(90); SPILL(91)
145 SPILL(92); SPILL(93); SPILL(94)
146
/* LD_LOC(n): initialise local register loc<n> from the value stream at in1.

   Reads the next 32-bit value through in1 (post-incrementing in1 by 4).
   A non-zero value is ORed with r8 and kept in loc<n>; loadup sets r8 to
   iteration << 32 before using this macro.  A zero value instead triggers
   a speculative load (ld4.s) from the address in r0 into loc<n>;
   presumably this is meant to leave loc<n> holding a NaT, which is what
   CK_LOC tests for with tnat.z -- confirm against the test driver.
   Writes p8 and p9.  */
147#define LD_LOC(n)				\
148	ld4 loc##n = [in1], 4;;			/* loc<n> = next value; in1 += 4 */ \
149	cmp.eq p8, p9 = r0, loc##n;;		/* p8: value is zero, p9: value is non-zero */ \
150(p9)	or loc##n = loc##n, r8;			/* non-zero: merge in r8 */ \
151(p8)	ld4.s loc##n = [r0]			/* zero: speculative load from the address in r0 */
152
/* CK_LOC(n): verify that loc<n> still holds what LD_LOC(n) put there.

   Re-reads the next 32-bit value through in1 (post-incrementing in1 by 4)
   into r16.  If that value is zero, loc<n> must have its NaT bit set
   (tnat.z sets p10 when the NaT bit is clear).  Otherwise loc<n> must
   equal the value ORed with r9; loadup sets r9 to iteration << 32 before
   using this macro.  On a mismatch, r8 is set to -n and control branches
   to .fail.  Writes r16, p8, p9 and p10.  */
153#define CK_LOC(n)				\
154	ld4 r16 = [in1], 4;;			/* r16 = expected value; in1 += 4 */ \
155	cmp.eq p8, p9 = r0, r16;		/* p8: expected zero, p9: expected non-zero */ \
156	or r16 = r16, r9;;			/* merge in r9, as LD_LOC did with r8 */ \
157(p8)	tnat.z p10, p0 = loc##n;		/* p10 = loc<n> has no NaT bit (mismatch) */ \
158(p9)	cmp.ne p10, p0 = r16, loc##n;		/* p10 = loc<n> differs from the expected value */ \
159	;;					\
160(p10)	mov r8 = -n;				/* failure code: negated register index */ \
161(p10)	br.cond.spnt.many .fail
162
163	/* int loadup(long iteration, int *values, next_func[])
	 *
	 * Allocates a register frame of 3 inputs, 90 locals and 3 outputs
	 * and fills loc2..loc89 from values[] with LD_LOC (loc0 and loc1
	 * hold the saved rp and ar.pfs).  Then:
	 *   - if iteration == 1, calls the function whose descriptor
	 *     pointer is next_func[0] with arguments (0, next_func);
	 *   - otherwise calls loadup(iteration - 1, values, next_func).
	 * After the call returns, a negative result in r8 is returned
	 * unchanged.  Otherwise r8 is incremented and loc2..loc89 are
	 * re-checked against values[] with CK_LOC; the first mismatch
	 * makes the function return -n, n being the index of the
	 * offending local.
	 *
	 * ar.rsc is set to 0 here and is not restored before returning.
	 */
164
165	.global loadup
166	.proc loadup
167loadup:
168	.prologue
169	.save ar.pfs, r36	// r36 is loc1 (3 inputs, so loc0 is r35)
170	alloc loc1 = ar.pfs, 3, 90, 3, 0
171	.save rp, loc0
172	mov loc0 = rp
173	.body
174	cmp.eq p6, p7 = 1, in0	// p6: iteration == 1 (call next_func[0]); p7: recurse
175	;;
176	mov ar.rsc = 0		// put RSE into enforced lazy mode
177(p6)	mov out1 = in2		// next_func[0](0, next_func): second argument
178(p7)	mov out2 = in2		// loadup(iteration-1, values, next_func): third argument
179
180(p6)	ld8 r17 = [in2]		// get address of function descriptor
181(p7)	add out0 = -1, in0	// recursive call: iteration - 1
182(p7)	mov out1 = in1		// recursive call: values (in1 not yet advanced)
183
184	;;
185(p6)	ld8 r16 = [r17], 8	// load entry point
186	shl r8 = in0, 32	// store iteration # in top 32 bits
187	mov r18 = in1		// remember start of values; LD_LOC advances in1
188	;;
189(p6)	ld8 r1 = [r17]		// load gp
190(p6)	mov b6 = r16
191
192(p6)	mov out0 = 0		// next_func[0] is called with iteration 0
193	;;
	// Fill loc2..loc89 from values[]; see LD_LOC.
194	LD_LOC( 2); LD_LOC( 3)
195	LD_LOC( 4); LD_LOC( 5); LD_LOC( 6); LD_LOC( 7)
196	LD_LOC( 8); LD_LOC( 9); LD_LOC(10); LD_LOC(11)
197	LD_LOC(12); LD_LOC(13); LD_LOC(14); LD_LOC(15)
198	LD_LOC(16); LD_LOC(17); LD_LOC(18); LD_LOC(19)
199	LD_LOC(20); LD_LOC(21); LD_LOC(22); LD_LOC(23)
200	LD_LOC(24); LD_LOC(25); LD_LOC(26); LD_LOC(27)
201	LD_LOC(28); LD_LOC(29); LD_LOC(30); LD_LOC(31)
202	LD_LOC(32); LD_LOC(33); LD_LOC(34); LD_LOC(35)
203	LD_LOC(36); LD_LOC(37); LD_LOC(38); LD_LOC(39)
204	LD_LOC(40); LD_LOC(41); LD_LOC(42); LD_LOC(43)
205	LD_LOC(44); LD_LOC(45); LD_LOC(46); LD_LOC(47)
206	LD_LOC(48); LD_LOC(49); LD_LOC(50); LD_LOC(51)
207	LD_LOC(52); LD_LOC(53); LD_LOC(54); LD_LOC(55)
208	LD_LOC(56); LD_LOC(57); LD_LOC(58); LD_LOC(59)
209	LD_LOC(60); LD_LOC(61); LD_LOC(62); LD_LOC(63)
210	LD_LOC(64); LD_LOC(65); LD_LOC(66); LD_LOC(67)
211	LD_LOC(68); LD_LOC(69); LD_LOC(70); LD_LOC(71)
212	LD_LOC(72); LD_LOC(73); LD_LOC(74); LD_LOC(75)
213	LD_LOC(76); LD_LOC(77); LD_LOC(78); LD_LOC(79)
214	LD_LOC(80); LD_LOC(81); LD_LOC(82); LD_LOC(83)
215	LD_LOC(84); LD_LOC(85); LD_LOC(86); LD_LOC(87)
216	LD_LOC(88); LD_LOC(89)
217	;;
	// Rewind in1 to the start of values[] for the CK_LOC pass, then
	// take exactly one of the two calls (p6 and p7 are complementary).
218{	.mbb
219	mov in1 = r18
220(p6)	br.call.sptk.many rp = b6
221(p7)	br.call.sptk.many rp = loadup
222}
223	cmp.lt p8, p9 = r8, r0	// p8: callee returned a negative value
224	shl r9 = in0, 32	// store iteration # in top 32 bits
225(p8)	br.cond.spnt.few .fail	// pass a negative result straight through
226	;;
227	add r8 = 1, r8		// success so far: result is callee value + 1
	// Verify loc2..loc89 against values[]; see CK_LOC.
228	CK_LOC( 2); CK_LOC( 3)
229	CK_LOC( 4); CK_LOC( 5); CK_LOC( 6); CK_LOC( 7)
230	CK_LOC( 8); CK_LOC( 9); CK_LOC(10); CK_LOC(11)
231	CK_LOC(12); CK_LOC(13); CK_LOC(14); CK_LOC(15)
232	CK_LOC(16); CK_LOC(17); CK_LOC(18); CK_LOC(19)
233	CK_LOC(20); CK_LOC(21); CK_LOC(22); CK_LOC(23)
234	CK_LOC(24); CK_LOC(25); CK_LOC(26); CK_LOC(27)
235	CK_LOC(28); CK_LOC(29); CK_LOC(30); CK_LOC(31)
236	CK_LOC(32); CK_LOC(33); CK_LOC(34); CK_LOC(35)
237	CK_LOC(36); CK_LOC(37); CK_LOC(38); CK_LOC(39)
238	CK_LOC(40); CK_LOC(41); CK_LOC(42); CK_LOC(43)
239	CK_LOC(44); CK_LOC(45); CK_LOC(46); CK_LOC(47)
240	CK_LOC(48); CK_LOC(49); CK_LOC(50); CK_LOC(51)
241	CK_LOC(52); CK_LOC(53); CK_LOC(54); CK_LOC(55)
242	CK_LOC(56); CK_LOC(57); CK_LOC(58); CK_LOC(59)
243	CK_LOC(60); CK_LOC(61); CK_LOC(62); CK_LOC(63)
244	CK_LOC(64); CK_LOC(65); CK_LOC(66); CK_LOC(67)
245	CK_LOC(68); CK_LOC(69); CK_LOC(70); CK_LOC(71)
246	CK_LOC(72); CK_LOC(73); CK_LOC(74); CK_LOC(75)
247	CK_LOC(76); CK_LOC(77); CK_LOC(78); CK_LOC(79)
248	CK_LOC(80); CK_LOC(81); CK_LOC(82); CK_LOC(83)
249	CK_LOC(84); CK_LOC(85); CK_LOC(86); CK_LOC(87)
250	CK_LOC(88); CK_LOC(89)
	// Common exit: reached by falling through on success as well as by
	// the failure branches above; r8 already holds the return value.
251.fail:
252	mov rp = loc0
253	mov ar.pfs = loc1
254	br.ret.sptk.many rp
255	.endp loadup
256
/* resumption_point: copies r15 into r8 and branches to the address held
   in r16.  The unwind directives describe rp as living in r16 and
   ar.pfs as living in r0.  Only resumption_point_label is declared
   global; resumption_point itself is the .proc name.  Presumably the
   test driver resumes execution here with r15 and r16 already set up --
   the caller is not visible in this file, so confirm there.  */
256	.global resumption_point_label
257	.proc resumption_point
258resumption_point:
259resumption_point_label:
260	.prologue
261	.save rp, r16
262	.save ar.pfs, r0
263	.body
264	mov r8 = r15		// r8 = value supplied in r15
265	mov b6 = r16		// branch target supplied in r16
266	;;
267	br.cond.sptk.many b6
268	.endp resumption_point
269	.endp resumption_point
270