/* ia64-test-rbs-asm.S revision 71ab0b2a3cee677026ca27420ad8f6c03bc02749 */
/* libunwind - a platform-independent unwind library
   Copyright (C) 2003 Hewlett-Packard Co
	Contributed by David Mosberger-Tang <davidm@hpl.hp.com>

This file is part of libunwind.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include "ia64-test-rbs.h"

	/* Backing memory for the alternate stacks: NSTACKS slots of
	   STACK_SIZE bytes each, 16-byte aligned.  NSTACKS, STACK_SIZE and
	   STACK_SIZE_SHIFT are not defined in this file (presumably they
	   come from ia64-test-rbs.h -- confirm).  */
	.common stackmem, NSTACKS*STACK_SIZE, 16

	.text

/* Layout of the FRAME_SIZE-byte save area that rbs_spill_<n> places at the
   top of the stack slot it switches to.  Each offset is relative to the
   start of that save area; the area itself sits 16 bytes above the new sp,
   which is why the unwind directives below use <offset>+16.  */
#define SAVED_SP_OFF		0
#define SAVED_RP_OFF		8
#define SAVED_PFS_OFF		16
#define SAVED_RNAT_OFF		24
#define SAVED_BSP_OFF		32
#define SAVED_BSPSTORE_OFF	40
#define FRAME_SIZE		48

/* SPILL(n) defines the global procedure rbs_spill_<n>.

   Register frame: 2 inputs (in0 = iteration, in1 = next_func), n-2 locals,
   2 outputs.

   What the generated procedure does:
     1. Computes the stack slot stackmem + (iteration << STACK_SIZE_SHIFT).
     2. Saves sp, rp, ar.pfs, ar.bsp, ar.bspstore and ar.rnat in a save
	area at slot + STACK_SIZE - FRAME_SIZE, and points sp 16 bytes
	below that save area.
     3. Points ar.bspstore at the base of the slot, so the register backing
	store continues there.
     4. Calls the function whose descriptor (entry point, gp) is
	next_func[iteration], passing (iteration + 1, next_func).
     5. On return, adds 1 to r8 if r8 >= 0, reloads the dirty registers
	from the slot (loadrs), restores the saved state and returns r8.

   Scratch registers used: r2, r3, r8, r9, r16-r21, b6, p8, gp.

   NOTE(review): after the call, gp is used to address stackmem again
   without being reloaded; this appears to rely on the callee leaving a gp
   that is valid for this module -- confirm.  */
#define SPILL(n)								\
	/* int rbs_spill_#n(long iteration, int (*next_func[])()) */		\
	.globl rbs_spill_##n;							\
	.proc rbs_spill_##n;							\
rbs_spill_##n:									\
	.prologue;								\
	alloc r18 = ar.pfs, 2, (n)-2, 2, 0;/* read ar.pfs */			\
	/* first, calculate address of new stack: */				\
	addl r2 = @ltoff(stackmem), gp;						\
	shladd r8 = in0, 3, in1;	/* r8 = &next_func[iteration] */	\
	;;									\
	ld8 r2 = [r2];			/* r2 = &stackmem */			\
	ld8 r8 = [r8];			/* r8 = next_func[iteration] */		\
	shl r3 = in0, STACK_SIZE_SHIFT;						\
	;;									\
	ld8 r9 = [r8], 8;;		/* r9 = target's entry-point */		\
	ld8 gp = [r8];			/* gp = target's gp */			\
	add r2 = r2, r3;		/* r2 = stackmem[iteration] */		\
	;;									\
	mov b6 = r9;								\
	addl r3 = STACK_SIZE-FRAME_SIZE, r2; /* r3 = &stackframe */		\
	;;									\
	st8 [r3] = sp;			/* save caller's sp */			\
	.vframesp SAVED_SP_OFF+16;						\
	adds sp = -16, r3;		/* switch the memory stack */		\
	;;									\
	adds r3 = (SAVED_RP_OFF - SAVED_SP_OFF), r3;				\
	mov r16 = rp;								\
	;;									\
	.savesp rp, SAVED_RP_OFF+16;						\
	st8 [r3] = r16, (SAVED_PFS_OFF - SAVED_RP_OFF);				\
	;;									\
	.savesp ar.pfs, SAVED_PFS_OFF+16;					\
	st8 [r3] = r18, (SAVED_BSP_OFF - SAVED_PFS_OFF);			\
	mov r16 = ar.bsp;							\
	mov r17 = ar.bspstore;							\
	mov r18 = ar.rnat;							\
	;;									\
	.savesp ar.bsp, SAVED_BSP_OFF+16;					\
	st8 [r3] = r16, (SAVED_BSPSTORE_OFF - SAVED_BSP_OFF);			\
	;;									\
	.savesp ar.bspstore, SAVED_BSPSTORE_OFF+16;				\
	/* the negative post-increment steps r3 back to the rnat slot */	\
	st8 [r3] = r17, (SAVED_RNAT_OFF - SAVED_BSPSTORE_OFF);			\
	mov out1 = in1;			/* out1 = next_func */			\
	;;									\
	.savesp ar.rnat, SAVED_RNAT_OFF+16;					\
	st8 [r3] = r18;								\
	.body;									\
	mov ar.bspstore = r2;		/* switch the backing store */		\
	adds out0 = 1, in0;		/* out0 = iteration + 1 */		\
	;;									\
	br.call.sptk.many rp = b6;						\
1:	/* switch back to the original stack: */				\
	adds r3 = SAVED_SP_OFF+16, sp;	/* r3 = &stackframe */			\
	cmp.ge p8, p0 = r8, r0;							\
	;;									\
(p8)	add r8 = 1, r8;			/* bump non-negative results */		\
	ld8 r16 = [r3], (SAVED_RP_OFF-SAVED_SP_OFF);;	/* saved sp */		\
	ld8 r17 = [r3], (SAVED_PFS_OFF-SAVED_RP_OFF);;	/* saved rp */		\
	ld8 r18 = [r3], (SAVED_RNAT_OFF-SAVED_PFS_OFF);;/* saved pfs */		\
	ld8 r19 = [r3], (SAVED_BSP_OFF-SAVED_RNAT_OFF);;/* saved rnat */	\
	ld8 r20 = [r3], (SAVED_BSPSTORE_OFF-SAVED_BSP_OFF);;/* saved bsp */	\
	ld8 r21 = [r3];;		/* saved bspstore */			\
	mov rp = r17;								\
	mov ar.pfs = r18;							\
	shl r3 = in0, STACK_SIZE_SHIFT;						\
	addl r2 = @ltoff(stackmem), gp;;					\
	ld8 r2 = [r2];;			/* r2 = &stackmem */			\
	add r2 = r2, r3;		/* r2 = stackmem[iteration] */		\
	mov r3 = ar.bsp;;							\
	sub r2 = r3, r2;;		/* r2 = dirty_size */			\
	shl r2 = r2, 16;;		/* shift size up 16 bits for ar.rsc */	\
	mov ar.rsc = r2;;							\
	alloc r3 = ar.pfs, 0, 0, 0, 0;;	/* drop the current frame */		\
	loadrs;;								\
	mov ar.bspstore = r21;;	/* this also restores ar.bsp */			\
	mov ar.rnat = r19;							\
	.restore sp;								\
	mov sp = r16;								\
	br.ret.sptk.many rp;							\
	.endp rbs_spill_##n

	/* Instantiate rbs_spill_2 .. rbs_spill_94.  */
		      SPILL(2);  SPILL(3)
  SPILL(4);  SPILL(5);  SPILL(6);  SPILL(7)
  SPILL(8);  SPILL(9); SPILL(10); SPILL(11)
 SPILL(12); SPILL(13); SPILL(14); SPILL(15)
 SPILL(16); SPILL(17); SPILL(18); SPILL(19)
 SPILL(20); SPILL(21); SPILL(22); SPILL(23)
 SPILL(24); SPILL(25); SPILL(26); SPILL(27)
 SPILL(28); SPILL(29); SPILL(30); SPILL(31)
 SPILL(32); SPILL(33); SPILL(34); SPILL(35)
 SPILL(36); SPILL(37); SPILL(38); SPILL(39)
 SPILL(40); SPILL(41); SPILL(42); SPILL(43)
 SPILL(44); SPILL(45); SPILL(46); SPILL(47)
 SPILL(48); SPILL(49); SPILL(50); SPILL(51)
 SPILL(52); SPILL(53); SPILL(54); SPILL(55)
 SPILL(56); SPILL(57); SPILL(58); SPILL(59)
 SPILL(60); SPILL(61); SPILL(62); SPILL(63)
 SPILL(64); SPILL(65); SPILL(66); SPILL(67)
 SPILL(68); SPILL(69); SPILL(70); SPILL(71)
 SPILL(72); SPILL(73); SPILL(74); SPILL(75)
 SPILL(76); SPILL(77); SPILL(78); SPILL(79)
 SPILL(80); SPILL(81); SPILL(82); SPILL(83)
 SPILL(84); SPILL(85); SPILL(86); SPILL(87)
 SPILL(88); SPILL(89); SPILL(90); SPILL(91)
 SPILL(92); SPILL(93); SPILL(94)

/* LD_LOC(n): load the next 32-bit value from [in1] (post-incrementing in1
   by 4) into loc<n>.  A non-zero value gets r8 OR-ed in (loadup puts the
   iteration number in the top 32 bits of r8).  A zero value is replaced by
   the result of a speculative load from address 0; CK_LOC later expects
   that register to be NaT.  Uses p8/p9.  */
#define LD_LOC(n)				\
	ld4 loc##n = [in1], 4;;			\
	cmp.eq p8, p9 = r0, loc##n;;		\
(p9)	or loc##n = loc##n, r8;			\
(p8)	ld4.s loc##n = [r0]

/* CK_LOC(n): re-read the next 32-bit value from [in1] into r16 and verify
   loc<n> against it.  For a zero value, loc<n> must be NaT (tnat.z sets
   p10 when it is not); otherwise loc<n> must equal the value OR-ed with
   r9.  On a mismatch, r8 is set to -n and control branches to .fail.
   Uses r16 and p8-p10.  */
#define CK_LOC(n)				\
	ld4 r16 = [in1], 4;;			\
	cmp.eq p8, p9 = r0, r16;		\
	or r16 = r16, r9;;			\
(p8)	tnat.z p10, p0 = loc##n;		\
(p9)	cmp.ne p10, p0 = r16, loc##n;		\
	;;					\
(p10)	mov r8 = -n;				\
(p10)	br.cond.spnt.many .fail

	/* int loadup(long iteration, int *values, next_func[]) */

	/* Register frame: 3 inputs, 90 locals, 3 outputs.  loc0 holds rp,
	   loc1 holds ar.pfs, and loc2..loc89 are filled from values[] by
	   LD_LOC.

	   When iteration != 1 (p7), loadup calls itself with
	   (iteration - 1, values, next_func).  When iteration == 1 (p6), it
	   calls the function whose descriptor address is stored at
	   next_func[0], passing (0, next_func).

	   After the call, a negative r8 is returned unchanged.  Otherwise r8
	   is incremented and loc2..loc89 are checked with CK_LOC; the first
	   mismatch returns -n in r8.

	   ar.rsc is set to 0 here and is not restored by this procedure.  */
	.global loadup
	.proc loadup
loadup:
	.prologue
	.save ar.pfs, r36
	alloc loc1 = ar.pfs, 3, 90, 3, 0
	.save rp, loc0
	mov loc0 = rp
	.body
	cmp.eq p6, p7 = 1, in0
	;;
	mov ar.rsc = 0		// put RSE into enforced lazy mode
(p6)	mov out1 = in2
(p7)	mov out2 = in2

(p6)	ld8 r17 = [in2]		// get address of function descriptor
(p7)	add out0 = -1, in0
(p7)	mov out1 = in1

	;;
(p6)	ld8 r16 = [r17], 8	// load entry point
	shl r8 = in0, 32	// store iteration # in top 32 bits
	mov r18 = in1		/* LD_LOC advances in1; keep the original */
	;;
(p6)	ld8 r1 = [r17]		// load gp
(p6)	mov b6 = r16

(p6)	mov out0 = 0
	;;
			  LD_LOC( 2); LD_LOC( 3)
	LD_LOC( 4); LD_LOC( 5); LD_LOC( 6); LD_LOC( 7)
	LD_LOC( 8); LD_LOC( 9); LD_LOC(10); LD_LOC(11)
	LD_LOC(12); LD_LOC(13); LD_LOC(14); LD_LOC(15)
	LD_LOC(16); LD_LOC(17); LD_LOC(18); LD_LOC(19)
	LD_LOC(20); LD_LOC(21); LD_LOC(22); LD_LOC(23)
	LD_LOC(24); LD_LOC(25); LD_LOC(26); LD_LOC(27)
	LD_LOC(28); LD_LOC(29); LD_LOC(30); LD_LOC(31)
	LD_LOC(32); LD_LOC(33); LD_LOC(34); LD_LOC(35)
	LD_LOC(36); LD_LOC(37); LD_LOC(38); LD_LOC(39)
	LD_LOC(40); LD_LOC(41); LD_LOC(42); LD_LOC(43)
	LD_LOC(44); LD_LOC(45); LD_LOC(46); LD_LOC(47)
	LD_LOC(48); LD_LOC(49); LD_LOC(50); LD_LOC(51)
	LD_LOC(52); LD_LOC(53); LD_LOC(54); LD_LOC(55)
	LD_LOC(56); LD_LOC(57); LD_LOC(58); LD_LOC(59)
	LD_LOC(60); LD_LOC(61); LD_LOC(62); LD_LOC(63)
	LD_LOC(64); LD_LOC(65); LD_LOC(66); LD_LOC(67)
	LD_LOC(68); LD_LOC(69); LD_LOC(70); LD_LOC(71)
	LD_LOC(72); LD_LOC(73); LD_LOC(74); LD_LOC(75)
	LD_LOC(76); LD_LOC(77); LD_LOC(78); LD_LOC(79)
	LD_LOC(80); LD_LOC(81); LD_LOC(82); LD_LOC(83)
	LD_LOC(84); LD_LOC(85); LD_LOC(86); LD_LOC(87)
	LD_LOC(88); LD_LOC(89)
	;;
{	.mbb
	mov in1 = r18		/* rewind in1 to the start of values[] */
(p6)	br.call.sptk.many rp = b6
(p7)	br.call.sptk.many rp = loadup
}
	cmp.lt p8, p9 = r8, r0
	shl r9 = in0, 32	// store iteration # in top 32 bits
(p8)	br.cond.spnt.few .fail	/* callee reported failure: pass r8 through */
	;;
	add r8 = 1, r8
			  CK_LOC( 2); CK_LOC( 3)
	CK_LOC( 4); CK_LOC( 5); CK_LOC( 6); CK_LOC( 7)
	CK_LOC( 8); CK_LOC( 9); CK_LOC(10); CK_LOC(11)
	CK_LOC(12); CK_LOC(13); CK_LOC(14); CK_LOC(15)
	CK_LOC(16); CK_LOC(17); CK_LOC(18); CK_LOC(19)
	CK_LOC(20); CK_LOC(21); CK_LOC(22); CK_LOC(23)
	CK_LOC(24); CK_LOC(25); CK_LOC(26); CK_LOC(27)
	CK_LOC(28); CK_LOC(29); CK_LOC(30); CK_LOC(31)
	CK_LOC(32); CK_LOC(33); CK_LOC(34); CK_LOC(35)
	CK_LOC(36); CK_LOC(37); CK_LOC(38); CK_LOC(39)
	CK_LOC(40); CK_LOC(41); CK_LOC(42); CK_LOC(43)
	CK_LOC(44); CK_LOC(45); CK_LOC(46); CK_LOC(47)
	CK_LOC(48); CK_LOC(49); CK_LOC(50); CK_LOC(51)
	CK_LOC(52); CK_LOC(53); CK_LOC(54); CK_LOC(55)
	CK_LOC(56); CK_LOC(57); CK_LOC(58); CK_LOC(59)
	CK_LOC(60); CK_LOC(61); CK_LOC(62); CK_LOC(63)
	CK_LOC(64); CK_LOC(65); CK_LOC(66); CK_LOC(67)
	CK_LOC(68); CK_LOC(69); CK_LOC(70); CK_LOC(71)
	CK_LOC(72); CK_LOC(73); CK_LOC(74); CK_LOC(75)
	CK_LOC(76); CK_LOC(77); CK_LOC(78); CK_LOC(79)
	CK_LOC(80); CK_LOC(81); CK_LOC(82); CK_LOC(83)
	CK_LOC(84); CK_LOC(85); CK_LOC(86); CK_LOC(87)
	CK_LOC(88); CK_LOC(89)
	/* The success path falls through to here as well; .fail is the
	   common exit.  */
.fail:
	mov rp = loc0
	mov ar.pfs = loc1
	br.ret.sptk.many rp
	.endp loadup

	/* resumption_point: copies r15 into r8 and branches to the address
	   held in r16.  The unwind directives describe rp as living in r16
	   and ar.pfs in r0.  resumption_point_label is an exported alias for
	   the same address.  */
	.global resumption_point_label
	.proc resumption_point
resumption_point:
resumption_point_label:
	.prologue
	.save rp, r16
	.save ar.pfs, r0
	.body
	mov r8 = r15
	mov b6 = r16
	;;
	br.cond.sptk.many b6
	.endp resumption_point