// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go bootstraps the runtime on the initial thread. It stashes
// argc/argv, binds g0 and m0 to each other, carves g0's stack bounds out
// of the OS stack, runs the one-time initializers (_initcgo, check, args,
// checkgoarm, osinit, schedinit), queues the function named by
// runtime·mainPC with newproc and finally calls mstart.
//
// On entry 0(R13) holds argc and 4(R13) holds argv.
// using frame size $-4 means do not save LR on stack.
TEXT runtime·rt0_go(SB),NOSPLIT,$-4
	MOVW	$0xcafebabe, R12

	// copy arguments forward on an even stack
	// use R13 instead of SP to avoid linker rewriting the offsets
	MOVW	0(R13), R0		// argc
	MOVW	4(R13), R1		// argv
	SUB	$64, R13		// plenty of scratch
	AND	$~7, R13		// round SP down to a multiple of 8
	MOVW	R0, 60(R13)		// save argc, argv away
	MOVW	R1, 64(R13)

	// set up g register
	// g is R10
	MOVW	$runtime·g0(SB), g
	MOVW	$runtime·m0(SB), R8

	// save m->g0 = g0
	MOVW	g, m_g0(R8)
	// save g->m = m0
	MOVW	R8, g_m(g)

	// create istack out of the OS stack
	// (1MB of system stack is available on iOS and Android)
	// stack.lo and both stack guards start at SP - 64KB + 104; stack.hi is SP.
	MOVW	$(-64*1024+104)(R13), R0
	MOVW	R0, g_stackguard0(g)
	MOVW	R0, g_stackguard1(g)
	MOVW	R0, (g_stack+stack_lo)(g)
	MOVW	R13, (g_stack+stack_hi)(g)

	BL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	BL	runtime·_initcgo(SB)	// will clobber R0-R3

	// update stackguard after _cgo_init
	// (stack.lo is re-read from g and both guards become stack.lo + _StackGuard)
	MOVW	(g_stack+stack_lo)(g), R0
	ADD	$const__StackGuard, R0
	MOVW	R0, g_stackguard0(g)
	MOVW	R0, g_stackguard1(g)

	BL	runtime·check(SB)

	// saved argc, argv
	// (copied into the outgoing argument slots for runtime·args)
	MOVW	60(R13), R0
	MOVW	R0, 4(R13)
	MOVW	64(R13), R1
	MOVW	R1, 8(R13)
	BL	runtime·args(SB)
	BL	runtime·checkgoarm(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// create a new goroutine to start program
	// Three words are pushed: &mainPC, the constant 8, and a zero guard word.
	MOVW	$runtime·mainPC(SB), R0
	MOVW.W	R0, -4(R13)
	MOVW	$8, R0
	MOVW.W	R0, -4(R13)
	MOVW	$0, R0
	MOVW.W	R0, -4(R13)	// push $0 as guard
	BL	runtime·newproc(SB)
	MOVW	$12(R13), R13	// pop args and LR

	// start this M
	BL	runtime·mstart(SB)

	// Reached only if mstart returns: store to address 1000.
	MOVW	$1234, R0
	MOVW	$1000, R1
	MOVW	R0, (R1)	// fail hard

// mainPC is a 4-byte read-only word holding the address of runtime·main.
// rt0_go passes the address of this word to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint emits a single OS-specific breakpoint word and returns.
// The encoding is chosen at build time by GOOS_nacl / GOOS_plan9.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	// gdb won't skip this breakpoint instruction automatically,
	// so you must manually "set $pc+=4" to skip it and continue.
#ifdef GOOS_nacl
	WORD	$0xe125be7f	// BKPT 0x5bef, NACL_INSTR_ARM_BREAKPOINT
#else
#ifdef GOOS_plan9
	WORD	$0xD1200070	// undefined instruction used as armv5 breakpoint in Plan 9
#else
	WORD	$0xe7f001f0	// undefined instruction that gdb understands is a software breakpoint
#endif
#endif
	RET

// asminit clears bit 24 of FPSCR when runtime·goarm > 5 and does nothing
// otherwise. Clobbers R11.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// disable runfast (flush-to-zero) mode of vfp if runtime.goarm > 5
	MOVB	runtime·goarm(SB), R11
	CMP	$5, R11
	BLE	4(PC)			// goarm <= 5: skip the three FPSCR instructions, land on RET
	WORD	$0xeef1ba10	// vmrs r11, fpscr
	BIC	$(1<<24), R11
	WORD	$0xeee1ba10	// vmsr fpscr, r11
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
//
// Records the caller's SP, return address (LR) and g in buf, zeroes
// buf.lr and buf.ret, and calls badctxt if buf.ctxt is not already zero.
// Clobbers R0 and R11.
TEXT runtime·gosave(SB),NOSPLIT,$-4-4
	MOVW	buf+0(FP), R0
	MOVW	R13, gobuf_sp(R0)
	MOVW	LR, gobuf_pc(R0)
	MOVW	g, gobuf_g(R0)
	MOVW	$0, R11
	MOVW	R11, gobuf_lr(R0)
	MOVW	R11, gobuf_ret(R0)
	// Assert ctxt is zero. See func save.
	MOVW	gobuf_ctxt(R0), R0
	CMP	R0, R11			// R11 is still 0 here
	B.EQ	2(PC)			// ctxt == 0: skip the badctxt call
	CALL	runtime·badctxt(SB)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
//
// Switches g to buf.g, loads SP, LR, R0 (ret) and R7 (ctxt) from buf,
// zeroes buf.sp/ret/lr/ctxt, and branches to buf.pc. Does not return
// to its caller.
TEXT runtime·gogo(SB),NOSPLIT,$8-4
	MOVW	buf+0(FP), R1

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVW	gobuf_ctxt(R1), R0
	CMP	$0, R0
	B.EQ	nilctxt
	MOVW	$gobuf_ctxt(R1), R0	// first argument: address of buf.ctxt
	MOVW	R0, 4(R13)
	MOVW	$0, R0			// second argument: 0
	MOVW	R0, 8(R13)
	BL	runtime·writebarrierptr_prewrite(SB)
	MOVW	buf+0(FP), R1		// reload buf after the call

nilctxt:
	MOVW	gobuf_g(R1), R0
	BL	setg<>(SB)

	// NOTE: We updated g above, and we are about to update SP.
	// Until LR and PC are also updated, the g/SP/LR/PC quadruple
	// are out of sync and must not be used as the basis of a traceback.
	// Sigprof skips the traceback when SP is not within g's bounds,
	// and when the PC is inside this function, runtime.gogo.
	// Since we are about to update SP, until we complete runtime.gogo
	// we must not leave this function. In particular, no calls
	// after this point: it must be straight-line code until the
	// final B instruction.
	// See large comment in sigprof for more details.
	MOVW	gobuf_sp(R1), R13	// restore SP==R13
	MOVW	gobuf_lr(R1), LR
	MOVW	gobuf_ret(R1), R0
	MOVW	gobuf_ctxt(R1), R7
	MOVW	$0, R11
	MOVW	R11, gobuf_sp(R1)	// clear to help garbage collector
	MOVW	R11, gobuf_ret(R1)
	MOVW	R11, gobuf_lr(R1)
	MOVW	R11, gobuf_ctxt(R1)
	MOVW	gobuf_pc(R1), R11
	CMP	R11, R11 // set condition codes for == test, needed by stack split
	B	(R11)

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
//
// Branches to badmcall if the caller is already running on g0, and to
// badmcall2 if fn returns.
TEXT runtime·mcall(SB),NOSPLIT,$-4-4
	// Save caller state in g->sched.
	MOVW	R13, (g_sched+gobuf_sp)(g)
	MOVW	LR, (g_sched+gobuf_pc)(g)
	MOVW	$0, R11
	MOVW	R11, (g_sched+gobuf_lr)(g)
	MOVW	g, (g_sched+gobuf_g)(g)

	// Switch to m->g0 & its stack, call fn.
	MOVW	g, R1			// R1 = calling g, passed to fn below
	MOVW	g_m(g), R8
	MOVW	m_g0(R8), R0
	BL	setg<>(SB)
	CMP	g, R1
	B.NE	2(PC)			// caller was not g0: continue
	B	runtime·badmcall(SB)
	MOVB	runtime·iscgo(SB), R11
	CMP	$0, R11
	BL.NE	runtime·save_g(SB)
	MOVW	fn+0(FP), R0
	MOVW	(g_sched+gobuf_sp)(g), R13	// now on g0's stack
	SUB	$8, R13
	MOVW	R1, 4(R13)		// argument: the g we switched away from
	MOVW	R0, R7			// R7 = closure pointer
	MOVW	0(R0), R0		// R0 = code pointer stored in the closure
	BL	(R0)
	B	runtime·badmcall2(SB)
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
//
// systemstack stores the address just past this function's first
// instruction(s) as the saved PC; the body itself is only a placeholder.
TEXT runtime·systemstack_switch(SB),NOSPLIT,$0-0
	MOVW	$0, R0
	BL	(R0) // clobber lr to ensure push {lr} is kept
	RET

// func systemstack(fn func())
//
// Calls fn. If the current g is m.gsignal or m.g0, fn is called directly
// on the current stack (noswitch). If the current g is m.curg, the state
// is saved in g.sched, execution moves to g0's stack, fn is called, and
// then execution moves back to m.curg. Any other g calls badsystemstack.
TEXT runtime·systemstack(SB),NOSPLIT,$0-4
	MOVW	fn+0(FP), R0	// R0 = fn
	MOVW	g_m(g), R1	// R1 = m

	MOVW	m_gsignal(R1), R2	// R2 = gsignal
	CMP	g, R2
	B.EQ	noswitch

	MOVW	m_g0(R1), R2	// R2 = g0
	CMP	g, R2
	B.EQ	noswitch

	MOVW	m_curg(R1), R3
	CMP	g, R3
	B.EQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVW	$runtime·badsystemstack(SB), R0
	BL	(R0)

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVW	$runtime·systemstack_switch(SB), R3
#ifdef GOOS_nacl
	ADD	$4, R3, R3 // get past nacl-insert bic instruction
#endif
	ADD	$4, R3, R3 // get past push {lr}
	MOVW	R3, (g_sched+gobuf_pc)(g)
	MOVW	R13, (g_sched+gobuf_sp)(g)
	MOVW	LR, (g_sched+gobuf_lr)(g)
	MOVW	g, (g_sched+gobuf_g)(g)

	// switch to g0
	MOVW	R0, R5		// keep fn in R5: setg<> takes its argument in R0
	MOVW	R2, R0
	BL	setg<>(SB)
	MOVW	R5, R0
	MOVW	(g_sched+gobuf_sp)(R2), R3
	// make it look like mstart called systemstack on g0, to stop traceback
	SUB	$4, R3, R3
	MOVW	$runtime·mstart(SB), R4
	MOVW	R4, 0(R3)
	MOVW	R3, R13

	// call target function
	MOVW	R0, R7		// R7 = closure pointer
	MOVW	0(R0), R0	// R0 = code pointer stored in the closure
	BL	(R0)

	// switch back to g
	MOVW	g_m(g), R1
	MOVW	m_curg(R1), R0
	BL	setg<>(SB)
	MOVW	(g_sched+gobuf_sp)(g), R13
	MOVW	$0, R3
	MOVW	R3, (g_sched+gobuf_sp)(g)	// clear the saved SP now that it is restored
	RET

noswitch:
	// Already on gsignal or g0: call fn on the current stack.
	MOVW	R0, R7
	MOVW	0(R0), R0
	BL	(R0)
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// R1 frame size
// R3 prolog's LR
// R7 is passed through to newstack as its ctxt argument.
// NB. we do not save R0 because we've forced 5c to pass all arguments
// on the stack.
// using frame size $-4 means do not save LR on stack.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$-4-0
	// Cannot grow scheduler stack (m->g0).
	MOVW	g_m(g), R8
	MOVW	m_g0(R8), R4
	CMP	g, R4
	BNE	3(PC)			// g != g0: skip the two failure instructions
	BL	runtime·badmorestackg0(SB)
	B	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVW	m_gsignal(R8), R4
	CMP	g, R4
	BNE	3(PC)			// g != gsignal: skip the two failure instructions
	BL	runtime·badmorestackgsignal(SB)
	B	runtime·abort(SB)

	// Called from f.
	// Set g->sched to context in f.
	MOVW	R13, (g_sched+gobuf_sp)(g)
	MOVW	LR, (g_sched+gobuf_pc)(g)
	MOVW	R3, (g_sched+gobuf_lr)(g)
	// newstack will fill gobuf.ctxt.

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVW	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
	MOVW	R13, (m_morebuf+gobuf_sp)(R8)	// f's caller's SP
	MOVW	$4(R13), R3			// f's argument pointer
	MOVW	g, (m_morebuf+gobuf_g)(R8)

	// Call newstack on m->g0's stack.
	MOVW	m_g0(R8), R0
	BL	setg<>(SB)
	MOVW	(g_sched+gobuf_sp)(g), R13
	MOVW	$0, R0
	MOVW.W	R0, -8(R13)	// create a call frame on g0
	MOVW	R7, 4(R13)	// ctxt argument
	BL	runtime·newstack(SB)

	// Not reached, but make sure the return PC from the call to newstack
	// is still in this function, and not the beginning of the next.
	RET

// morestack_noctxt zeroes R7 (the ctxt value morestack forwards to
// newstack) and tail-branches to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-4-0
	MOVW	$0, R7
	B	runtime·morestack(SB)

// stackBarrier loads g.stkbar[g.stkbarPos].savedLRVal, increments
// g.stkbarPos, and branches to the loaded PC. Uses R4, R5 and R6 and
// leaves R0 untouched.
TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten LR.
	// R0 may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	MOVW	(g_stkbar+slice_array)(g), R4	// R4 = base of the stkbar slice
	MOVW	g_stkbarPos(g), R5		// R5 = current index
	MOVW	$stkbar__size, R6
	MUL	R5, R6				// R6 = index * sizeof(stkbar)
	ADD	R4, R6				// R6 = &stkbar[index]
	MOVW	stkbar_savedLRVal(R6), R6
	// Record that this stack barrier was hit.
	ADD	$1, R5
	MOVW	R5, g_stkbarPos(g)
	// Jump to the original return PC.
	B	(R6)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH(NAME, MAXSIZE) expects the argument size in R0. If it is not
// above MAXSIZE (unsigned compare) it branches to NAME via R1; otherwise
// B.HI 3(PC) skips the two-instruction tail and falls through to whatever
// follows the macro.
#define DISPATCH(NAME,MAXSIZE)		\
	CMP	$MAXSIZE, R0;		\
	B.HI	3(PC);			\
	MOVW	$NAME(SB), R1;		\
	B	(R1)

// reflect·call is an alias that tail-branches to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	B	·reflectcall(SB)

// reflectcall loads argsize into R0 and branches to the first
// runtime·callN whose N is >= argsize (N = 16, 32, ..., 1073741824).
// If argsize exceeds the largest N it branches to badreflectcall.
TEXT ·reflectcall(SB),NOSPLIT,$-4-20
	MOVW	argsize+12(FP), R0
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVW	$runtime·badreflectcall(SB), R1
	B	(R1)

// CALLFN(NAME, MAXSIZE) defines a WRAPPER function NAME with a
// MAXSIZE-byte frame. The function:
//   1. copies argsize bytes, one at a time, from argptr to the frame
//      starting at 4(R13);
//   2. calls the closure f with R7 = f;
//   3. calls callRet<> with R4 = argtype, R0 = argptr+retoffset,
//      R1 = frame copy + retoffset, R2 = argsize-retoffset.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVW	argptr+8(FP), R0;		\
	MOVW	argsize+12(FP), R2;		\
	ADD	$4, R13, R1;			\
	CMP	$0, R2;				\
	B.EQ	5(PC);				\
	MOVBU.P	1(R0), R5;			\
	MOVBU.P R5, 1(R1);			\
	SUB	$1, R2, R2;			\
	B	-5(PC);				\
	/* call function */			\
	MOVW	f+4(FP), R7;			\
	MOVW	(R7), R0;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	BL	(R0);				\
	/* copy return values back */		\
	MOVW	argtype+0(FP), R4;		\
	MOVW	argptr+8(FP), R0;		\
	MOVW	argsize+12(FP), R2;		\
	MOVW	retoffset+16(FP), R3;		\
	ADD	$4, R13, R1;			\
	ADD	R3, R1;				\
	ADD	R3, R0;				\
	SUB	R3, R2;				\
	BL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// In: R4, R0, R1, R2 are stored, in that order, as the four argument
// words of reflectcallmove (CALLFN loads them with argtype, the
// destination, the source and the byte count respectively).
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVW	R4, 4(R13)
	MOVW	R0, 8(R13)
	MOVW	R1, 12(R13)
	MOVW	R2, 16(R13)
	BL	runtime·reflectcallmove(SB)
	RET

// Instantiate one fixed-frame call helper per size class. The names must
// match the runtime·callN symbols that reflectcall dispatches to.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. grab stored LR for caller
// 2. sub 4 bytes to get back to BL deferreturn
// 3. B to fn
// TODO(rsc): Push things on stack and then use pop
// to load all registers simultaneously, so that a profiling
// interrupt can never see mismatched SP/LR/PC.
// (And double-check that pop is atomic in that way.)
TEXT runtime·jmpdefer(SB),NOSPLIT,$0-8
	MOVW	0(R13), LR
	MOVW	$-4(LR), LR	// BL deferreturn
	MOVW	fv+0(FP), R7	// R7 = closure pointer
	MOVW	argp+4(FP), R13
	MOVW	$-4(R13), R13	// SP is 4 below argp, due to saved LR
	MOVW	0(R7), R1	// R1 = code pointer stored in the closure
	B	(R1)

// Save state of caller into g->sched. Smashes R11.
//
// Records LR and SP in g.sched, zeroes g.sched.lr and g.sched.ret, and
// calls badctxt if g.sched.ctxt is not already zero.
//
// g.sched.ctxt is deliberately NOT written here. The assertion below
// exists so that ctxt never has to be stored; storing zero first would
// also make the check unable to fail.
TEXT gosave<>(SB),NOSPLIT,$-4
	MOVW	LR, (g_sched+gobuf_pc)(g)
	MOVW	R13, (g_sched+gobuf_sp)(g)
	MOVW	$0, R11
	MOVW	R11, (g_sched+gobuf_lr)(g)
	MOVW	R11, (g_sched+gobuf_ret)(g)
	// Assert ctxt is zero. See func save.
	MOVW	(g_sched+gobuf_ctxt)(g), R11
	CMP	$0, R11
	B.EQ	2(PC)			// ctxt == 0: skip the badctxt call
	CALL	runtime·badctxt(SB)
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// Register roles: R1 = fn, R0 = arg (and fn's result afterwards),
// R2 = SP on entry, R4 = g on entry.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVW	fn+0(FP), R1
	MOVW	arg+4(FP), R0

	MOVW	R13, R2
	MOVW	g, R4

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	MOVW	g_m(g), R8
	MOVW	m_g0(R8), R3
	CMP	R3, g
	BEQ	g0
	BL	gosave<>(SB)
	MOVW	R0, R5		// keep arg in R5: setg<> takes its argument in R0
	MOVW	R3, R0
	BL	setg<>(SB)
	MOVW	R5, R0
	MOVW	(g_sched+gobuf_sp)(g), R13

	// Now on a scheduling stack (a pthread-created stack).
g0:
	SUB	$24, R13
	BIC	$0x7, R13	// alignment for gcc ABI
	MOVW	R4, 20(R13) // save old g
	MOVW	(g_stack+stack_hi)(R4), R4
	SUB	R2, R4		// R4 = old g's stack.hi - entry SP
	MOVW	R4, 16(R13)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	BL	(R1)

	// Restore registers, g, stack pointer.
	MOVW	R0, R5		// keep fn's result across setg<>
	MOVW	20(R13), R0
	BL	setg<>(SB)
	MOVW	(g_stack+stack_hi)(g), R1
	MOVW	16(R13), R2
	SUB	R2, R1		// R1 = stack.hi - saved depth = SP to resume on
	MOVW	R5, R0
	MOVW	R1, R13

	MOVW	R0, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
//
// The first outgoing argument is the ADDRESS of the fn argument slot; the
// remaining three arguments are forwarded by value. The call is made
// indirectly through R0.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	MOVW	$fn+0(FP), R0
	MOVW	R0, 4(R13)
	MOVW	frame+4(FP), R0
	MOVW	R0, 8(R13)
	MOVW	framesize+8(FP), R0
	MOVW	R0, 12(R13)
	MOVW	ctxt+12(FP), R0
	MOVW	R0, 16(R13)
	MOVW	$runtime·cgocallback_gofunc(SB), R0
	BL	(R0)
	RET

// cgocallback_gofunc(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
//
// Outline:
//   1. load g (from TLS when iscgo is set); call needm if g is nil;
//   2. save g0's sched.sp in the frame and replace it with the current SP;
//   3. switch to m.curg, push its sched.pc and ctxt, call cgocallbackg;
//   4. restore m.curg's sched.pc/sp, switch back to g0 and restore its
//      sched.sp;
//   5. call dropm if no m was present on entry.
//
// Frame locals: savedm-4(SP) holds the m seen on entry (0 if none),
// savedsp-8(SP) holds g0's previous sched.sp.
TEXT	·cgocallback_gofunc(SB),NOSPLIT,$8-16
	NO_LOCAL_POINTERS

	// Load m and g from thread-local storage.
	MOVB	runtime·iscgo(SB), R0
	CMP	$0, R0
	BL.NE	runtime·load_g(SB)

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call.
	CMP	$0, g
	B.EQ	needm

	MOVW	g_m(g), R8
	MOVW	R8, savedm-4(SP)
	B	havem

needm:
	MOVW	g, savedm-4(SP) // g is zero, so is m.
	MOVW	$runtime·needm(SB), R0
	BL	(R0)

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVW	g_m(g), R8
	MOVW	m_g0(R8), R3
	MOVW	R13, (g_sched+gobuf_sp)(R3)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 4(R13) aka savedsp-8(SP).
	MOVW	m_g0(R8), R3
	MOVW	(g_sched+gobuf_sp)(R3), R4
	MOVW	R4, savedsp-8(SP)
	MOVW	R13, (g_sched+gobuf_sp)(R3)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, -4(SP) is unused (where SP refers to
	// m->curg's SP while we're setting it up, before we've adjusted it).
	MOVW	m_curg(R8), R0
	BL	setg<>(SB)
	MOVW	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
	MOVW	(g_sched+gobuf_pc)(g), R5
	MOVW	R5, -12(R4)		// saved sched.pc sits where a return address would
	MOVW	ctxt+12(FP), R0
	MOVW	R0, -8(R4)		// ctxt is the argument to cgocallbackg
	MOVW	$-12(R4), R13
	BL	runtime·cgocallbackg(SB)

	// Restore g->sched (== m->curg->sched) from saved values.
	MOVW	0(R13), R5
	MOVW	R5, (g_sched+gobuf_pc)(g)
	MOVW	$12(R13), R4
	MOVW	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVW	g_m(g), R8
	MOVW	m_g0(R8), R0
	BL	setg<>(SB)
	MOVW	(g_sched+gobuf_sp)(g), R13
	MOVW	savedsp-8(SP), R4
	MOVW	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVW	savedm-4(SP), R6
	CMP	$0, R6
	B.NE	3(PC)			// had an m on entry: skip the dropm call
	MOVW	$runtime·dropm(SB), R0
	BL	(R0)

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// Loads the stack argument into R0 and tail-branches to setg<>.
TEXT runtime·setg(SB),NOSPLIT,$-4-4
	MOVW	gg+0(FP), R0
	B	setg<>(SB)

// setg<> sets the g register from R0 (register argument, not Go ABI).
// When runtime·iscgo is non-zero it tail-branches to save_g; otherwise
// it returns with R0 = g.
TEXT setg<>(SB),NOSPLIT,$-4-0
	MOVW	R0, g

	// Save g to thread-local storage.
	MOVB	runtime·iscgo(SB), R0
	CMP	$0, R0
	B.EQ	2(PC)			// not cgo: skip the branch to save_g
	B	runtime·save_g(SB)

	MOVW	g, R0
	RET

// getcallerpc returns the word at 8(R13), the LR saved by the caller.
// If that word equals runtime·stackBarrierPC, it returns the result of
// nextBarrierPC instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVW	8(R13), R0		// LR saved by caller
	MOVW	runtime·stackBarrierPC(SB), R1
	CMP	R0, R1
	BNE	nobar
	// Get original return PC.
	BL	runtime·nextBarrierPC(SB)
	MOVW	4(R13), R0		// result slot of nextBarrierPC
nobar:
	MOVW	R0, ret+4(FP)
	RET

// setcallerpc overwrites the caller's saved LR at 8(R13) with pc.
// If the saved LR currently equals runtime·stackBarrierPC, the slot is
// left alone and pc is handed to setNextBarrierPC instead.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVW	pc+4(FP), R0
	MOVW	8(R13), R1
	MOVW	runtime·stackBarrierPC(SB), R2
	CMP	R1, R2
	BEQ	setbar
	MOVW	R0, 8(R13)		// set LR in caller
	RET
setbar:
	// Set the stack barrier return PC.
	MOVW	R0, 4(R13)		// argument to setNextBarrierPC
	BL	runtime·setNextBarrierPC(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT (flags 0);
// rt0_go calls it to "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort loads a word from address 0. There is no RET after the load.
TEXT runtime·abort(SB),NOSPLIT,$-4-0
	MOVW	$0, R0
	MOVW	(R0), R1

// armPublicationBarrier is a native store/store barrier for ARMv7+.
// On earlier ARM revisions, armPublicationBarrier is a no-op.
// This will not work on SMP ARMv6 machines, if any are in use.
// To implement publicationBarrier in sys_$GOOS_arm.s using the native
// instructions, use:
//
//	TEXT ·publicationBarrier(SB),NOSPLIT,$-4-0
//		B	runtime·armPublicationBarrier(SB)
//
// Clobbers R11.
TEXT runtime·armPublicationBarrier(SB),NOSPLIT,$-4-0
	MOVB	runtime·goarm(SB), R11
	CMP	$7, R11
	BLT	2(PC)			// goarm < 7: skip the barrier instruction
	WORD $0xf57ff05e	// DMB ST
	RET

// AES hashing not implemented for ARM
// Each stub loads a word from address 0 and has no RET.
TEXT runtime·aeshash(SB),NOSPLIT,$-4-0
	MOVW	$0, R0
	MOVW	(R0), R1
TEXT runtime·aeshash32(SB),NOSPLIT,$-4-0
	MOVW	$0, R0
	MOVW	(R0), R1
TEXT runtime·aeshash64(SB),NOSPLIT,$-4-0
	MOVW	$0, R0
	MOVW	(R0), R1
TEXT runtime·aeshashstr(SB),NOSPLIT,$-4-0
	MOVW	$0, R0
	MOVW	(R0), R1

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
//
// The size is read from offset 4 of the closure pointed to by R7.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVW	p+0(FP), R0
	MOVW	h+4(FP), R1
	MOVW	4(R7), R2		// size from the closure
	MOVW	R0, 4(R13)
	MOVW	R1, 8(R13)
	MOVW	R2, 12(R13)
	BL	runtime·memhash(SB)
	MOVW	16(R13), R0		// memhash's result slot
	MOVW	R0, ret+8(FP)
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
//
// The result is preset to true. It returns at once if the two pointers
// are identical; otherwise it compares byte by byte and stores false on
// the first mismatch. Uses R0-R6.
TEXT runtime·memequal(SB),NOSPLIT,$-4-13
	MOVW	a+0(FP), R1
	MOVW	b+4(FP), R2
	MOVW	size+8(FP), R3
	ADD	R1, R3, R6		// R6 = a + size, the end pointer for a
	MOVW	$1, R0
	MOVB	R0, ret+12(FP)
	CMP	R1, R2
	RET.EQ
loop:
	CMP	R1, R6
	RET.EQ				// reached the end: result stays true
	MOVBU.P	1(R1), R4
	MOVBU.P	1(R2), R5
	CMP	R4, R5
	BEQ	loop

	MOVW	$0, R0
	MOVB	R0, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Returns true at once if a == b; otherwise calls memequal(a, b, size)
// with size read from offset 4 of the closure pointed to by R7.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$16-9
	MOVW	a+0(FP), R0
	MOVW	b+4(FP), R1
	CMP	R0, R1
	BEQ	eq
	MOVW	4(R7), R2    // compiler stores size at offset 4 in the closure
	MOVW	R0, 4(R13)
	MOVW	R1, 8(R13)
	MOVW	R2, 12(R13)
	BL	runtime·memequal(SB)
	MOVB	16(R13), R0		// memequal's result slot
	MOVB	R0, ret+8(FP)
	RET
eq:
	MOVW	$1, R0
	MOVB	R0, ret+8(FP)
	RET

// cmpstring loads the two string headers into the registers cmpbody
// expects (R2/R0 = s1 base/len, R3/R1 = s2 base/len), points R7 at
// 20(R13), where cmpbody writes the result, and tail-branches to cmpbody.
TEXT runtime·cmpstring(SB),NOSPLIT,$-4-20
	MOVW	s1_base+0(FP), R2
	MOVW	s1_len+4(FP), R0
	MOVW	s2_base+8(FP), R3
	MOVW	s2_len+12(FP), R1
	ADD	$20, R13, R7
	B	runtime·cmpbody(SB)

// bytes·Compare loads the base and length words of the two slice
// arguments into the registers cmpbody expects (R2/R0 = s1 base/len,
// R3/R1 = s2 base/len), points R7 at 28(R13), where cmpbody writes the
// result, and tail-branches to cmpbody.
TEXT bytes·Compare(SB),NOSPLIT,$-4-28
	MOVW	s1+0(FP), R2
	MOVW	s1+4(FP), R0
	MOVW	s2+12(FP), R3
	MOVW	s2+16(FP), R1
	ADD	$28, R13, R7
	B	runtime·cmpbody(SB)

// On entry:
// R0 is the length of s1
// R1 is the length of s2
// R2 points to the start of s1
// R3 points to the start of s2
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, and R6 are clobbered
//
// Compares the first min(len(s1), len(s2)) bytes; if they are all equal
// (or the two base pointers are identical) the result is decided by
// comparing the lengths.
TEXT runtime·cmpbody(SB),NOSPLIT,$-4-0
	CMP	R2, R3
	BEQ	samebytes
	CMP 	R0, R1
	MOVW 	R0, R6
	MOVW.LT	R1, R6	// R6 is min(R0, R1)

	ADD	R2, R6	// R2 is current byte in s1, R6 is last byte in s1 to compare
loop:
	CMP	R2, R6
	BEQ	samebytes // all compared bytes were the same; compare lengths
	MOVBU.P	1(R2), R4
	MOVBU.P	1(R3), R5
	CMP	R4, R5
	BEQ	loop
	// bytes differed
	MOVW.LT	$1, R0
	MOVW.GT	$-1, R0
	MOVW	R0, (R7)
	RET
samebytes:
	CMP	R0, R1
	MOVW.LT	$1, R0
	MOVW.GT	$-1, R0
	MOVW.EQ	$0, R0
	MOVW	R0, (R7)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
//
// Only s1's length is read. The result is preset to true and is
// overwritten with false on the first differing byte.
TEXT runtime·eqstring(SB),NOSPLIT,$-4-17
	MOVW	s1_base+0(FP), R2
	MOVW	s2_base+8(FP), R3
	MOVW	$1, R8
	MOVB	R8, ret+16(FP)
	CMP	R2, R3
	RET.EQ				// same base pointer: equal
	MOVW	s1_len+4(FP), R0
	ADD	R2, R0, R6		// R6 = s1 base + len, the end pointer
loop:
	CMP	R2, R6
	RET.EQ				// reached the end: result stays true
	MOVBU.P	1(R2), R4
	MOVBU.P	1(R3), R5
	CMP	R4, R5
	BEQ	loop
	MOVW	$0, R8
	MOVB	R8, ret+16(FP)
	RET

// TODO: share code with memequal?
//
// bytes·Equal stores false if the two lengths differ; otherwise it
// compares the two byte slices byte by byte and stores true only if the
// end of a is reached without a mismatch.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVW	a_len+4(FP), R1
	MOVW	b_len+16(FP), R3

	CMP	R1, R3		// unequal lengths are not equal
	B.NE	notequal

	MOVW	a+0(FP), R0
	MOVW	b+12(FP), R2
	ADD	R0, R1		// end

loop:
	CMP	R0, R1
	B.EQ	equal		// reached the end
	MOVBU.P	1(R0), R4
	MOVBU.P	1(R2), R5
	CMP	R4, R5
	B.EQ	loop

notequal:
	MOVW	$0, R0
	MOVBU	R0, ret+24(FP)
	RET

equal:
	MOVW	$1, R0
	MOVBU	R0, ret+24(FP)
	RET

// bytes·IndexByte scans the slice s forward for the byte c and stores the
// index of the first match, or -1 if the end is reached without one.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVW	s+0(FP), R0
	MOVW	s_len+4(FP), R1
	MOVBU	c+12(FP), R2	// byte to find
	MOVW	R0, R4		// store base for later
	ADD	R0, R1		// end

_loop:
	CMP	R0, R1
	B.EQ	_notfound
	MOVBU.P	1(R0), R3
	CMP	R2, R3
	B.NE	_loop

	SUB	$1, R0		// R0 will be one beyond the position we want
	SUB	R4, R0		// remove base
	MOVW	R0, ret+16(FP)
	RET

_notfound:
	MOVW	$-1, R0
	MOVW	R0, ret+16(FP)
	RET

// strings·IndexByte scans the string s forward for the byte c and stores
// the index of the first match, or -1 if the end is reached without one.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVW	s+0(FP), R0
	MOVW	s_len+4(FP), R1
	MOVBU	c+8(FP), R2	// byte to find
	MOVW	R0, R4		// store base for later
	ADD	R0, R1		// end

_sib_loop:
	CMP	R0, R1
	B.EQ	_sib_notfound
	MOVBU.P	1(R0), R3
	CMP	R2, R3
	B.NE	_sib_loop

	SUB	$1, R0		// R0 will be one beyond the position we want
	SUB	R4, R0		// remove base
	MOVW	R0, ret+12(FP)
	RET

_sib_notfound:
	MOVW	$-1, R0
	MOVW	R0, ret+12(FP)
	RET

// fastrand advances m.fastrand and returns the new value:
//	x = m.fastrand
//	x += x
//	if int32(x) < 0 { x ^= 0x88888eef }
//	m.fastrand = x
TEXT runtime·fastrand(SB),NOSPLIT,$-4-4
	MOVW	g_m(g), R1
	MOVW	m_fastrand(R1), R0
	ADD.S	R0, R0			// double, setting the condition flags
	EOR.MI	$0x88888eef, R0		// only when the doubled value is negative
	MOVW	R0, m_fastrand(R1)
	MOVW	R0, ret+0(FP)
	RET

// return0 sets R0 to 0 and returns.
TEXT runtime·return0(SB),NOSPLIT,$0
	MOVW	$0, R0
	RET

// procyield counts the cycles argument down to zero in a loop and then
// returns. Clobbers R0 and R1.
TEXT runtime·procyield(SB),NOSPLIT,$-4
	MOVW	cycles+0(FP), R1
	MOVW	$0, R0
yieldloop:
	CMP	R0, R1
	B.NE	2(PC)			// counter not yet zero: skip the RET
	RET
	SUB	$1, R1
	B	yieldloop

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is returned in R0; g and R11 are preserved.
TEXT _cgo_topofstack(SB),NOSPLIT,$8
	// R11 and g register are clobbered by load_g. They are
	// callee-save in the gcc calling convention, so save them here.
	MOVW	R11, saveR11-4(SP)
	MOVW	g, saveG-8(SP)

	BL	runtime·load_g(SB)
	MOVW	g_m(g), R0
	MOVW	m_curg(R0), R0
	MOVW	(g_stack+stack_hi)(R0), R0

	MOVW	saveG-8(SP), g
	MOVW	saveR11-4(SP), R11
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP occupies the first instruction slot, so that return
// address lands on the BL to goexit1.
TEXT runtime·goexit(SB),NOSPLIT,$-4-0
	MOVW	R0, R0	// NOP
	BL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	MOVW	R0, R0	// NOP

// The prefetch entry points are no-ops in this file: each takes one
// 4-byte argument and returns immediately.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET

// x -> x/1000000, x%1000000, called from Go with args, results on stack.
// Thin stack-ABI wrapper around usplitR0, which works in registers.
TEXT runtime·usplit(SB),NOSPLIT,$0-12
	MOVW	x+0(FP), R0
	CALL	runtime·usplitR0(SB)
	MOVW	R0, q+4(FP)
	MOVW	R1, r+8(FP)
	RET

// R0, R1 = R0/1000000, R0%1000000
//
// The quotient is the high word of x*1125899907 shifted right by 18,
// i.e. (x*1125899907)>>50; the remainder is x - quotient*1000000.
// Clobbers R3.
TEXT runtime·usplitR0(SB),NOSPLIT,$0
	// magic multiply to avoid software divide without available m.
	// see output of go tool compile -S for x/1000000.
	MOVW	R0, R3			// keep x for the remainder
	MOVW	$1125899907, R1
	MULLU	R1, R0, (R0, R1)	// 64-bit product; high word in R0
	MOVW	R0>>18, R0		// R0 = quotient
	MOVW	$1000000, R1
	MULU	R0, R1			// R1 = quotient * 1000000
	SUB	R1, R3, R1		// R1 = x - quotient*1000000
	RET

// sigreturn is an empty function here: it returns immediately.
TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
	RET

#ifndef GOOS_nacl
// This is called from .init_array and follows the platform, not Go, ABI.
//
// In: R0 = pointer to the new moduledata. It is stored into the next
// field of the current runtime·lastmoduledatap and then becomes the new
// lastmoduledatap. R9 is saved and restored around the global accesses.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-4
	MOVW	R9, saver9-4(SP) // The access to global variables below implicitly uses R9, which is callee-save
	MOVW	runtime·lastmoduledatap(SB), R1
	MOVW	R0, moduledata_next(R1)
	MOVW	R0, runtime·lastmoduledatap(SB)
	MOVW	saver9-4(SP), R9
	RET
#endif

// checkASM stores 1 (true) into its one-byte result and returns.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET
