/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
 *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
 *  Adapted for Power Macintosh by Paul Mackerras.
 *  Low-level exception handlers and MMU support
 *  rewritten by Paul Mackerras.
 *    Copyright (C) 1996 Paul Mackerras.
 *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
 *
 *  This file contains the system call entry code, context switch
 *  code, and exception/interrupt return code for PowerPC.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */

#include <linux/errno.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
#include <asm/ftrace.h>
#include <asm/hw_irq.h>

/*
 * System calls.
 *
 * TOC entries used by the system call entry code, followed by the
 * switch to the text section.
 */
	.section	".toc","aw"
/* TOC slot for the address of .sys_call_table (loaded in system_call). */
.SYS_CALL_TABLE:
	.tc .sys_call_table[TC],.sys_call_table

/* This value is used to mark exception frames on the stack. */
exception_marker:
	.tc	ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER

	.section	".text"
	.align 7

/* With SHOW_SYSCALLS undefined the .do_show_syscall* calls are compiled out. */
#undef SHOW_SYSCALLS

/*
 * system_call_common - common system call entry.
 *
 * Register usage as visible in this routine:
 *   r0      syscall number (range-checked against NR_syscalls)
 *   r3-r8   syscall arguments (saved to GPR3..GPR8)
 *   r9      saved into the GPR13 slot of the frame
 *   r11     saved into the _NIP slot
 *   r12     saved into the _MSR slot; its MSR_PR bit selects the stack
 *   r13     base register for all PACA* accesses
 *
 * Builds an INT_FRAME_SIZE exception frame, tags it with
 * exception_marker, hard-enables interrupts, and then either falls
 * through to system_call or branches to syscall_dotrace /
 * syscall_enosys.
 */
	.globl system_call_common
system_call_common:
	andi.	r10,r12,MSR_PR
	mr	r10,r1			/* remember the incoming stack pointer */
	addi	r1,r1,-INT_FRAME_SIZE
	beq-	1f			/* MSR_PR clear: stay on this stack */
	ld	r1,PACAKSAVE(r13)	/* MSR_PR set: take the stack from the PACA */
1:	std	r10,0(r1)		/* back chain -> incoming r1 */
	std	r11,_NIP(r1)
	std	r12,_MSR(r1)
	std	r0,GPR0(r1)
	std	r10,GPR1(r1)
	ACCOUNT_CPU_USER_ENTRY(r10, r11)
	/*
	 * This "crclr so" clears CR0.SO, which is the error indication on
	 * return from this system call.  There must be no cmp instruction
	 * between it and the "mfcr r9" below, otherwise if XER.SO is set,
	 * CR0.SO will get set, causing all system calls to appear to fail.
	 */
	crclr	so
	std	r2,GPR2(r1)
	std	r3,GPR3(r1)
	std	r4,GPR4(r1)
	std	r5,GPR5(r1)
	std	r6,GPR6(r1)
	std	r7,GPR7(r1)
	std	r8,GPR8(r1)
	li	r11,0
	std	r11,GPR9(r1)		/* the GPR9..GPR12 slots are zeroed */
	std	r11,GPR10(r1)
	std	r11,GPR11(r1)
	std	r11,GPR12(r1)
	std	r9,GPR13(r1)
	mfcr	r9
	mflr	r10
	li	r11,0xc01		/* trap value; low bit set (tested by save_nvgprs) */
	std	r9,_CCR(r1)
	std	r10,_LINK(r1)
	std	r11,_TRAP(r1)
	mfxer	r9
	mfctr	r10
	std	r9,_XER(r1)
	std	r10,_CTR(r1)
	std	r3,ORIG_GPR3(r1)	/* second copy of the first argument */
	ld	r2,PACATOC(r13)		/* r2 = TOC pointer taken from the PACA */
	addi	r9,r1,STACK_FRAME_OVERHEAD
	ld	r11,exception_marker@toc(r2)
	std	r11,-16(r9)		/* "regshere" marker */
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
	/*
	 * NOTE(review): this beq consumes cr0.eq; no cr0-setting instruction
	 * is visible since the MSR_PR test at entry, but
	 * ACCOUNT_CPU_USER_ENTRY is opaque here -- confirm it leaves cr0 alone.
	 */
	beq	33f
	/* if from user, see if there are any DTL entries to process */
	ld	r10,PACALPPACAPTR(r13)	/* get ptr to VPA */
	ld	r11,PACA_DTL_RIDX(r13)	/* get log read index */
	ld	r10,LPPACA_DTLIDX(r10)	/* get log write index */
	cmpd	cr1,r11,r10
	beq+	cr1,33f			/* indices equal: nothing to process */
	bl	.accumulate_stolen_time
	/* reload the syscall number and arguments from the frame */
	REST_GPR(0,r1)
	REST_4GPRS(3,r1)
	REST_2GPRS(7,r1)
	addi	r9,r1,STACK_FRAME_OVERHEAD
33:
END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */

	/*
	 * A syscall should always be called with interrupts enabled
	 * so we just unconditionally hard-enable here. When some kind
	 * of irq tracing is used, we additionally check that condition
	 * is correct
	 */
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
	lbz	r10,PACASOFTIRQEN(r13)
	xori	r10,r10,1		/* r10 == 0 iff the soft-enable byte is 1 */
1:	tdnei	r10,0			/* trap (warning entry below) otherwise */
	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif

#ifdef CONFIG_PPC_BOOK3E
	wrteei	1
#else
	ld	r11,PACAKMSR(r13)
	ori	r11,r11,MSR_EE
	mtmsrd	r11,1
#endif /* CONFIG_PPC_BOOK3E */

	/* We do need to set SOFTE in the stack frame or the return
	 * from interrupt will be painful
	 */
	li	r10,1
	std	r10,SOFTE(r1)

#ifdef SHOW_SYSCALLS
	bl	.do_show_syscall
	REST_GPR(0,r1)
	REST_4GPRS(3,r1)
	REST_2GPRS(7,r1)
	addi	r9,r1,STACK_FRAME_OVERHEAD
#endif
	clrrdi	r11,r1,THREAD_SHIFT
	ld	r10,TI_FLAGS(r11)	/* r10 = flags; reused by system_call */
	andi.	r11,r10,_TIF_SYSCALL_T_OR_A
	bne-	syscall_dotrace
syscall_dotrace_cont:
	cmpldi	0,r0,NR_syscalls	/* unsigned range check of the syscall number */
	bge-	syscall_enosys

system_call:			/* label this so stack traces look sane */
/*
 * Need to vector to 32 Bit or default sys_call_table here,
 * based on caller's run-mode / personality.
 *
 * Reached by fall-through with r0 = syscall number and r10 = TI_FLAGS.
 * Each syscall number selects a 16-byte slot in the table (r0 << 4);
 * when _TIF_32BIT is set the pointer 8 bytes into the slot is used and
 * the upper 32 bits of r3-r8 are cleared first.
 */
	ld	r11,.SYS_CALL_TABLE@toc(2)
	andi.	r10,r10,_TIF_32BIT
	beq	15f
	addi	r11,r11,8	/* use 32-bit syscall entries */
	clrldi	r3,r3,32
	clrldi	r4,r4,32
	clrldi	r5,r5,32
	clrldi	r6,r6,32
	clrldi	r7,r7,32
	clrldi	r8,r8,32
15:
	slwi	r0,r0,4		/* syscall number -> byte offset into the table */
	ldx	r10,r11,r0	/* Fetch system call handler [ptr] */
	mtctr   r10
	bctrl			/* Call handler */

/*
 * syscall_exit - return path for system calls.
 *
 * On entry r3 holds the handler's return value.  The fast path stores
 * it in RESULT, hard-disables interrupts, and returns with RFI unless
 * the thread flags request extra work (-> syscall_exit_work).  A return
 * value that is unsigned >= -_LAST_ERRNO is treated as an error and
 * handled by syscall_error, which rejoins at syscall_error_cont.
 */
syscall_exit:
	std	r3,RESULT(r1)
#ifdef SHOW_SYSCALLS
	bl	.do_show_syscall_exit
	ld	r3,RESULT(r1)
#endif
	clrrdi	r12,r1,THREAD_SHIFT	/* r12 = base for the TI_FLAGS load below */

	ld	r8,_MSR(r1)		/* r8 = saved MSR; written to SRR1 at the end */
#ifdef CONFIG_PPC_BOOK3S
	/* No MSR:RI on BookE */
	andi.	r10,r8,MSR_RI
	beq-	unrecov_restore
#endif
	/*
	 * Disable interrupts so current_thread_info()->flags can't change,
	 * and so that we don't get interrupted after loading SRR0/1.
	 */
#ifdef CONFIG_PPC_BOOK3E
	wrteei	0
#else
	ld	r10,PACAKMSR(r13)	/* r10 is reused below to clear MSR_RI */
	mtmsrd	r10,1
#endif /* CONFIG_PPC_BOOK3E */

	ld	r9,TI_FLAGS(r12)
	li	r11,-_LAST_ERRNO
	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
	bne-	syscall_exit_work
	cmpld	r3,r11			/* unsigned: r3 >= -_LAST_ERRNO means error */
	ld	r5,_CCR(r1)
	bge-	syscall_error
syscall_error_cont:
	ld	r7,_NIP(r1)
BEGIN_FTR_SECTION
	stdcx.	r0,0,r1			/* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	andi.	r6,r8,MSR_PR		/* cr0 is consumed by the "beq- 1f" below */
	ld	r4,_LINK(r1)
	/*
	 * Clear RI before restoring r13.  If we are returning to
	 * userspace and we take an exception after restoring r13,
	 * we end up corrupting the userspace r13 value.
	 */
#ifdef CONFIG_PPC_BOOK3S
	/* No MSR:RI on BookE */
	li	r12,MSR_RI
	andc	r11,r10,r12		/* r10 still holds the PACAKMSR value */
	mtmsrd	r11,1			/* clear MSR.RI */
#endif /* CONFIG_PPC_BOOK3S */

	beq-	1f			/* MSR_PR clear in saved MSR: keep r13 */
	ACCOUNT_CPU_USER_EXIT(r11, r12)
	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
1:	ld	r2,GPR2(r1)
	ld	r1,GPR1(r1)
	mtlr	r4
	mtcr	r5
	mtspr	SPRN_SRR0,r7
	mtspr	SPRN_SRR1,r8
	RFI
	b	.	/* prevent speculative execution */

/*
 * syscall_error - error leg of syscall_exit.
 *
 * Entered with r3 = negative error value and r5 = saved CR image.
 * Sets the SO bit in the CR image (written back to _CCR), negates r3,
 * and rejoins the fast path at syscall_error_cont.
 */
syscall_error:
	oris	r5,r5,0x1000	/* Set SO bit in CR */
	neg	r3,r3
	std	r5,_CCR(r1)
	b	syscall_error_cont

/*
 * Traced system call support.
 *
 * Taken from system_call_common when _TIF_SYSCALL_T_OR_A is set.
 * Saves the non-volatile GPRs, calls .do_syscall_trace_enter with a
 * pointer to the register frame, then rebuilds the register state
 * that syscall_dotrace_cont expects (r0, r3-r8, r9, r10).
 */
syscall_dotrace:
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.do_syscall_trace_enter
	/*
	 * Restore argument registers possibly just changed.
	 * We use the return value of do_syscall_trace_enter
	 * for the call number to look up in the table (r0).
	 */
	mr	r0,r3
	ld	r3,GPR3(r1)
	ld	r4,GPR4(r1)
	ld	r5,GPR5(r1)
	ld	r6,GPR6(r1)
	ld	r7,GPR7(r1)
	ld	r8,GPR8(r1)
	addi	r9,r1,STACK_FRAME_OVERHEAD
	clrrdi	r10,r1,THREAD_SHIFT
	ld	r10,TI_FLAGS(r10)	/* r10 = TI_FLAGS again, as system_call needs */
	b	syscall_dotrace_cont

/* Syscall number out of range (r0 >= NR_syscalls): return -ENOSYS. */
syscall_enosys:
	li	r3,-ENOSYS
	b	syscall_exit

/*
 * syscall_exit_work - slow path of syscall_exit.
 *
 * Register state inherited from syscall_exit:
 *   r3  = syscall return value
 *   r9  = TI_FLAGS value
 *   r11 = -_LAST_ERRNO
 *   r12 = base register used for TI_FLAGS
 */
syscall_exit_work:
	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
	 If TIF_NOERROR is set, just save r3 as it is. */

	andi.	r0,r9,_TIF_RESTOREALL
	beq+	0f
	REST_NVGPRS(r1)
	b	2f
0:	cmpld	r3,r11		/* r11 is -_LAST_ERRNO */
	blt+	1f
	andi.	r0,r9,_TIF_NOERROR
	bne-	1f
	ld	r5,_CCR(r1)
	neg	r3,r3
	oris	r5,r5,0x1000	/* Set SO bit in CR */
	std	r5,_CCR(r1)
1:	std	r3,GPR3(r1)
2:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
	beq	4f

	/* Clear per-syscall TIF flags if any are set.  */

	li	r11,_TIF_PERSYSCALL_MASK
	addi	r12,r12,TI_FLAGS	/* r12 -> the flags word itself */
3:	ldarx	r10,0,r12		/* load-reserve / store-conditional retry loop */
	andc	r10,r10,r11
	stdcx.	r10,0,r12
	bne-	3b
	subi	r12,r12,TI_FLAGS	/* undo the adjustment of r12 */

4:	/* Anything else left to do? */
	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
	beq	.ret_from_except_lite

	/* Re-enable interrupts */
#ifdef CONFIG_PPC_BOOK3E
	wrteei	1
#else
	ld	r10,PACAKMSR(r13)
	ori	r10,r10,MSR_EE
	mtmsrd	r10,1
#endif /* CONFIG_PPC_BOOK3E */

	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.do_syscall_trace_leave
	b	.ret_from_except

/*
 * Save non-volatile GPRs, if not already saved.
 *
 * The low bit of the frame's _TRAP word is the "not yet saved" flag:
 * if it is clear the routine returns immediately, otherwise it runs
 * SAVE_NVGPRS and stores _TRAP back with that bit cleared.
 * Uses r0 and r11 as scratch and modifies cr0.
 */
_GLOBAL(save_nvgprs)
	ld	r11,_TRAP(r1)
	andi.	r0,r11,1
	beqlr-				/* low bit clear: nothing to do */
	SAVE_NVGPRS(r1)
	clrrdi	r0,r11,1		/* drop the low bit */
	std	r0,_TRAP(r1)
	blr


/*
 * The sigsuspend and rt_sigsuspend system calls can call do_signal
 * and thus put the process into the stopped state where we might
 * want to examine its user state with ptrace.  Therefore we need
 * to save all the nonvolatile registers (r14 - r31) before calling
 * the C code.  Similarly, fork, vfork and clone need the full
 * register state on the stack so that it can be copied to the child.
 */

/* fork wrapper: save the non-volatile GPRs, call .sys_fork, then take syscall_exit. */
_GLOBAL(ppc_fork)
	bl	.save_nvgprs
	bl	.sys_fork
	b	syscall_exit

/* vfork wrapper: save the non-volatile GPRs, call .sys_vfork, then take syscall_exit. */
_GLOBAL(ppc_vfork)
	bl	.save_nvgprs
	bl	.sys_vfork
	b	syscall_exit

/* clone wrapper: save the non-volatile GPRs, call .sys_clone, then take syscall_exit. */
_GLOBAL(ppc_clone)
	bl	.save_nvgprs
	bl	.sys_clone
	b	syscall_exit

/* swapcontext wrapper calling .compat_sys_swapcontext with the non-volatile GPRs saved. */
_GLOBAL(ppc32_swapcontext)
	bl	.save_nvgprs
	bl	.compat_sys_swapcontext
	b	syscall_exit

/* swapcontext wrapper calling .sys_swapcontext with the non-volatile GPRs saved. */
_GLOBAL(ppc64_swapcontext)
	bl	.save_nvgprs
	bl	.sys_swapcontext
	b	syscall_exit

/*
 * ret_from_fork: calls .schedule_tail, reloads the non-volatile GPRs
 * from the frame at r1, and leaves through syscall_exit with r3 = 0.
 */
_GLOBAL(ret_from_fork)
	bl	.schedule_tail
	REST_NVGPRS(r1)
	li	r3,0
	b	syscall_exit

/*
 * This routine switches between two different tasks.  The process
 * state of one is saved on its kernel stack.  Then the state
 * of the other is restored from its kernel stack.  The memory
 * management hardware is updated to the second process's state.
 * Finally, we can return to the second process, via ret_from_except.
 * On entry, r3 points to the THREAD for the current task, r4
 * points to the THREAD for the new task.
 *
 * Note: there are two ways to get to the "going out" portion
 * of this code; either by coming in via the entry (_switch)
 * or via "fork" which must set up an environment equivalent
 * to the "_switch" path.  If you change this you'll have to change
 * the fork code also.
 *
 * The code which creates the new task context is in 'copy_thread'
 * in arch/powerpc/kernel/process.c
 *
 * Returns with r3 = r3 - THREAD (the old thread converted to its
 * task_struct) on the new task's stack.
 */
	.align	7
_GLOBAL(_switch)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-SWITCH_FRAME_SIZE(r1)
	/* r3-r13 are caller saved -- Cort */
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)
	mflr	r20		/* Return to switch caller */
	mfmsr	r22
	li	r0, MSR_FP	/* r0 accumulates the MSR bits to switch off */
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	oris	r0,r0,MSR_VSX@h	/* Disable VSX */
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif /* CONFIG_VSX */
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	oris	r0,r0,MSR_VEC@h	/* Disable altivec */
	mfspr	r24,SPRN_VRSAVE	/* save vrsave register value */
	std	r24,THREAD_VRSAVE(r3)
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_PPC64
BEGIN_FTR_SECTION
	mfspr	r25,SPRN_DSCR	/* r25 is compared with the new DSCR further down */
	std	r25,THREAD_DSCR(r3)
END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
#endif
	and.	r0,r0,r22	/* any of those bits currently set in the MSR? */
	beq+	1f
	andc	r22,r22,r0
	MTMSRD(r22)
	isync
1:	std	r20,_NIP(r1)
	mfcr	r23
	std	r23,_CCR(r1)
	std	r1,KSP(r3)	/* Set old stack pointer */

#ifdef CONFIG_SMP
	/* We need a sync somewhere here to make sure that if the
	 * previous task gets rescheduled on another CPU, it sees all
	 * stores it has performed on this one.
	 */
	sync
#endif /* CONFIG_SMP */

	/*
	 * If we optimise away the clear of the reservation in system
	 * calls because we know the CPU tracks the address of the
	 * reservation, then we need to clear it here to cover the
	 * case that the kernel context switch path has no larx
	 * instructions.
	 */
BEGIN_FTR_SECTION
	ldarx	r6,0,r1
END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)

	addi	r6,r4,-THREAD	/* Convert THREAD to 'current' */
	std	r6,PACACURRENT(r13)	/* Set new 'current' */

	ld	r8,KSP(r4)	/* new stack pointer */
#ifdef CONFIG_PPC_BOOK3S
BEGIN_FTR_SECTION
  BEGIN_FTR_SECTION_NESTED(95)
	clrrdi	r6,r8,28	/* get its ESID */
	clrrdi	r9,r1,28	/* get current sp ESID */
  FTR_SECTION_ELSE_NESTED(95)
	clrrdi	r6,r8,40	/* get its 1T ESID */
	clrrdi	r9,r1,40	/* get current sp 1T ESID */
  ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
FTR_SECTION_ELSE
	b	2f
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
	clrldi.	r0,r6,2		/* is new ESID c00000000? */
	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
	cror	eq,4*cr1+eq,eq
	beq	2f		/* if yes, don't slbie it */

	/* Bolt in the new stack SLB entry */
	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
	oris	r0,r6,(SLB_ESID_V)@h
	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
BEGIN_FTR_SECTION
	li	r9,MMU_SEGSIZE_1T	/* insert B field */
	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)

	/* Update the last bolted SLB.  No write barriers are needed
	 * here, provided we only update the current CPU's SLB shadow
	 * buffer.
	 */
	ld	r9,PACA_SLBSHADOWPTR(r13)
	li	r12,0
	std	r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
	std	r7,SLBSHADOW_STACKVSID(r9)  /* Save VSID */
	std	r0,SLBSHADOW_STACKESID(r9)  /* Save ESID */

	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
	 * we have 1TB segments, the only CPUs known to have the errata
	 * only support less than 1TB of system memory and we'll never
	 * actually hit this code path.
	 */

	slbie	r6
	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
	slbmte	r7,r0
	isync
2:
#endif /* CONFIG_PPC_BOOK3S */

	clrrdi	r7,r8,THREAD_SHIFT	/* base of new stack */
	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
	   because we don't need to leave the 288-byte ABI gap at the
	   top of the kernel stack. */
	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE

	mr	r1,r8		/* start using new stack pointer */
	std	r7,PACAKSAVE(r13)

	ld	r6,_CCR(r1)
	mtcrf	0xFF,r6

#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	ld	r0,THREAD_VRSAVE(r4)
	mtspr	SPRN_VRSAVE,r0		/* if G4, restore VRSAVE reg */
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_PPC64
BEGIN_FTR_SECTION
	ld	r0,THREAD_DSCR(r4)
	cmpd	r0,r25		/* r25 = DSCR value read on the way in */
	beq	1f		/* unchanged: skip the mtspr */
	mtspr	SPRN_DSCR,r0
1:
END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
#endif

	/* r3-r13 are destroyed -- Cort */
	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)

	/* convert old thread to its task_struct for return value */
	addi	r3,r3,-THREAD
	ld	r7,_NIP(r1)	/* Return to _switch caller in new task */
	mtlr	r7
	addi	r1,r1,SWITCH_FRAME_SIZE
	blr

/*
 * ret_from_except: exception return that first reloads the non-volatile
 * GPRs when the low bit of _TRAP is clear (the state save_nvgprs leaves
 * behind after saving them), then falls through to ret_from_except_lite.
 */
	.align	7
_GLOBAL(ret_from_except)
	ld	r11,_TRAP(r1)
	andi.	r0,r11,1
	bne	.ret_from_except_lite	/* low bit set: skip the reload */
	REST_NVGPRS(r1)

/*
 * ret_from_except_lite: hard-disables interrupts, then checks the
 * thread flags for pending work.  Branches to do_work when there is
 * some; otherwise continues into restore (by branch or fall-through).
 *
 * Register state handed to do_work: r4 = TI_FLAGS, r9 = thread_info
 * base, and (CONFIG_PREEMPT only) r3 = saved MSR.
 */
_GLOBAL(ret_from_except_lite)
	/*
	 * Disable interrupts so that current_thread_info()->flags
	 * can't change between when we test it and when we return
	 * from the interrupt.
	 */
#ifdef CONFIG_PPC_BOOK3E
	wrteei	0
#else
	ld	r10,PACAKMSR(r13) /* Get kernel MSR without EE */
	mtmsrd	r10,1		  /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */

#ifdef CONFIG_PREEMPT
	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */
	li	r0,_TIF_NEED_RESCHED	/* bits to check */
	ld	r3,_MSR(r1)
	ld	r4,TI_FLAGS(r9)
	/* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
	rlwimi	r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
	and.	r0,r4,r0	/* check NEED_RESCHED and maybe SIGPENDING */
	bne	do_work

#else /* !CONFIG_PREEMPT */
	ld	r3,_MSR(r1)	/* Returning to user mode? */
	andi.	r3,r3,MSR_PR
	beq	restore		/* if not, just restore regs and return */

	/* Check current_thread_info()->flags */
	clrrdi	r9,r1,THREAD_SHIFT
	ld	r4,TI_FLAGS(r9)
	andi.	r0,r4,_TIF_USER_WORK_MASK
	bne	do_work
#endif /* !CONFIG_PREEMPT */

/*
 * restore / fast_exc_return_irq: decide the soft interrupt state to
 * return with, based on the frame's SOFTE word and the current
 * PACASOFTIRQEN byte, then continue into do_restore.
 */
	.globl	fast_exc_return_irq
fast_exc_return_irq:
restore:
	/*
	 * This is the main kernel exit path. First we check if we
	 * are about to re-enable interrupts
	 */
	ld	r5,SOFTE(r1)
	lbz	r6,PACASOFTIRQEN(r13)
	cmpwi	cr0,r5,0
	beq	restore_irq_off		/* frame says soft-disabled */

	/* We are enabling, were we already enabled ? Yes, just return */
	cmpwi	cr0,r6,1
	beq	cr0,do_restore

	/*
	 * We are about to soft-enable interrupts (we are hard disabled
	 * at this point). We check if there's anything that needs to
	 * be replayed first.
	 */
	lbz	r0,PACAIRQHAPPENED(r13)
	cmpwi	cr0,r0,0
	bne-	restore_check_irq_replay

	/*
	 * Get here when nothing happened while soft-disabled, just
	 * soft-enable and move-on. We will hard-enable as a side
	 * effect of rfi
	 */
restore_no_replay:
	TRACE_ENABLE_INTS
	li	r0,1
	stb	r0,PACASOFTIRQEN(r13);

622	/*
623	 * Final return path. BookE is handled in a different file
624	 */
625do_restore:
626#ifdef CONFIG_PPC_BOOK3E
627	b	.exception_return_book3e
628#else
629	/*
630	 * Clear the reservation. If we know the CPU tracks the address of
631	 * the reservation then we can potentially save some cycles and use
632	 * a larx. On POWER6 and POWER7 this is significantly faster.
633	 */
634BEGIN_FTR_SECTION
635	stdcx.	r0,0,r1		/* to clear the reservation */
636FTR_SECTION_ELSE
637	ldarx	r4,0,r1
638ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
639
640	/*
641	 * Some code path such as load_up_fpu or altivec return directly
642	 * here. They run entirely hard disabled and do not alter the
643	 * interrupt state. They also don't use lwarx/stwcx. and thus
644	 * are known not to leave dangling reservations.
645	 */
646	.globl	fast_exception_return
647fast_exception_return:
648	ld	r3,_MSR(r1)
649	ld	r4,_CTR(r1)
650	ld	r0,_LINK(r1)
651	mtctr	r4
652	mtlr	r0
653	ld	r4,_XER(r1)
654	mtspr	SPRN_XER,r4
655
656	REST_8GPRS(5, r1)
657
658	andi.	r0,r3,MSR_RI
659	beq-	unrecov_restore
660
661	/*
662	 * Clear RI before restoring r13.  If we are returning to
663	 * userspace and we take an exception after restoring r13,
664	 * we end up corrupting the userspace r13 value.
665	 */
666	ld	r4,PACAKMSR(r13) /* Get kernel MSR without EE */
667	andc	r4,r4,r0	 /* r0 contains MSR_RI here */
668	mtmsrd	r4,1
669
670	/*
671	 * r13 is our per cpu area, only restore it if we are returning to
672	 * userspace the value stored in the stack frame may belong to
673	 * another CPU.
674	 */
675	andi.	r0,r3,MSR_PR
676	beq	1f
677	ACCOUNT_CPU_USER_EXIT(r2, r4)
678	REST_GPR(13, r1)
6791:
680	mtspr	SPRN_SRR1,r3
681
682	ld	r2,_CCR(r1)
683	mtcrf	0xFF,r2
684	ld	r2,_NIP(r1)
685	mtspr	SPRN_SRR0,r2
686
687	ld	r0,GPR0(r1)
688	ld	r2,GPR2(r1)
689	ld	r3,GPR3(r1)
690	ld	r4,GPR4(r1)
691	ld	r1,GPR1(r1)
692
693	rfid
694	b	.	/* prevent speculative execution */
695
696#endif /* CONFIG_PPC_BOOK3E */
697
698	/*
699	 * We are returning to a context with interrupts soft disabled.
700	 *
701	 * However, we may also about to hard enable, so we need to
702	 * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
703	 * or that bit can get out of sync and bad things will happen
704	 */
705restore_irq_off:
706	ld	r3,_MSR(r1)
707	lbz	r7,PACAIRQHAPPENED(r13)
708	andi.	r0,r3,MSR_EE
709	beq	1f
710	rlwinm	r7,r7,0,~PACA_IRQ_HARD_DIS
711	stb	r7,PACAIRQHAPPENED(r13)
7121:	li	r0,0
713	stb	r0,PACASOFTIRQEN(r13);
714	TRACE_DISABLE_INTS
715	b	do_restore
716
717	/*
718	 * Something did happen, check if a re-emit is needed
719	 * (this also clears paca->irq_happened)
720	 */
721restore_check_irq_replay:
722	/* XXX: We could implement a fast path here where we check
723	 * for irq_happened being just 0x01, in which case we can
724	 * clear it and return. That means that we would potentially
725	 * miss a decrementer having wrapped all the way around.
726	 *
727	 * Still, this might be useful for things like hash_page
728	 */
729	bl	.__check_irq_replay
730	cmpwi	cr0,r3,0
731 	beq	restore_no_replay
732
733	/*
734	 * We need to re-emit an interrupt. We do so by re-using our
735	 * existing exception frame. We first change the trap value,
736	 * but we need to ensure we preserve the low nibble of it
737	 */
738	ld	r4,_TRAP(r1)
739	clrldi	r4,r4,60
740	or	r4,r4,r3
741	std	r4,_TRAP(r1)
742
743	/*
744	 * Then find the right handler and call it. Interrupts are
745	 * still soft-disabled and we keep them that way.
746	*/
747	cmpwi	cr0,r3,0x500
748	bne	1f
749	addi	r3,r1,STACK_FRAME_OVERHEAD;
750 	bl	.do_IRQ
751	b	.ret_from_except
7521:	cmpwi	cr0,r3,0x900
753	bne	1f
754	addi	r3,r1,STACK_FRAME_OVERHEAD;
755	bl	.timer_interrupt
756	b	.ret_from_except
757#ifdef CONFIG_PPC_BOOK3E
7581:	cmpwi	cr0,r3,0x280
759	bne	1f
760	addi	r3,r1,STACK_FRAME_OVERHEAD;
761	bl	.doorbell_exception
762	b	.ret_from_except
763#endif /* CONFIG_PPC_BOOK3E */
7641:	b	.ret_from_except /* What else to do here ? */
765
766
767
/*
 * do_work: pending-work handling, entered from ret_from_except_lite.
 *
 * Register state on entry: r4 = TI_FLAGS, r9 = thread_info base and,
 * under CONFIG_PREEMPT, r3 = saved MSR.  With CONFIG_PREEMPT a return
 * to kernel mode may call .preempt_schedule_irq; a return to user mode
 * either reschedules or calls .do_notify_resume.
 */
3:
do_work:
#ifdef CONFIG_PREEMPT
	andi.	r0,r3,MSR_PR	/* Returning to user mode? */
	bne	user_work
	/* Check that preempt_count() == 0 and interrupts are enabled */
	lwz	r8,TI_PREEMPT(r9)
	cmpwi	cr1,r8,0
	ld	r0,SOFTE(r1)
	cmpdi	r0,0
	crandc	eq,cr1*4+eq,eq	/* eq = (preempt count == 0) && (SOFTE != 0) */
	bne	restore

	/*
	 * Here we are preempting the current task. We want to make
	 * sure we are soft-disabled first
	 */
	SOFT_DISABLE_INTS(r3,r4)
1:	bl	.preempt_schedule_irq

	/* Re-test flags and eventually loop */
	clrrdi	r9,r1,THREAD_SHIFT
	ld	r4,TI_FLAGS(r9)
	andi.	r0,r4,_TIF_NEED_RESCHED
	bne	1b
	b	restore

user_work:
#endif /* CONFIG_PREEMPT */

	andi.	r0,r4,_TIF_NEED_RESCHED
	beq	1f
	bl	.restore_interrupts
	bl	.schedule
	b	.ret_from_except_lite

	/* No reschedule requested: call .do_notify_resume with the frame */
1:	bl	.save_nvgprs
	bl	.restore_interrupts
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.do_notify_resume
	b	.ret_from_except

/*
 * unrecov_restore: reached when the saved MSR has MSR_RI clear.
 * Calls .unrecoverable_exception with the register frame and repeats
 * the call if it ever returns.
 */
unrecov_restore:
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.unrecoverable_exception
	b	unrecov_restore

#ifdef CONFIG_PPC_RTAS
/*
 * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
 * called with the MMU off.
 *
 * In addition, we need to be in 32b mode, at least for now.
 *
 * Note: r3 is an input parameter to rtas, so don't trash it...
 *
 * The routine saves r2, r13, r14-r31, CR, CTR, XER, DAR and DSISR in
 * an RTAS_FRAME_SIZE frame, stores r1 and the MSR in the PACA, and
 * enters RTAS with rfid (entry point in r5 -> SRR0, r4 = rtas->base).
 * LR is set to the real-mode address of .rtas_return_loc.
 */
_GLOBAL(enter_rtas)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-RTAS_FRAME_SIZE(r1)	/* Save SP and create stack space. */

	/* Because RTAS is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves.  We therefore save those registers
	 * RTAS might touch to the stack.  (r0, r3-r13 are caller saved)
	 */
	SAVE_GPR(2, r1)			/* Save the TOC */
	SAVE_GPR(13, r1)		/* Save paca */
	SAVE_8GPRS(14, r1)		/* Save the non-volatiles */
	SAVE_10GPRS(22, r1)		/* ditto */

	mfcr	r4
	std	r4,_CCR(r1)
	mfctr	r5
	std	r5,_CTR(r1)
	mfspr	r6,SPRN_XER
	std	r6,_XER(r1)
	mfdar	r7
	std	r7,_DAR(r1)
	mfdsisr	r8
	std	r8,_DSISR(r1)

	/* Temporary workaround to clear CR until RTAS can be modified to
	 * ignore all bits.
	 */
	li	r0,0
	mtcr	r0

#ifdef CONFIG_BUG
	/* There is no way it is acceptable to get here with interrupts enabled,
	 * check it with the asm equivalent of WARN_ON
	 */
	lbz	r0,PACASOFTIRQEN(r13)
1:	tdnei	r0,0
	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif

	/* Hard-disable interrupts */
	mfmsr	r6			/* r6 = MSR as it was on entry */
	rldicl	r7,r6,48,1		/* rotate left 48, clear the top bit ... */
	rotldi	r7,r7,16		/* ... and rotate back into place */
	mtmsrd	r7,1

	/* Unfortunately, the stack pointer and the MSR are also clobbered,
	 * so they are saved in the PACA which allows us to restore
	 * our original state after RTAS returns.
	 */
	std	r1,PACAR1(r13)
	std	r6,PACASAVEDMSR(r13)

	/* Setup our real return addr */
	LOAD_REG_ADDR(r4,.rtas_return_loc)
	clrldi	r4,r4,2			/* convert to realmode address */
	mtlr	r4

	/* r0 = entry MSR with EE, SE, BE and RI cleared */
	li	r0,0
	ori	r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
	andc	r0,r6,r0

	/* r6 = r0 with SF, IR, DR, FE0, FE1, FP and RI cleared (-> SRR1) */
	li	r9,1
	rldicr	r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI
	andc	r6,r0,r9
	sync				/* disable interrupts so SRR0/1 */
	mtmsrd	r0			/* don't get trashed */

	LOAD_REG_ADDR(r4, rtas)
	ld	r5,RTASENTRY(r4)	/* get the rtas->entry value */
	ld	r4,RTASBASE(r4)		/* get the rtas->base value */

	mtspr	SPRN_SRR0,r5
	mtspr	SPRN_SRR1,r6
	rfid
	b	.	/* prevent speculative execution */

/*
 * rtas_return_loc: address placed in LR by enter_rtas.  Runs with
 * relocation off, so it reaches the PACA through a real-mode address,
 * reloads r1 and the saved MSR from it, and uses rfid to continue at
 * .rtas_restore_regs under that MSR.
 */
_STATIC(rtas_return_loc)
	/* relocation is off at this point */
	GET_PACA(r4)
	clrldi	r4,r4,2			/* convert to realmode address */

	bcl	20,31,$+4		/* put the address of label 0 into LR */
0:	mflr	r3
	ld	r3,(1f-0b)(r3)		/* get &.rtas_restore_regs */

	mfmsr	r6
	li	r0,MSR_RI
	andc	r6,r6,r0		/* clear MSR_RI before SRR0/1 are loaded */
	sync
	mtmsrd	r6

	ld	r1,PACAR1(r4)		/* Restore our SP */
	ld	r4,PACASAVEDMSR(r4)	/* Restore our MSR */

	mtspr	SPRN_SRR0,r3
	mtspr	SPRN_SRR1,r4
	rfid
	b	.	/* prevent speculative execution */

	.align	3
1:	.llong	.rtas_restore_regs	/* read PC-relative by the ld above */

/*
 * rtas_restore_regs: final stage of the RTAS return.  Reloads the
 * registers that enter_rtas saved in its frame, pops the frame, and
 * returns to enter_rtas's caller through the LR value saved at 16(r1).
 */
_STATIC(rtas_restore_regs)
	/* relocation is on at this point */
	REST_GPR(2, r1)			/* Restore the TOC */
	REST_GPR(13, r1)		/* Restore paca */
	REST_8GPRS(14, r1)		/* Restore the non-volatiles */
	REST_10GPRS(22, r1)		/* ditto */

	GET_PACA(r13)

	ld	r4,_CCR(r1)
	mtcr	r4
	ld	r5,_CTR(r1)
	mtctr	r5
	ld	r6,_XER(r1)
	mtspr	SPRN_XER,r6
	ld	r7,_DAR(r1)
	mtdar	r7
	ld	r8,_DSISR(r1)
	mtdsisr	r8

	addi	r1,r1,RTAS_FRAME_SIZE	/* Unstack our frame */
	ld	r0,16(r1)		/* get return address */

	mtlr	r0
	blr				/* return to caller */

#endif /* CONFIG_PPC_RTAS */

/*
 * enter_prom: call the PROM entry point passed in r4 with the MSR
 * switched to 32-bit mode.  Saves r2, r13, r14-r31, CR and the MSR in
 * a PROM_FRAME_SIZE frame and restores them after the call.  r3 is not
 * written before the call.
 */
_GLOBAL(enter_prom)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-PROM_FRAME_SIZE(r1)	/* Save SP and create stack space */

	/* Because PROM is running in 32b mode, it clobbers the high order half
	 * of all registers that it saves.  We therefore save those registers
	 * PROM might touch to the stack.  (r0, r3-r13 are caller saved)
	 */
	SAVE_GPR(2, r1)
	SAVE_GPR(13, r1)
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)
	mfcr	r10
	mfmsr	r11
	std	r10,_CCR(r1)
	std	r11,_MSR(r1)		/* reloaded after the call to undo the switch */

	/* Get the PROM entrypoint */
	mtlr	r4

	/* Switch MSR to 32 bits mode
	 */
#ifdef CONFIG_PPC_BOOK3E
	rlwinm	r11,r11,0,1,31
	mtmsr	r11
#else /* CONFIG_PPC_BOOK3E */
	mfmsr	r11
	li	r12,1
	rldicr	r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
	andc	r11,r11,r12		/* clear MSR_SF */
	li	r12,1
	rldicr	r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
	andc	r11,r11,r12		/* clear MSR_ISF */
	mtmsrd	r11
#endif /* CONFIG_PPC_BOOK3E */
	isync

	/* Enter PROM here... */
	blrl

	/* Just make sure that r1 top 32 bits didn't get
	 * corrupt by OF
	 */
	rldicl	r1,r1,0,32

	/* Restore the MSR (back to 64 bits) */
	ld	r0,_MSR(r1)
	MTMSRD(r0)
	isync

	/* Restore other registers */
	REST_GPR(2, r1)
	REST_GPR(13, r1)
	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)
	ld	r4,_CCR(r1)
	mtcr	r4

	addi	r1,r1,PROM_FRAME_SIZE
	ld	r0,16(r1)
	mtlr	r0
	blr

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
/* With dynamic ftrace both mcount entry points simply return. */
_GLOBAL(mcount)
_GLOBAL(_mcount)
	blr

/*
 * ftrace_caller: tracing trampoline for dynamic ftrace.
 *
 * Creates a 112-byte frame and saves LR at 128(r1), i.e. at offset 16
 * of the previous frame.  The call at ftrace_call is made with
 *   r3 = LR - MCOUNT_INSN_SIZE
 *   r4 = the doubleword at offset 16 of the frame that the incoming
 *        back chain points to
 * ftrace_call and ftrace_graph_call are global labels on the two
 * branch instructions; as assembled they target ftrace_stub and
 * ftrace_graph_stub.
 */
_GLOBAL(ftrace_caller)
	/* Taken from output of objdump from lib64/glibc */
	mflr	r3
	ld	r11, 0(r1)
	stdu	r1, -112(r1)
	std	r3, 128(r1)
	ld	r4, 16(r11)
	subi	r3, r3, MCOUNT_INSN_SIZE
.globl ftrace_call
ftrace_call:
	bl	ftrace_stub
	nop
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	b	ftrace_graph_stub
_GLOBAL(ftrace_graph_stub)
#endif
	ld	r0, 128(r1)		/* reload the LR saved on entry */
	mtlr	r0
	addi	r1, r1, 112		/* pop the frame */
_GLOBAL(ftrace_stub)
	blr
#else
/* Without dynamic ftrace, mcount simply returns. */
_GLOBAL(mcount)
	blr

/*
 * _mcount: creates a 112-byte frame, saves LR at 128(r1), and makes an
 * indirect call through ftrace_trace_function with
 *   r3 = LR - MCOUNT_INSN_SIZE
 *   r4 = the doubleword at offset 16 of the frame that the incoming
 *        back chain points to
 * With CONFIG_FUNCTION_GRAPH_TRACER it then branches to
 * ftrace_graph_caller instead of unwinding here.
 */
_GLOBAL(_mcount)
	/* Taken from output of objdump from lib64/glibc */
	mflr	r3
	ld	r11, 0(r1)
	stdu	r1, -112(r1)
	std	r3, 128(r1)
	ld	r4, 16(r11)

	subi	r3, r3, MCOUNT_INSN_SIZE
	LOAD_REG_ADDR(r5,ftrace_trace_function)
	ld	r5,0(r5)		/* value stored in ftrace_trace_function */
	ld	r5,0(r5)		/* first doubleword it points at -> CTR */
	mtctr	r5
	bctrl
	nop


#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	b	ftrace_graph_caller
#endif
	ld	r0, 128(r1)
	mtlr	r0
	addi	r1, r1, 112
_GLOBAL(ftrace_stub)
	blr

#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller: runs on the 112-byte frame that _mcount or
 * ftrace_caller created.  Calls .prepare_ftrace_return with
 *   r3 = address of the slot at offset 16 of the frame that the
 *        doubleword at 112(r1) points to
 *   r4 = the value saved at 128(r1), minus MCOUNT_INSN_SIZE
 * then reloads LR from 128(r1), pops the frame and returns.
 */
_GLOBAL(ftrace_graph_caller)
	/* load r4 with local address */
	ld	r4, 128(r1)
	subi	r4, r4, MCOUNT_INSN_SIZE

	/* get the parent address */
	ld	r11, 112(r1)
	addi	r3, r11, 16

	bl	.prepare_ftrace_return
	nop

	ld	r0, 128(r1)
	mtlr	r0
	addi	r1, r1, 112
	blr

/*
 * return_to_handler: preserves r3, r4 and r31 below the stack pointer,
 * calls .ftrace_return_to_handler, and then branches to the address
 * that call returned, with r3, r4, r31 and r1 restored.
 */
_GLOBAL(return_to_handler)
	/* need to save return values */
	std	r4,  -24(r1)
	std	r3,  -16(r1)
	std	r31, -8(r1)
	mr	r31, r1
	stdu	r1, -112(r1)

	bl	.ftrace_return_to_handler
	nop

	/* return value has real return address */
	mtlr	r3

	ld	r1, 0(r1)		/* follow the back chain to the old r1 */
	ld	r4,  -24(r1)
	ld	r3,  -16(r1)
	ld	r31, -8(r1)

	/* Jump back to real return address */
	blr

/*
 * mod_return_to_handler: same sequence as return_to_handler, but r2 is
 * also saved, replaced with the PACATOC value for the duration of the
 * call, and restored afterwards.
 */
_GLOBAL(mod_return_to_handler)
	/* need to save return values */
	std	r4,  -32(r1)
	std	r3,  -24(r1)
	/* save TOC */
	std	r2,  -16(r1)
	std	r31, -8(r1)
	mr	r31, r1
	stdu	r1, -112(r1)

	/*
	 * We are in a module using the module's TOC.
	 * Switch to our TOC to run inside the core kernel.
	 */
	ld	r2, PACATOC(r13)

	bl	.ftrace_return_to_handler
	nop

	/* return value has real return address */
	mtlr	r3

	ld	r1, 0(r1)		/* follow the back chain to the old r1 */
	ld	r4,  -32(r1)
	ld	r3,  -24(r1)
	ld	r2,  -16(r1)
	ld	r31, -8(r1)

	/* Jump back to real return address */
	blr
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#endif /* CONFIG_FUNCTION_TRACER */
