1/*
2 * Copyright 2015, Cyril Bur, IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include "../basic_asm.h"
11
/*
 * PUSH_FPU(off): store f14-f31 into the current stack frame.
 *
 * The eighteen registers are written as consecutive doublewords, f14 at
 * off(sp) through f31 at off+136(sp), so the frame has to provide 144
 * bytes starting at that offset. POP_FPU with the same offset reads
 * them back.
 */
#define PUSH_FPU(off) \
	stfd	f14, off(sp); \
	stfd	f15, off+8(sp); \
	stfd	f16, off+16(sp); \
	stfd	f17, off+24(sp); \
	stfd	f18, off+32(sp); \
	stfd	f19, off+40(sp); \
	stfd	f20, off+48(sp); \
	stfd	f21, off+56(sp); \
	stfd	f22, off+64(sp); \
	stfd	f23, off+72(sp); \
	stfd	f24, off+80(sp); \
	stfd	f25, off+88(sp); \
	stfd	f26, off+96(sp); \
	stfd	f27, off+104(sp); \
	stfd	f28, off+112(sp); \
	stfd	f29, off+120(sp); \
	stfd	f30, off+128(sp); \
	stfd	f31, off+136(sp);
31
/*
 * POP_FPU(off): reload f14-f31 from the current stack frame.
 *
 * Reads eighteen consecutive doublewords, off(sp) into f14 through
 * off+136(sp) into f31. This is the layout PUSH_FPU writes, so pass the
 * same offset that was given to PUSH_FPU.
 */
#define POP_FPU(off) \
	lfd	f14, off(sp); \
	lfd	f15, off+8(sp); \
	lfd	f16, off+16(sp); \
	lfd	f17, off+24(sp); \
	lfd	f18, off+32(sp); \
	lfd	f19, off+40(sp); \
	lfd	f20, off+48(sp); \
	lfd	f21, off+56(sp); \
	lfd	f22, off+64(sp); \
	lfd	f23, off+72(sp); \
	lfd	f24, off+80(sp); \
	lfd	f25, off+88(sp); \
	lfd	f26, off+96(sp); \
	lfd	f27, off+104(sp); \
	lfd	f28, off+112(sp); \
	lfd	f29, off+120(sp); \
	lfd	f30, off+128(sp); \
	lfd	f31, off+136(sp);
51
/*
 * load_fpu: fill f14-f31 from an array of 18 doubles.
 *
 * In:    r3 = address of the array; element i is loaded into f(14+i)
 * Clobb: f14-f31, deliberately. The function overwrites them without
 *        saving anything, so it must not be called from C. An assembly
 *        caller that needs the old values has to preserve them itself
 *        (PUSH_FPU / POP_FPU do that).
 */
FUNC_START(load_fpu)
	lfd	f14, (0*8)(r3)
	lfd	f15, (1*8)(r3)
	lfd	f16, (2*8)(r3)
	lfd	f17, (3*8)(r3)
	lfd	f18, (4*8)(r3)
	lfd	f19, (5*8)(r3)
	lfd	f20, (6*8)(r3)
	lfd	f21, (7*8)(r3)
	lfd	f22, (8*8)(r3)
	lfd	f23, (9*8)(r3)
	lfd	f24, (10*8)(r3)
	lfd	f25, (11*8)(r3)
	lfd	f26, (12*8)(r3)
	lfd	f27, (13*8)(r3)
	lfd	f28, (14*8)(r3)
	lfd	f29, (15*8)(r3)
	lfd	f30, (16*8)(r3)
	lfd	f31, (17*8)(r3)
	blr
FUNC_END(load_fpu)
75
/*
 * check_fpu: compare f14-f31 against an array of 18 doubles.
 *
 * In:    r3 = address of the expected values, laid out the way load_fpu
 *             reads them (f14 at offset 0 ... f31 at offset 136)
 * Out:   r3 = 0 if every register compares equal to its array element,
 *             1 as soon as one of them does not
 * Clobb: r4, f0, cr1
 *
 * The comparison is done with fcmpu, so it is numeric rather than
 * bitwise: a NaN on either side compares unordered and is reported as a
 * mismatch.
 */
FUNC_START(check_fpu)
	mr	r4,r3		# free r3 up for the return value
	li	r3,1		# assume a bad result
	lfd	f0,0(r4)
	fcmpu	cr1,f0,f14
	bne	cr1,1f
	lfd	f0,8(r4)
	fcmpu	cr1,f0,f15
	bne	cr1,1f
	lfd	f0,16(r4)
	fcmpu	cr1,f0,f16
	bne	cr1,1f
	lfd	f0,24(r4)
	fcmpu	cr1,f0,f17
	bne	cr1,1f
	lfd	f0,32(r4)
	fcmpu	cr1,f0,f18
	bne	cr1,1f
	lfd	f0,40(r4)
	fcmpu	cr1,f0,f19
	bne	cr1,1f
	lfd	f0,48(r4)
	fcmpu	cr1,f0,f20
	bne	cr1,1f
	lfd	f0,56(r4)
	fcmpu	cr1,f0,f21
	bne	cr1,1f
	lfd	f0,64(r4)
	fcmpu	cr1,f0,f22
	bne	cr1,1f
	lfd	f0,72(r4)
	fcmpu	cr1,f0,f23
	bne	cr1,1f
	lfd	f0,80(r4)
	fcmpu	cr1,f0,f24
	bne	cr1,1f
	lfd	f0,88(r4)
	fcmpu	cr1,f0,f25
	bne	cr1,1f
	lfd	f0,96(r4)
	fcmpu	cr1,f0,f26
	bne	cr1,1f
	lfd	f0,104(r4)
	fcmpu	cr1,f0,f27
	bne	cr1,1f
	lfd	f0,112(r4)
	fcmpu	cr1,f0,f28
	bne	cr1,1f
	lfd	f0,120(r4)
	fcmpu	cr1,f0,f29
	bne	cr1,1f
	lfd	f0,128(r4)
	fcmpu	cr1,f0,f30
	bne	cr1,1f
	lfd	f0,136(r4)
	fcmpu	cr1,f0,f31
	bne	cr1,1f
	li	r3,0		# all eighteen registers matched
1:	blr
FUNC_END(check_fpu)
135
/*
 * test_fpu: load f14-f31 from an array, issue a fork system call, then
 * verify the registers afterwards.
 *
 * In:  r3 = address of the array of doubles (given to load_fpu and, after
 *           the system call, to check_fpu)
 *      r4 = address that receives whatever the fork system call left in r3
 * Out: r3 = result of check_fpu: 0 when f14-f31 still match the array,
 *           1 when they do not (assuming POP_BASIC_STACK leaves r3 alone)
 *
 * f14-f31 are non-volatile, so the incoming values are parked in the
 * frame by PUSH_FPU and put back by POP_FPU before returning.
 *
 * NOTE(review): the system call result is written with a 64-bit store and
 * is not checked for failure; confirm the object behind r4 is 8 bytes
 * wide.
 */
FUNC_START(test_fpu)
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp)	# array address, needed again later
	std	r4,STACK_FRAME_PARAM(1)(sp)	# result address, needed again later
	PUSH_FPU(STACK_FRAME_LOCAL(2,0))

	bl	load_fpu			# takes the array address in r3
	nop
	li	r0,__NR_fork
	sc

	/* Hand the raw system call return value back through the saved pointer. */
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	std	r3,0(r9)

	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu			# leaves 0 or 1 in r3
	nop

	POP_FPU(STACK_FRAME_LOCAL(2,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(test_fpu)
162
/*
 * int preempt_fpu(double *darray, int *threads_starting, int *running)
 *
 * Loads f14-f31 from darray, then atomically decrements *threads_starting
 * as a signal that the registers have been populated. After that it keeps
 * re-checking f14-f31 against darray for as long as *running is non-zero;
 * the check always runs at least once.
 *
 * Out: r3 = 0 if *running dropped to zero with every check passing,
 *           non-zero (the check_fpu result) at the first mismatch
 *
 * f14-f31 are non-volatile, so the incoming values are saved in the frame
 * by PUSH_FPU and restored by POP_FPU on the way out.
 */
FUNC_START(preempt_fpu)
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp)	# double *darray
	std	r4,STACK_FRAME_PARAM(1)(sp)	# int *threads_starting
	std	r5,STACK_FRAME_PARAM(2)(sp)	# int *running
	PUSH_FPU(STACK_FRAME_LOCAL(3,0))

	bl	load_fpu
	nop

	sync					# full barrier ahead of the decrement
	/* Atomic decrement of the 32-bit counter *threads_starting. */
	ld	r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx	r4,0,r3
	addi	r4,r4,-1
	stwcx.	r4,0,r3
	bne-	1b				# reservation lost, try again

2:	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu
	nop
	cmpdi	r3,0
	bne	3f				# mismatch: leave with r3 non-zero
	ld	r4,STACK_FRAME_PARAM(2)(sp)
	/*
	 * *running is a 32-bit int, so load exactly one word. A doubleword
	 * load would read past the object, and on big-endian the cmpwi below
	 * would then test the neighbouring word instead of *running.
	 */
	lwz	r5,0(r4)
	cmpwi	r5,0
	bne	2b

3:	POP_FPU(STACK_FRAME_LOCAL(3,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(preempt_fpu)
199