fpu-internal.h revision 235b80226b986dabcbba844968f7807866bd0bfe
/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 * x86-64 work by Andi Kleen 2002
 */

#ifndef _FPU_INTERNAL_H
#define _FPU_INTERNAL_H

#include <linux/kernel_stat.h>
#include <linux/regset.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <asm/asm.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <asm/user.h>
#include <asm/uaccess.h>
#include <asm/xsave.h>
#include <asm/smap.h>

#ifdef CONFIG_X86_64
# include <asm/sigcontext32.h>
# include <asm/user32.h>
struct ksignal;
int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
			compat_sigset_t *set, struct pt_regs *regs);
int ia32_setup_frame(int sig, struct ksignal *ksig,
		     compat_sigset_t *set, struct pt_regs *regs);
#else
# define user_i387_ia32_struct	user_i387_struct
# define user32_fxsr_struct	user_fxsr_struct
# define ia32_setup_frame	__setup_frame
# define ia32_setup_rt_frame	__setup_rt_frame
#endif

extern unsigned int mxcsr_feature_mask;
extern void fpu_init(void);
extern void eager_fpu_init(void);

DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);

extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
			      struct task_struct *tsk);
extern void convert_to_fxsr(struct task_struct *tsk,
			    const struct user_i387_ia32_struct *env);

extern user_regset_active_fn fpregs_active, xfpregs_active;
extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
				xstateregs_get;
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
				 xstateregs_set;

/*
 * xstateregs_active == fpregs_active. Please refer to the comment
 * at the definition of fpregs_active.
 */
#define xstateregs_active	fpregs_active

#ifdef CONFIG_MATH_EMULATION
# define HAVE_HWFP		(boot_cpu_data.hard_math)
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
# define HAVE_HWFP		1
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif

static inline int is_ia32_compat_frame(void)
{
	return config_enabled(CONFIG_IA32_EMULATION) &&
	       test_thread_flag(TIF_IA32);
}

static inline int is_ia32_frame(void)
{
	return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
}

static inline int is_x32_frame(void)
{
	return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
}

#define X87_FSW_ES (1 << 7)	/* Exception Summary */

static __always_inline __pure bool use_eager_fpu(void)
{
	return static_cpu_has(X86_FEATURE_EAGER_FPU);
}

static __always_inline __pure bool use_xsaveopt(void)
{
	return static_cpu_has(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
	return static_cpu_has(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
	return static_cpu_has(X86_FEATURE_FXSR);
}

static inline void fx_finit(struct i387_fxsave_struct *fx)
{
	memset(fx, 0, xstate_size);
	fx->cwd = 0x37f;
	fx->mxcsr = MXCSR_DEFAULT;
}

extern void __sanitize_i387_state(struct task_struct *);

static inline void sanitize_i387_state(struct task_struct *tsk)
{
	if (!use_xsaveopt())
		return;
	__sanitize_i387_state(tsk);
}

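/*
 * user_insn() and check_insn() wrap a single FPU instruction in inline asm
 * with an exception-table fixup: if the instruction faults, execution
 * resumes at the fixup and the macro evaluates to -1 instead of 0.
 * user_insn() additionally brackets the access with STAC/CLAC so that
 * SMAP-enabled CPUs permit touching the user-space buffer.  See
 * fsave_user() below for a typical use.
 */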
#define user_insn(insn, output, input...)				\
({									\
	int err;							\
	asm volatile(ASM_STAC "\n"					\
		     "1:" #insn "\n\t"					\
		     "2: " ASM_CLAC "\n"				\
		     ".section .fixup,\"ax\"\n"				\
		     "3:  movl $-1,%[err]\n"				\
		     "    jmp  2b\n"					\
		     ".previous\n"					\
		     _ASM_EXTABLE(1b, 3b)				\
		     : [err] "=r" (err), output				\
		     : "0"(0), input);					\
	err;								\
})

#define check_insn(insn, output, input...)				\
({									\
	int err;							\
	asm volatile("1:" #insn "\n\t"					\
		     "2:\n"						\
		     ".section .fixup,\"ax\"\n"				\
		     "3:  movl $-1,%[err]\n"				\
		     "    jmp  2b\n"					\
		     ".previous\n"					\
		     _ASM_EXTABLE(1b, 3b)				\
		     : [err] "=r" (err), output				\
		     : "0"(0), input);					\
	err;								\
})

static inline int fsave_user(struct i387_fsave_struct __user *fx)
{
	return user_insn(fnsave %[fx]; fwait,  [fx] "=m" (*fx), "m" (*fx));
}

static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
{
	if (config_enabled(CONFIG_X86_32))
		return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
	else if (config_enabled(CONFIG_AS_FXSAVEQ))
		return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));

	/* See comment in fpu_fxsave() below. */
	return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}

static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
	if (config_enabled(CONFIG_X86_32))
		return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
	else if (config_enabled(CONFIG_AS_FXSAVEQ))
		return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));

	/* See comment in fpu_fxsave() below. */
	return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
			  "m" (*fx));
}

static inline int fxrstor_user(struct i387_fxsave_struct __user *fx)
{
	if (config_enabled(CONFIG_X86_32))
		return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
	else if (config_enabled(CONFIG_AS_FXSAVEQ))
		return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));

	/* See comment in fpu_fxsave() below. */
	return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
			  "m" (*fx));
}

static inline int frstor_checking(struct i387_fsave_struct *fx)
{
	return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline int frstor_user(struct i387_fsave_struct __user *fx)
{
	return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline void fpu_fxsave(struct fpu *fpu)
{
	if (config_enabled(CONFIG_X86_32))
		asm volatile("fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
	else if (config_enabled(CONFIG_AS_FXSAVEQ))
		asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
	else {
		/* Using "rex64; fxsave %0" is broken because, if the memory
		 * operand uses any extended registers for addressing, a second
		 * REX prefix will be generated (to the assembler, rex64
		 * followed by semicolon is a separate instruction), and hence
		 * the 64-bitness is lost.
		 *
		 * Using "fxsaveq %0" would be the ideal choice, but is only
		 * supported starting with gas 2.16.
		 *
		 * Using, as a workaround, the properly prefixed form below
		 * isn't accepted by any binutils version so far released,
		 * complaining that the same type of prefix is used twice if
		 * an extended register is needed for addressing (fix submitted
		 * to mainline 2005-11-21).
		 *
		 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
		 *
		 * This, however, we can work around by forcing the compiler to
		 * select an addressing mode that doesn't require extended
		 * registers.
		 */
		asm volatile("rex64/fxsave (%[fx])"
			     : "=m" (fpu->state->fxsave)
			     : [fx] "R" (&fpu->state->fxsave));
	}
}
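
/*
 * Illustrative only (a sketch, not part of the original header), showing
 * the effect of the "R" constraint used above.  With a plain memory
 * operand the assembler may pick an extended register for addressing,
 * e.g.
 *
 *	rex64; fxsave 0x40(%r8)
 *
 * where fxsave gets its own REX prefix and the standalone rex64 (REX.W)
 * is ignored, so the 64-bit form is lost.  Passing the address through an
 * "R" (legacy-register-only) operand instead forces something like
 *
 *	rex64/fxsave (%rax)
 *
 * with a single effective REX.W prefix.  The concrete registers are made
 * up for illustration.
 */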

/*
 * These must be called with preemption disabled. They return
 * 'true' if the FPU state is still intact.
 */
static inline int fpu_save_init(struct fpu *fpu)
{
	if (use_xsave()) {
		fpu_xsave(fpu);

		/*
		 * The xsave header may indicate that the FP state is in
		 * its init state.
		 */
		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
			return 1;
	} else if (use_fxsr()) {
		fpu_fxsave(fpu);
	} else {
		asm volatile("fnsave %[fx]; fwait"
			     : [fx] "=m" (fpu->state->fsave));
		return 0;
	}

	/*
	 * If exceptions are pending, we need to clear them so
	 * that we don't randomly get exceptions later.
	 *
	 * FIXME! Is this perhaps only true for the old-style
	 * irq13 case? Maybe we could leave the x87 state
	 * intact otherwise?
	 */
	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
		asm volatile("fnclex");
		return 0;
	}
	return 1;
}

static inline int __save_init_fpu(struct task_struct *tsk)
{
	return fpu_save_init(&tsk->thread.fpu);
}
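
/*
 * Illustrative sketch of the calling convention (hypothetical caller, not
 * part of this header): the save must run with preemption off, and a zero
 * return means the hardware registers no longer hold the task's state, so
 * any lazy-restore shortcut has to be invalidated:
 *
 *	preempt_disable();
 *	if (__thread_has_fpu(tsk) && !__save_init_fpu(tsk))
 *		tsk->thread.fpu.last_cpu = ~0;
 *	preempt_enable();
 */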

static inline int fpu_restore_checking(struct fpu *fpu)
{
	if (use_xsave())
		return fpu_xrstor_checking(&fpu->state->xsave);
	else if (use_fxsr())
		return fxrstor_checking(&fpu->state->fxsave);
	else
		return frstor_checking(&fpu->state->fsave);
}

static inline int restore_fpu_checking(struct task_struct *tsk)
{
	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
	   is pending.  Clear the x87 state here by setting it to fixed
	   values.  The "m" operand is an arbitrary variable that should
	   already be resident in the L1 cache. */
	alternative_input(
		ASM_NOP8 ASM_NOP2,
		"emms\n\t"		/* clear stack tags */
		"fildl %P[addr]",	/* set F?P to defined value */
		X86_FEATURE_FXSAVE_LEAK,
		[addr] "m" (tsk->thread.fpu.has_fpu));

	return fpu_restore_checking(&tsk->thread.fpu);
}
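
/*
 * Purely illustrative (assumed behaviour of alternative_input(), see
 * <asm/alternative.h>): on CPUs without X86_FEATURE_FXSAVE_LEAK the
 * sequence above remains padding NOPs; on affected AMD parts it is
 * patched at boot into roughly
 *
 *	emms
 *	fildl	<some L1-hot kernel dword>
 *
 * so that FIP/FDP point at known kernel data instead of leaking the
 * previous task's last FPU instruction/operand addresses.
 */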

/*
 * Software FPU state helpers. Careful: these need
 * preemption protection *and* they need to be
 * properly paired with the CR0.TS changes!
 */
static inline int __thread_has_fpu(struct task_struct *tsk)
{
	return tsk->thread.fpu.has_fpu;
}

/* Must be paired with an 'stts' after! */
static inline void __thread_clear_has_fpu(struct task_struct *tsk)
{
	tsk->thread.fpu.has_fpu = 0;
	this_cpu_write(fpu_owner_task, NULL);
}

/* Must be paired with a 'clts' before! */
static inline void __thread_set_has_fpu(struct task_struct *tsk)
{
	tsk->thread.fpu.has_fpu = 1;
	this_cpu_write(fpu_owner_task, tsk);
}

/*
 * Encapsulate the CR0.TS handling together with the
 * software flag.
 *
 * These generally need preemption protection to work,
 * so try to avoid using them on their own.
 */
static inline void __thread_fpu_end(struct task_struct *tsk)
{
	__thread_clear_has_fpu(tsk);
	if (!use_eager_fpu())
		stts();
}

static inline void __thread_fpu_begin(struct task_struct *tsk)
{
	if (!use_eager_fpu())
		clts();
	__thread_set_has_fpu(tsk);
}
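
/*
 * Illustrative sketch of the intended pairing (hypothetical caller, not
 * part of this header): clts()/ownership on entry, ownership/stts() on
 * exit, with preemption disabled across the whole sequence:
 *
 *	preempt_disable();
 *	__thread_fpu_begin(tsk);
 *	... load or use the FPU registers ...
 *	__thread_fpu_end(tsk);
 *	preempt_enable();
 */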

static inline void __drop_fpu(struct task_struct *tsk)
{
	if (__thread_has_fpu(tsk)) {
		/* Ignore delayed exceptions from user space */
		asm volatile("1: fwait\n"
			     "2:\n"
			     _ASM_EXTABLE(1b, 2b));
		__thread_fpu_end(tsk);
	}
}

static inline void drop_fpu(struct task_struct *tsk)
{
	/*
	 * Forget coprocessor state..
	 */
	preempt_disable();
	tsk->fpu_counter = 0;
	__drop_fpu(tsk);
	clear_used_math();
	preempt_enable();
}

static inline void drop_init_fpu(struct task_struct *tsk)
{
	if (!use_eager_fpu())
		drop_fpu(tsk);
	else {
		if (use_xsave())
			xrstor_state(init_xstate_buf, -1);
		else
			fxrstor_checking(&init_xstate_buf->i387);
	}
}

/*
 * FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_fpu_prepare() saves the old state and
 *    sets the new state of the CR0.TS bit. This is
 *    done within the context of the old process.
 *
 *  - switch_fpu_finish() restores the new state as
 *    necessary.
 */
typedef struct { int preload; } fpu_switch_t;
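
/*
 * Illustrative two-stage usage (a sketch of a hypothetical __switch_to()
 * caller; the real implementation lives in arch/x86/kernel/process_*.c):
 *
 *	fpu_switch_t fpu;
 *
 *	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 *	... switch stacks, TLS, I/O bitmap, etc ...
 *	switch_fpu_finish(next_p, fpu);
 */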

/*
 * Must be run with preemption disabled: this clears the fpu_owner_task
 * on this CPU.
 *
 * This will disable any lazy FPU state restore of the current FPU state,
 * but if the current thread owns the FPU, its state will still be saved
 * when the thread is scheduled out.
 */
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
	per_cpu(fpu_owner_task, cpu) = NULL;
}

static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
	return new == this_cpu_read_stable(fpu_owner_task) &&
		cpu == new->thread.fpu.last_cpu;
}

static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
{
	fpu_switch_t fpu;

	/*
	 * If the task has used math, pre-load the FPU: always on eager-FPU
	 * (xsave) processors, or if the past 5 consecutive context switches
	 * used math.
	 */
	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
					     new->fpu_counter > 5);
	if (__thread_has_fpu(old)) {
		if (!__save_init_fpu(old))
			cpu = ~0;
		old->thread.fpu.last_cpu = cpu;
		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */

		/* Don't change CR0.TS if we just switch! */
		if (fpu.preload) {
			new->fpu_counter++;
			__thread_set_has_fpu(new);
			prefetch(new->thread.fpu.state);
		} else if (!use_eager_fpu())
			stts();
	} else {
		old->fpu_counter = 0;
		old->thread.fpu.last_cpu = ~0;
		if (fpu.preload) {
			new->fpu_counter++;
			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
				fpu.preload = 0;
			else
				prefetch(new->thread.fpu.state);
			__thread_fpu_begin(new);
		}
	}
	return fpu;
}

/*
 * By the time this gets called, we've already cleared CR0.TS and
 * given the process the FPU if we are going to preload the FPU
 * state - all we need to do is to conditionally restore the register
 * state itself.
 */
static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{
	if (fpu.preload) {
		if (unlikely(restore_fpu_checking(new)))
			drop_init_fpu(new);
	}
}

/*
 * Signal frame handlers...
 */
extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);

static inline int xstate_sigframe_size(void)
{
	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
}

static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
{
	void __user *buf_fx = buf;
	int size = xstate_sigframe_size();

	if (ia32_frame && use_fxsr()) {
		buf_fx = buf + sizeof(struct i387_fsave_struct);
		size += sizeof(struct i387_fsave_struct);
	}

	return __restore_xstate_sig(buf, buf_fx, size);
}

/*
 * Needs to be preemption-safe.
 *
 * NOTE! user_fpu_begin() must be used only immediately before restoring
 * the FPU state. This function does not do any save/restore of its own.
 */
static inline void user_fpu_begin(void)
{
	preempt_disable();
	if (!user_has_fpu())
		__thread_fpu_begin(current);
	preempt_enable();
}
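
/*
 * Illustrative sketch of the intended pattern (hypothetical caller such as
 * a signal-restore path; not the actual implementation):
 *
 *	user_fpu_begin();
 *	if (restore_fpu_checking(current))
 *		drop_init_fpu(current);
 */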

static inline void __save_fpu(struct task_struct *tsk)
{
	if (use_xsave())
		xsave_state(&tsk->thread.fpu.state->xsave, -1);
	else
		fpu_fxsave(&tsk->thread.fpu);
}

/*
 * These disable preemption on their own and are safe
 */
static inline void save_init_fpu(struct task_struct *tsk)
{
	WARN_ON_ONCE(!__thread_has_fpu(tsk));

	if (use_eager_fpu()) {
		__save_fpu(tsk);
		return;
	}

	preempt_disable();
	__save_init_fpu(tsk);
	__thread_fpu_end(tsk);
	preempt_enable();
}

/*
 * i387 state interaction
 */
static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
{
	if (cpu_has_fxsr) {
		return tsk->thread.fpu.state->fxsave.cwd;
	} else {
		return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
	}
}

static inline unsigned short get_fpu_swd(struct task_struct *tsk)
{
	if (cpu_has_fxsr) {
		return tsk->thread.fpu.state->fxsave.swd;
	} else {
		return (unsigned short)tsk->thread.fpu.state->fsave.swd;
	}
}

static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
{
	if (cpu_has_xmm) {
		return tsk->thread.fpu.state->fxsave.mxcsr;
	} else {
		return MXCSR_DEFAULT;
	}
}

static bool fpu_allocated(struct fpu *fpu)
{
	return fpu->state != NULL;
}

static inline int fpu_alloc(struct fpu *fpu)
{
	if (fpu_allocated(fpu))
		return 0;
	fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
	if (!fpu->state)
		return -ENOMEM;
	WARN_ON((unsigned long)fpu->state & 15);
	return 0;
}

static inline void fpu_free(struct fpu *fpu)
{
	if (fpu->state) {
		kmem_cache_free(task_xstate_cachep, fpu->state);
		fpu->state = NULL;
	}
}

static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
{
	if (use_eager_fpu()) {
		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
		__save_fpu(dst);
	} else {
		struct fpu *dfpu = &dst->thread.fpu;
		struct fpu *sfpu = &src->thread.fpu;

		unlazy_fpu(src);
		memcpy(dfpu->state, sfpu->state, xstate_size);
	}
}

static inline unsigned long
alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
		unsigned long *size)
{
	unsigned long frame_size = xstate_sigframe_size();

	*buf_fx = sp = round_down(sp - frame_size, 64);
	if (ia32_frame && use_fxsr()) {
		frame_size += sizeof(struct i387_fsave_struct);
		sp -= sizeof(struct i387_fsave_struct);
	}

	*size = frame_size;
	return sp;
}
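
/*
 * Illustrative layout (a sketch, not a normative description) of the math
 * frame carved out by alloc_mathframe() above and consumed by
 * restore_xstate_sig(), for an ia32 frame on an fxsr-capable CPU:
 *
 *	returned sp  ->  struct i387_fsave_struct (legacy fsave area)
 *	*buf_fx      ->  fxsave/xsave image, 64-byte aligned, xstate_size
 *	                 bytes (plus FP_XSTATE_MAGIC2_SIZE when xsave is used)
 *
 * Without an ia32 frame, sp and *buf_fx coincide and only the fx/xsave
 * image is laid out.
 */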

#endif