1/*
2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 *  Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <math.h>
21#include "cpu.h"
22#include "helper.h"
23#include "qemu/aes.h"
24#include "qemu/host-utils.h"
25
26#if !defined(CONFIG_USER_ONLY)
27#include "exec/softmmu_exec.h"
28#endif /* !defined(CONFIG_USER_ONLY) */
29
/*
 * Rounding-control (RC) field of the x87 control word: two bits at
 * positions 10-11.  update_fp_status() switches on these encodings.
 */
#define RC_MASK         (3 << 10)
#define RC_NEAR         (0 << 10)   /* -> float_round_nearest_even */
#define RC_DOWN         (1 << 10)   /* -> float_round_down */
#define RC_UP           (2 << 10)   /* -> float_round_up */
#define RC_CHOP         (3 << 10)   /* -> float_round_to_zero */

/* 2^63: the trig helpers reject arguments outside [-MAXTAN, MAXTAN] */
#define MAXTAN 9223372036854775808.0
37
/*
 * Field accessors for an x86 80-bit extended-precision value seen through
 * an object exposing `.l.upper` (sign bit plus 15-bit exponent) and
 * `.l.lower` (64-bit significand).
 *
 * Every macro argument and every expansion is fully parenthesized so that
 * any lvalue expression (e.g. `*p`) may be passed and the result can be
 * embedded in a larger expression.  BIASEXPONENT() evaluates its argument
 * twice, so do not pass an expression with side effects.
 */
#define MAXEXPD 0x7fff                  /* all-ones exponent: Inf/NaN */
#define EXPBIAS 16383                   /* exponent bias */
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       ((fp).l.upper & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* overwrite the exponent with the bias (i.e. 2^0), keeping the sign bit */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
45
/* Extended-precision constants: (sign+exponent, 64-bit significand) */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799ULL) /* ~0.30103 */
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcULL) /* ~1.44270 */
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeULL) /* ~3.32193 */
49
50static const floatx80 f15rk[7] =
51{
52    floatx80_zero,
53    floatx80_one,
54    floatx80_pi,
55    floatx80_lg2,
56    floatx80_ln2,
57    floatx80_l2e,
58    floatx80_l2t,
59};
60
61static inline void fpush(CPUX86State *env)
62{
63    env->fpstt = (env->fpstt - 1) & 7;
64    env->fptags[env->fpstt] = 0; /* validate stack entry */
65}
66
67static inline void fpop(CPUX86State *env)
68{
69    env->fptags[env->fpstt] = 1; /* invvalidate stack entry */
70    env->fpstt = (env->fpstt + 1) & 7;
71}
72
73static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr)
74{
75    floatx80 temp;
76
77    temp.low = cpu_ldq_data(env, ptr);
78    temp.high = cpu_lduw_data(env, ptr + 8);
79    return temp;
80}
81
82static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr)
83{
84    cpu_stq_data(env, ptr, f.low);
85    cpu_stw_data(env, ptr + 8, f.high);
86}
87
/*
 * x87 status-word (env->fpus) bits.  Bits 0-5 are the exception flags and
 * line up with the mask bits of the control word (see FPUC_EM); names
 * follow the Intel SDM status-word layout.
 */
#define FPUS_IE 0x0001  /* invalid operation */
#define FPUS_DE 0x0002  /* denormal operand */
#define FPUS_ZE 0x0004  /* divide by zero (raised by helper_fdiv) */
#define FPUS_OE 0x0008  /* overflow */
#define FPUS_UE 0x0010  /* underflow */
#define FPUS_PE 0x0020  /* precision */
#define FPUS_SF 0x0040  /* stack fault */
#define FPUS_SE 0x0080  /* summary: an unmasked exception is pending */
#define FPUS_B  0x8000  /* busy */

/* control-word exception mask bits (bits 0-5) */
#define FPUC_EM 0x3f
99
100/* x87 FPU helpers */
101
102static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
103{
104    union {
105        float64 f64;
106        double d;
107    } u;
108
109    u.f64 = floatx80_to_float64(a, &env->fp_status);
110    return u.d;
111}
112
113static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
114{
115    union {
116        float64 f64;
117        double d;
118    } u;
119
120    u.d = a;
121    return float64_to_floatx80(u.f64, &env->fp_status);
122}
123
124static void fpu_set_exception(CPUX86State *env, int mask)
125{
126    env->fpus |= mask;
127    if (env->fpus & (~env->fpuc & FPUC_EM))
128        env->fpus |= FPUS_SE | FPUS_B;
129}
130
131static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
132{
133    if (floatx80_is_zero(b)) {
134        fpu_set_exception(env, FPUS_ZE);
135    }
136    return floatx80_div(a, b, &env->fp_status);
137}
138
139static void fpu_raise_exception(CPUX86State *env)
140{
141    if (env->cr[0] & CR0_NE_MASK) {
142        raise_exception(env, EXCP10_COPR);
143    }
144#if !defined(CONFIG_USER_ONLY)
145    else {
146        cpu_set_ferr(env);
147    }
148#endif
149}
150
151void helper_flds_FT0(CPUX86State *env, uint32_t val)
152{
153    union {
154        float32 f;
155        uint32_t i;
156    } u;
157    u.i = val;
158    FT0 = float32_to_floatx80(u.f, &env->fp_status);
159}
160
161void helper_fldl_FT0(CPUX86State *env, uint64_t val)
162{
163    union {
164        float64 f;
165        uint64_t i;
166    } u;
167    u.i = val;
168    FT0 = float64_to_floatx80(u.f, &env->fp_status);
169}
170
171void helper_fildl_FT0(CPUX86State *env, int32_t val)
172{
173    FT0 = int32_to_floatx80(val, &env->fp_status);
174}
175
176void helper_flds_ST0(CPUX86State *env, uint32_t val)
177{
178    int new_fpstt;
179    union {
180        float32 f;
181        uint32_t i;
182    } u;
183    new_fpstt = (env->fpstt - 1) & 7;
184    u.i = val;
185    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
186    env->fpstt = new_fpstt;
187    env->fptags[new_fpstt] = 0; /* validate stack entry */
188}
189
190void helper_fldl_ST0(CPUX86State *env, uint64_t val)
191{
192    int new_fpstt;
193    union {
194        float64 f;
195        uint64_t i;
196    } u;
197    new_fpstt = (env->fpstt - 1) & 7;
198    u.i = val;
199    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
200    env->fpstt = new_fpstt;
201    env->fptags[new_fpstt] = 0; /* validate stack entry */
202}
203
204void helper_fildl_ST0(CPUX86State *env, int32_t val)
205{
206    int new_fpstt;
207    new_fpstt = (env->fpstt - 1) & 7;
208    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
209    env->fpstt = new_fpstt;
210    env->fptags[new_fpstt] = 0; /* validate stack entry */
211}
212
213void helper_fildll_ST0(CPUX86State *env, int64_t val)
214{
215    int new_fpstt;
216    new_fpstt = (env->fpstt - 1) & 7;
217    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
218    env->fpstt = new_fpstt;
219    env->fptags[new_fpstt] = 0; /* validate stack entry */
220}
221
222uint32_t helper_fsts_ST0(CPUX86State *env)
223{
224    union {
225        float32 f;
226        uint32_t i;
227    } u;
228    u.f = floatx80_to_float32(ST0, &env->fp_status);
229    return u.i;
230}
231
232uint64_t helper_fstl_ST0(CPUX86State *env)
233{
234    union {
235        float64 f;
236        uint64_t i;
237    } u;
238    u.f = floatx80_to_float64(ST0, &env->fp_status);
239    return u.i;
240}
241
242int32_t helper_fist_ST0(CPUX86State *env)
243{
244    int32_t val;
245    val = floatx80_to_int32(ST0, &env->fp_status);
246    if (val != (int16_t)val)
247        val = -32768;
248    return val;
249}
250
251int32_t helper_fistl_ST0(CPUX86State *env)
252{
253    int32_t val;
254    val = floatx80_to_int32(ST0, &env->fp_status);
255    return val;
256}
257
258int64_t helper_fistll_ST0(CPUX86State *env)
259{
260    int64_t val;
261    val = floatx80_to_int64(ST0, &env->fp_status);
262    return val;
263}
264
265int32_t helper_fistt_ST0(CPUX86State *env)
266{
267    int32_t val;
268    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
269    if (val != (int16_t)val)
270        val = -32768;
271    return val;
272}
273
274int32_t helper_fisttl_ST0(CPUX86State *env)
275{
276    int32_t val;
277    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
278    return val;
279}
280
281int64_t helper_fisttll_ST0(CPUX86State *env)
282{
283    int64_t val;
284    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
285    return val;
286}
287
288void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
289{
290    int new_fpstt;
291    new_fpstt = (env->fpstt - 1) & 7;
292    env->fpregs[new_fpstt].d = helper_fldt(env, ptr);
293    env->fpstt = new_fpstt;
294    env->fptags[new_fpstt] = 0; /* validate stack entry */
295}
296
297void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
298{
299    helper_fstt(env, ST0, ptr);
300}
301
302void helper_fpush(CPUX86State *env)
303{
304    fpush(env);
305}
306
307void helper_fpop(CPUX86State *env)
308{
309    fpop(env);
310}
311
312void helper_fdecstp(CPUX86State *env)
313{
314    env->fpstt = (env->fpstt - 1) & 7;
315    env->fpus &= (~0x4700);
316}
317
318void helper_fincstp(CPUX86State *env)
319{
320    env->fpstt = (env->fpstt + 1) & 7;
321    env->fpus &= (~0x4700);
322}
323
324/* FPU move */
325
326void helper_ffree_STN(CPUX86State *env, int st_index)
327{
328    env->fptags[(env->fpstt + st_index) & 7] = 1;
329}
330
331void helper_fmov_ST0_FT0(CPUX86State *env)
332{
333    ST0 = FT0;
334}
335
336void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
337{
338    FT0 = ST(st_index);
339}
340
341void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
342{
343    ST0 = ST(st_index);
344}
345
346void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
347{
348    ST(st_index) = ST0;
349}
350
351void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
352{
353    floatx80 tmp;
354    tmp = ST(st_index);
355    ST(st_index) = ST0;
356    ST0 = tmp;
357}
358
359/* FPU operations */
360
/*
 * Status-word condition codes for a comparison, indexed by (compare
 * result + 1) as done by the fcom/fucom helpers.  The result encoding
 * comes from softfloat; presumably -1/0/1/2 = less/equal/greater/unordered.
 */
static const int fcom_ccval[4] = {
    0x0100, /* result -1: C0 */
    0x4000, /* result  0: C3 */
    0x0000, /* result  1: none */
    0x4500, /* result  2: C3 | C2 | C0 */
};
362
363void helper_fcom_ST0_FT0(CPUX86State *env)
364{
365    int ret;
366
367    ret = floatx80_compare(ST0, FT0, &env->fp_status);
368    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
369}
370
371void helper_fucom_ST0_FT0(CPUX86State *env)
372{
373    int ret;
374
375    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
376    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret+ 1];
377}
378
379static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
380
381void helper_fcomi_ST0_FT0(CPUX86State *env)
382{
383    int eflags;
384    int ret;
385
386    ret = floatx80_compare(ST0, FT0, &env->fp_status);
387    eflags = helper_cc_compute_all(env, CC_OP);
388    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
389    CC_SRC = eflags;
390}
391
392void helper_fucomi_ST0_FT0(CPUX86State *env)
393{
394    int eflags;
395    int ret;
396
397    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
398    eflags = helper_cc_compute_all(env, CC_OP);
399    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
400    CC_SRC = eflags;
401}
402
403void helper_fadd_ST0_FT0(CPUX86State *env)
404{
405    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
406}
407
408void helper_fmul_ST0_FT0(CPUX86State *env)
409{
410    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
411}
412
413void helper_fsub_ST0_FT0(CPUX86State *env)
414{
415    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
416}
417
418void helper_fsubr_ST0_FT0(CPUX86State *env)
419{
420    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
421}
422
423void helper_fdiv_ST0_FT0(CPUX86State *env)
424{
425    ST0 = helper_fdiv(env, ST0, FT0);
426}
427
428void helper_fdivr_ST0_FT0(CPUX86State *env)
429{
430    ST0 = helper_fdiv(env, FT0, ST0);
431}
432
433/* fp operations between STN and ST0 */
434
435void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
436{
437    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
438}
439
440void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
441{
442    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
443}
444
445void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
446{
447    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
448}
449
450void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
451{
452    floatx80 *p;
453    p = &ST(st_index);
454    *p = floatx80_sub(ST0, *p, &env->fp_status);
455}
456
457void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
458{
459    floatx80 *p;
460    p = &ST(st_index);
461    *p = helper_fdiv(env, *p, ST0);
462}
463
464void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
465{
466    floatx80 *p;
467    p = &ST(st_index);
468    *p = helper_fdiv(env, ST0, *p);
469}
470
471/* misc FPU operations */
472void helper_fchs_ST0(CPUX86State *env)
473{
474    ST0 = floatx80_chs(ST0);
475}
476
477void helper_fabs_ST0(CPUX86State *env)
478{
479    ST0 = floatx80_abs(ST0);
480}
481
482void helper_fld1_ST0(CPUX86State *env)
483{
484    ST0 = f15rk[1];
485}
486
487void helper_fldl2t_ST0(CPUX86State *env)
488{
489    ST0 = f15rk[6];
490}
491
492void helper_fldl2e_ST0(CPUX86State *env)
493{
494    ST0 = f15rk[5];
495}
496
497void helper_fldpi_ST0(CPUX86State *env)
498{
499    ST0 = f15rk[2];
500}
501
502void helper_fldlg2_ST0(CPUX86State *env)
503{
504    ST0 = f15rk[3];
505}
506
507void helper_fldln2_ST0(CPUX86State *env)
508{
509    ST0 = f15rk[4];
510}
511
512void helper_fldz_ST0(CPUX86State *env)
513{
514    ST0 = f15rk[0];
515}
516
517void helper_fldz_FT0(CPUX86State *env)
518{
519    FT0 = f15rk[0];
520}
521
522uint32_t helper_fnstsw(CPUX86State *env)
523{
524    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
525}
526
527uint32_t helper_fnstcw(CPUX86State *env)
528{
529    return env->fpuc;
530}
531
532static void update_fp_status(CPUX86State *env)
533{
534    int rnd_type;
535
536    /* set rounding mode */
537    switch(env->fpuc & RC_MASK) {
538    default:
539    case RC_NEAR:
540        rnd_type = float_round_nearest_even;
541        break;
542    case RC_DOWN:
543        rnd_type = float_round_down;
544        break;
545    case RC_UP:
546        rnd_type = float_round_up;
547        break;
548    case RC_CHOP:
549        rnd_type = float_round_to_zero;
550        break;
551    }
552    set_float_rounding_mode(rnd_type, &env->fp_status);
553    switch((env->fpuc >> 8) & 3) {
554    case 0:
555        rnd_type = 32;
556        break;
557    case 2:
558        rnd_type = 64;
559        break;
560    case 3:
561    default:
562        rnd_type = 80;
563        break;
564    }
565    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
566}
567
568void helper_fldcw(CPUX86State *env, uint32_t val)
569{
570    env->fpuc = val;
571    update_fp_status(env);
572}
573
574void helper_fclex(CPUX86State *env)
575{
576    env->fpus &= 0x7f00;
577}
578
579void helper_fwait(CPUX86State *env)
580{
581    if (env->fpus & FPUS_SE)
582        fpu_raise_exception(env);
583}
584
585void helper_fninit(CPUX86State *env)
586{
587    env->fpus = 0;
588    env->fpstt = 0;
589    env->fpuc = 0x37f;
590    env->fptags[0] = 1;
591    env->fptags[1] = 1;
592    env->fptags[2] = 1;
593    env->fptags[3] = 1;
594    env->fptags[4] = 1;
595    env->fptags[5] = 1;
596    env->fptags[6] = 1;
597    env->fptags[7] = 1;
598}
599
600/* BCD ops */
601
602void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
603{
604    floatx80 tmp;
605    uint64_t val;
606    unsigned int v;
607    int i;
608
609    val = 0;
610    for(i = 8; i >= 0; i--) {
611        v = cpu_ldub_data(env, ptr + i);
612        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
613    }
614    tmp = int64_to_floatx80(val, &env->fp_status);
615    if (cpu_ldub_data(env, ptr + 9) & 0x80) {
616        floatx80_chs(tmp);
617    }
618    fpush(env);
619    ST0 = tmp;
620}
621
622void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
623{
624    int v;
625    target_ulong mem_ref, mem_end;
626    int64_t val;
627
628    val = floatx80_to_int64(ST0, &env->fp_status);
629    mem_ref = ptr;
630    mem_end = mem_ref + 9;
631    if (val < 0) {
632        cpu_stb_data(env, mem_end, 0x80);
633        val = -val;
634    } else {
635        cpu_stb_data(env, mem_end, 0x00);
636    }
637    while (mem_ref < mem_end) {
638        if (val == 0)
639            break;
640        v = val % 100;
641        val = val / 100;
642        v = ((v / 10) << 4) | (v % 10);
643        cpu_stb_data(env, mem_ref++, v);
644    }
645    while (mem_ref < mem_end) {
646        cpu_stb_data(env, mem_ref++, 0);
647    }
648}
649
650void helper_f2xm1(CPUX86State *env)
651{
652    double val = floatx80_to_double(env, ST0);
653    val = pow(2.0, val) - 1.0;
654    ST0 = double_to_floatx80(env, val);
655}
656
657void helper_fyl2x(CPUX86State *env)
658{
659    double fptemp = floatx80_to_double(env, ST0);
660
661    if (fptemp>0.0){
662        fptemp = log(fptemp)/log(2.0);   /* log2(ST) */
663        fptemp *= floatx80_to_double(env, ST1);
664        ST1 = double_to_floatx80(env, fptemp);
665        fpop(env);
666    } else {
667        env->fpus &= (~0x4700);
668        env->fpus |= 0x400;
669    }
670}
671
672void helper_fptan(CPUX86State *env)
673{
674    double fptemp = floatx80_to_double(env, ST0);
675
676    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
677        env->fpus |= 0x400;
678    } else {
679        fptemp = tan(fptemp);
680        ST0 = double_to_floatx80(env, fptemp);
681        fpush(env);
682        ST0 = floatx80_one;
683        env->fpus &= (~0x400);  /* C2 <-- 0 */
684        /* the above code is for  |arg| < 2**52 only */
685    }
686}
687
688void helper_fpatan(CPUX86State *env)
689{
690    double fptemp, fpsrcop;
691
692    fpsrcop = floatx80_to_double(env, ST1);
693    fptemp = floatx80_to_double(env, ST0);
694    ST1 = double_to_floatx80(env, atan2(fpsrcop,fptemp));
695    fpop(env);
696}
697
698void helper_fxtract(CPUX86State *env)
699{
700    CPU_LDoubleU temp;
701    unsigned int expdif;
702
703    temp.d = ST0;
704    expdif = EXPD(temp) - EXPBIAS;
705    /*DP exponent bias*/
706    ST0 = int32_to_floatx80(expdif, &env->fp_status);
707    fpush(env);
708    BIASEXPONENT(temp);
709    ST0 = temp.d;
710}
711
712void helper_fprem1(CPUX86State *env)
713{
714    double st0, st1, dblq, fpsrcop, fptemp;
715    CPU_LDoubleU fpsrcop1, fptemp1;
716    int expdif;
717    signed long long int q;
718
719    st0 = floatx80_to_double(env, ST0);
720    st1 = floatx80_to_double(env, ST1);
721
722    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
723        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
724        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
725        return;
726    }
727
728    fpsrcop = st0;
729    fptemp = st1;
730    fpsrcop1.d = ST0;
731    fptemp1.d = ST1;
732    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
733
734    if (expdif < 0) {
735        /* optimisation? taken from the AMD docs */
736        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
737        /* ST0 is unchanged */
738        return;
739    }
740
741    if (expdif < 53) {
742        dblq = fpsrcop / fptemp;
743        /* round dblq towards nearest integer */
744        dblq = rint(dblq);
745        st0 = fpsrcop - fptemp * dblq;
746
747        /* convert dblq to q by truncating towards zero */
748        if (dblq < 0.0)
749           q = (signed long long int)(-dblq);
750        else
751           q = (signed long long int)dblq;
752
753        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
754                                /* (C0,C3,C1) <-- (q2,q1,q0) */
755        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
756        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
757        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
758    } else {
759        env->fpus |= 0x400;  /* C2 <-- 1 */
760        fptemp = pow(2.0, expdif - 50);
761        fpsrcop = (st0 / st1) / fptemp;
762        /* fpsrcop = integer obtained by chopping */
763        fpsrcop = (fpsrcop < 0.0) ?
764                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
765        st0 -= (st1 * fpsrcop * fptemp);
766    }
767    ST0 = double_to_floatx80(env, st0);
768}
769
770void helper_fprem(CPUX86State *env)
771{
772    double st0, st1, dblq, fpsrcop, fptemp;
773    CPU_LDoubleU fpsrcop1, fptemp1;
774    int expdif;
775    signed long long int q;
776
777    st0 = floatx80_to_double(env, ST0);
778    st1 = floatx80_to_double(env, ST1);
779
780    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
781       ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
782       env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
783       return;
784    }
785
786    fpsrcop = st0;
787    fptemp = st1;
788    fpsrcop1.d = ST0;
789    fptemp1.d = ST1;
790    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
791
792    if (expdif < 0) {
793        /* optimisation? taken from the AMD docs */
794        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
795        /* ST0 is unchanged */
796        return;
797    }
798
799    if (expdif < 53) {
800        dblq = fpsrcop / fptemp; /* ST0 / ST1*/;
801        /* round dblq towards zero */
802        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
803        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
804
805        /* convert dblq to q by truncating towards zero */
806        if (dblq < 0.0) {
807           q = (signed long long int)(-dblq);
808        } else {
809           q = (signed long long int)dblq;
810        }
811
812        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
813                              /* (C0,C3,C1) <-- (q2,q1,q0) */
814        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
815        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
816        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
817    } else {
818        int N = 32 + (expdif % 32); /* as per AMD docs */
819        env->fpus |= 0x400;  /* C2 <-- 1 */
820        fptemp = pow(2.0, (double)(expdif - N));
821        fpsrcop = (st0 / st1) / fptemp;
822        /* fpsrcop = integer obtained by chopping */
823        fpsrcop = (fpsrcop < 0.0) ?
824                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
825        st0 -= (st1 * fpsrcop * fptemp);
826    }
827    ST0 = double_to_floatx80(env, st0);
828}
829
830void helper_fyl2xp1(CPUX86State *env)
831{
832    double fptemp = floatx80_to_double(env, ST0);
833
834    if ((fptemp+1.0)>0.0) {
835        fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
836        fptemp *= floatx80_to_double(env, ST1);
837        ST1 = double_to_floatx80(env, fptemp);
838        fpop(env);
839    } else {
840        env->fpus &= (~0x4700);
841        env->fpus |= 0x400;
842    }
843}
844
845void helper_fsqrt(CPUX86State *env)
846{
847    double fptemp = floatx80_to_double(env, ST0);
848
849    if (fptemp<0.0) {
850        env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
851        env->fpus |= 0x400;
852    }
853    ST0 = floatx80_sqrt(ST0, &env->fp_status);
854}
855
856void helper_fsincos(CPUX86State *env)
857{
858    double fptemp = floatx80_to_double(env, ST0);
859
860    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
861        env->fpus |= 0x400;
862    } else {
863        ST0 = double_to_floatx80(env, sin(fptemp));
864        fpush(env);
865        ST0 = double_to_floatx80(env, cos(fptemp));
866        env->fpus &= (~0x400);  /* C2 <-- 0 */
867        /* the above code is for  |arg| < 2**63 only */
868    }
869}
870
871void helper_frndint(CPUX86State *env)
872{
873    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
874}
875
876void helper_fscale(CPUX86State *env)
877{
878    double st0 = floatx80_to_double(env, ST0);
879    double st1 = floatx80_to_double(env, ST1);
880    double val = ldexp(st0, (int)st1);
881    ST0 = double_to_floatx80(env, val);
882}
883
884void helper_fsin(CPUX86State *env)
885{
886    double fptemp = floatx80_to_double(env, ST0);
887
888    if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
889        env->fpus |= 0x400;
890    } else {
891        ST0 = double_to_floatx80(env, sin(fptemp));
892        env->fpus &= (~0x400);  /* C2 <-- 0 */
893        /* the above code is for  |arg| < 2**53 only */
894    }
895}
896
897void helper_fcos(CPUX86State *env)
898{
899    double fptemp = floatx80_to_double(env, ST0);
900
901    if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
902        env->fpus |= 0x400;
903    } else {
904        ST0 = double_to_floatx80(env, cos(fptemp));
905        env->fpus &= (~0x400);  /* C2 <-- 0 */
906        /* the above code is for  |arg5 < 2**63 only */
907    }
908}
909
910void helper_fxam_ST0(CPUX86State *env)
911{
912    CPU_LDoubleU temp;
913    int expdif;
914
915    temp.d = ST0;
916
917    env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
918    if (SIGND(temp))
919        env->fpus |= 0x200; /* C1 <-- 1 */
920
921    /* XXX: test fptags too */
922    expdif = EXPD(temp);
923    if (expdif == MAXEXPD) {
924        if (MANTD(temp) == 0x8000000000000000ULL) {
925            env->fpus |=  0x500 /*Infinity*/;
926        } else {
927            env->fpus |=  0x100 /*NaN*/;
928        }
929    } else if (expdif == 0) {
930        if (MANTD(temp) == 0) {
931            env->fpus |=  0x4000 /*Zero*/;
932        } else {
933            env->fpus |= 0x4400 /*Denormal*/;
934        }
935    } else {
936        env->fpus |= 0x400;
937    }
938}
939
940void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
941{
942    int fpus, fptag, exp, i;
943    uint64_t mant;
944    CPU_LDoubleU tmp;
945
946    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
947    fptag = 0;
948    for (i=7; i>=0; i--) {
949        fptag <<= 2;
950        if (env->fptags[i]) {
951            fptag |= 3;
952        } else {
953            tmp.d = env->fpregs[i].d;
954            exp = EXPD(tmp);
955            mant = MANTD(tmp);
956            if (exp == 0 && mant == 0) {
957                /* zero */
958                fptag |= 1;
959            } else if (exp == 0 || exp == MAXEXPD
960                       || (mant & (1LL << 63)) == 0) {
961                /* NaNs, infinity, denormal */
962                fptag |= 2;
963            }
964        }
965    }
966    if (data32) {
967        /* 32 bit */
968        cpu_stl_data(env, ptr, env->fpuc);
969        cpu_stl_data(env, ptr + 4, fpus);
970        cpu_stl_data(env, ptr + 8, fptag);
971        cpu_stl_data(env, ptr + 12, 0); /* fpip */
972        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
973        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
974        cpu_stl_data(env, ptr + 24, 0); /* fpos */
975    } else {
976        /* 16 bit */
977        cpu_stw_data(env, ptr, env->fpuc);
978        cpu_stw_data(env, ptr + 2, fpus);
979        cpu_stw_data(env, ptr + 4, fptag);
980        cpu_stw_data(env, ptr + 6, 0);
981        cpu_stw_data(env, ptr + 8, 0);
982        cpu_stw_data(env, ptr + 10, 0);
983        cpu_stw_data(env, ptr + 12, 0);
984    }
985}
986
987void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
988{
989    int i, fpus, fptag;
990
991    if (data32) {
992        env->fpuc = cpu_lduw_data(env, ptr);
993        fpus = cpu_lduw_data(env, ptr + 4);
994        fptag = cpu_lduw_data(env, ptr + 8);
995    }
996    else {
997        env->fpuc = cpu_lduw_data(env, ptr);
998        fpus = cpu_lduw_data(env, ptr + 2);
999        fptag = cpu_lduw_data(env, ptr + 4);
1000    }
1001    env->fpstt = (fpus >> 11) & 7;
1002    env->fpus = fpus & ~0x3800;
1003    for(i = 0;i < 8; i++) {
1004        env->fptags[i] = ((fptag & 3) == 3);
1005        fptag >>= 2;
1006    }
1007}
1008
1009void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1010{
1011    floatx80 tmp;
1012    int i;
1013
1014    helper_fstenv(env, ptr, data32);
1015
1016    ptr += (14 << data32);
1017    for(i = 0;i < 8; i++) {
1018        tmp = ST(i);
1019        helper_fstt(env, tmp, ptr);
1020        ptr += 10;
1021    }
1022
1023    /* fninit */
1024    env->fpus = 0;
1025    env->fpstt = 0;
1026    env->fpuc = 0x37f;
1027    env->fptags[0] = 1;
1028    env->fptags[1] = 1;
1029    env->fptags[2] = 1;
1030    env->fptags[3] = 1;
1031    env->fptags[4] = 1;
1032    env->fptags[5] = 1;
1033    env->fptags[6] = 1;
1034    env->fptags[7] = 1;
1035}
1036
1037void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1038{
1039    floatx80 tmp;
1040    int i;
1041
1042    helper_fldenv(env, ptr, data32);
1043    ptr += (14 << data32);
1044
1045    for(i = 0;i < 8; i++) {
1046        tmp = helper_fldt(env, ptr);
1047        ST(i) = tmp;
1048        ptr += 10;
1049    }
1050}
1051
1052void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
1053{
1054    int fpus, fptag, i, nb_xmm_regs;
1055    floatx80 tmp;
1056    target_ulong addr;
1057
1058    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1059    fptag = 0;
1060    for(i = 0; i < 8; i++) {
1061        fptag |= (env->fptags[i] << i);
1062    }
1063    cpu_stw_data(env, ptr, env->fpuc);
1064    cpu_stw_data(env, ptr + 2, fpus);
1065    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
1066#ifdef TARGET_X86_64
1067    if (data64) {
1068        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
1069        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
1070    } else
1071#endif
1072    {
1073        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
1074        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
1075        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
1076        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
1077    }
1078
1079    addr = ptr + 0x20;
1080    for(i = 0;i < 8; i++) {
1081        tmp = ST(i);
1082        helper_fstt(env, tmp, addr);
1083        addr += 16;
1084    }
1085
1086    if (env->cr[4] & CR4_OSFXSR_MASK) {
1087        /* XXX: finish it */
1088        cpu_stl_data(env, ptr + 0x18, env->mxcsr); /* mxcsr */
1089        cpu_stl_data(env, ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
1090        if (env->hflags & HF_CS64_MASK)
1091            nb_xmm_regs = 16;
1092        else
1093            nb_xmm_regs = 8;
1094        addr = ptr + 0xa0;
1095        /* Fast FXSAVE leaves out the XMM registers */
1096        if (!(env->efer & MSR_EFER_FFXSR)
1097          || (env->hflags & HF_CPL_MASK)
1098          || !(env->hflags & HF_LMA_MASK)) {
1099            for(i = 0; i < nb_xmm_regs; i++) {
1100                cpu_stq_data(env, addr, env->xmm_regs[i].XMM_Q(0));
1101                cpu_stq_data(env, addr + 8, env->xmm_regs[i].XMM_Q(1));
1102                addr += 16;
1103            }
1104        }
1105    }
1106}
1107
1108void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
1109{
1110    int i, fpus, fptag, nb_xmm_regs;
1111    floatx80 tmp;
1112    target_ulong addr;
1113
1114    env->fpuc = cpu_lduw_data(env, ptr);
1115    fpus = cpu_lduw_data(env, ptr + 2);
1116    fptag = cpu_lduw_data(env, ptr + 4);
1117    env->fpstt = (fpus >> 11) & 7;
1118    env->fpus = fpus & ~0x3800;
1119    fptag ^= 0xff;
1120    for(i = 0;i < 8; i++) {
1121        env->fptags[i] = ((fptag >> i) & 1);
1122    }
1123
1124    addr = ptr + 0x20;
1125    for(i = 0;i < 8; i++) {
1126        tmp = helper_fldt(env, addr);
1127        ST(i) = tmp;
1128        addr += 16;
1129    }
1130
1131    if (env->cr[4] & CR4_OSFXSR_MASK) {
1132        /* XXX: finish it */
1133        env->mxcsr = cpu_ldl_data(env, ptr + 0x18);
1134        //ldl(ptr + 0x1c);
1135        if (env->hflags & HF_CS64_MASK)
1136            nb_xmm_regs = 16;
1137        else
1138            nb_xmm_regs = 8;
1139        addr = ptr + 0xa0;
1140        /* Fast FXRESTORE leaves out the XMM registers */
1141        if (!(env->efer & MSR_EFER_FFXSR)
1142          || (env->hflags & HF_CPL_MASK)
1143          || !(env->hflags & HF_LMA_MASK)) {
1144            for(i = 0; i < nb_xmm_regs; i++) {
1145                env->xmm_regs[i].XMM_Q(0) = cpu_ldq_data(env, addr);
1146                env->xmm_regs[i].XMM_Q(1) = cpu_ldq_data(env, addr + 8);
1147                addr += 16;
1148            }
1149        }
1150    }
1151}
1152
1153void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1154{
1155    CPU_LDoubleU temp;
1156
1157    temp.d = f;
1158    *pmant = temp.l.lower;
1159    *pexp = temp.l.upper;
1160}
1161
1162floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1163{
1164    CPU_LDoubleU temp;
1165
1166    temp.l.upper = upper;
1167    temp.l.lower = mant;
1168    return temp.d;
1169}
1170
1171/* MMX/SSE */
1172/* XXX: optimize by storing fptt and fptags in the static cpu state */
1173void helper_enter_mmx(CPUX86State *env)
1174{
1175    env->fpstt = 0;
1176    memset(env->fptags, 0, sizeof(env->fptags));
1177}
1178
1179void helper_emms(CPUX86State *env)
1180{
1181    /* set to empty state */
1182    memset(env->fptags, 1, sizeof(env->fptags));
1183}
1184
1185/* XXX: suppress */
1186void helper_movq(CPUX86State *env, void *d, void *s)
1187{
1188    *(uint64_t *)d = *(uint64_t *)s;
1189}
1190
1191#define SHIFT 0
1192#include "ops_sse.h"
1193
1194#define SHIFT 1
1195#include "ops_sse.h"
1196