/*
 *  KQEMU support
 *
 *  Copyright (c) 2005-2008 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#include <winioctl.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#endif
#ifdef CONFIG_SOLARIS
#include <sys/ioccom.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <inttypes.h>

#include "cpu.h"
#include "exec-all.h"
#include "qemu-common.h"

#ifdef CONFIG_KQEMU

#define DEBUG
//#define PROFILE


#ifdef DEBUG
#  define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
#  define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
#else
#  define LOG_INT(...) do { } while (0)
#  define LOG_INT_STATE(env) do { } while (0)
#endif
#include <fcntl.h>
60#include "kqemu.h"
61
62#ifdef _WIN32
63#define KQEMU_DEVICE "\\\\.\\kqemu"
64#else
65#define KQEMU_DEVICE "/dev/kqemu"
66#endif
67
68static void qpi_init(void);
69
70#ifdef _WIN32
71#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
72HANDLE kqemu_fd = KQEMU_INVALID_FD;
73#define kqemu_closefd(x) CloseHandle(x)
74#else
75#define KQEMU_INVALID_FD -1
76int kqemu_fd = KQEMU_INVALID_FD;
77#define kqemu_closefd(x) close(x)
78#endif
79
80/* 0 = not allowed
81   1 = user kqemu
82   2 = kernel kqemu
83*/
84int kqemu_allowed = 1;
85uint64_t *pages_to_flush;
86unsigned int nb_pages_to_flush;
87uint64_t *ram_pages_to_update;
88unsigned int nb_ram_pages_to_update;
89uint64_t *modified_ram_pages;
90unsigned int nb_modified_ram_pages;
91uint8_t *modified_ram_pages_table;
92int qpi_io_memory;
93uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
94ram_addr_t kqemu_phys_ram_size;
95uint8_t *kqemu_phys_ram_base;
96
97#define cpuid(index, eax, ebx, ecx, edx) \
98  asm volatile ("cpuid" \
99                : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
100                : "0" (index))
101
102#ifdef __x86_64__
103static int is_cpuid_supported(void)
104{
105    return 1;
106}
107#else
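/* On 32-bit x86, CPUID support is detected by checking whether the ID
   bit (bit 21) of EFLAGS can be toggled: if the flipped value does not
   stick, the CPU has no CPUID instruction. */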
static int is_cpuid_supported(void)
{
    int v0, v1;
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (v0), "=d" (v1)
                  :
                  : "cc");
    return (v0 != v1);
}
#endif

static void kqemu_update_cpuid(CPUState *env)
{
    int critical_features_mask, features, ext_features, ext_features_mask;
    uint32_t eax, ebx, ecx, edx;

    /* the following features are kept identical on the host and
       target cpus because they are important for user code. Strictly
       speaking, only SSE really matters because the OS must support
       it if the user code uses it. */
    critical_features_mask =
        CPUID_CMOV | CPUID_CX8 |
        CPUID_FXSR | CPUID_MMX | CPUID_SSE |
        CPUID_SSE2 | CPUID_SEP;
    ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    if (!is_cpuid_supported()) {
        features = 0;
        ext_features = 0;
    } else {
        cpuid(1, eax, ebx, ecx, edx);
        features = edx;
        ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
       compatibility mode, so for best performance it is better not
       to use it */
    features &= ~CPUID_SEP;
#endif
    env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
        (features & critical_features_mask);
    env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
        (ext_features & ext_features_mask);
    /* XXX: we could update more of the target CPUID state so that the
       non-accelerated code sees exactly the same CPU features as the
       accelerated code */
}

int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed)
        return -1;

#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
#endif
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

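    /* allocate the buffers shared with the kqemu kernel module: the
       page flush list, the dirty ram page list, the modified page list
       and a byte table used to deduplicate modified page entries */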
    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush)
        goto fail;

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update)
        goto fail;

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages)
        goto fail;
    modified_ram_pages_table =
        qemu_mallocz(kqemu_phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table)
        goto fail;

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = kqemu_phys_ram_base;
    kinit.ram_size = kqemu_phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
    fail:
        kqemu_closefd(kqemu_fd);
        kqemu_fd = KQEMU_INVALID_FD;
        return -1;
    }
    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;
}

void kqemu_flush_page(CPUState *env, target_ulong addr)
{
    LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
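    /* when the fixed-size flush buffer overflows, fall back to a full
       TLB flush by storing the KQEMU_FLUSH_ALL sentinel */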
    if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
        nb_pages_to_flush = KQEMU_FLUSH_ALL;
    else
        pages_to_flush[nb_pages_to_flush++] = addr;
}

void kqemu_flush(CPUState *env, int global)
{
    LOG_INT("kqemu_flush:\n");
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}

void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
    LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
            (unsigned long)ram_addr);
    /* we only track transitions to dirty state */
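    /* a dirty byte of 0xff means every dirty flag is still set; only in
       that case is this a new transition the kernel module must see */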
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
        return;
    if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
        nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
    else
        ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
}

static void kqemu_reset_modified_ram_pages(void)
{
    int i;
    unsigned long page_index;

    for(i = 0; i < nb_modified_ram_pages; i++) {
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
        modified_ram_pages_table[page_index] = 0;
    }
    nb_modified_ram_pages = 0;
}

void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    page_index = ram_addr >> TARGET_PAGE_BITS;
    if (!modified_ram_pages_table[page_index]) {
#if 0
        printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
#endif
        modified_ram_pages_table[page_index] = 1;
        modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
        if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
            /* flush */
#ifdef _WIN32
            ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                                  &nb_modified_ram_pages,
                                  sizeof(nb_modified_ram_pages),
                                  NULL, 0, &temp, NULL);
#else
            ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                        &nb_modified_ram_pages);
#endif
            kqemu_reset_modified_ram_pages();
        }
    }
}

void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    uint64_t end;
    int ret, io_index;

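    /* round the region outwards so that it covers whole target pages */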
    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
    kphys_mem->phys_addr = start_addr;
    kphys_mem->size = end - start_addr;
    kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem->io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem->io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            kphys_mem->io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
                              kphys_mem, sizeof(*kphys_mem),
                              NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_MEM error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr,
                (unsigned long)size, (unsigned long)phys_offset);
    }
}

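/* legacy 32-bit FSAVE/FRSTOR memory image (108 bytes) */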
struct fpstate {
    uint16_t fpuc;
    uint16_t dummy1;
    uint16_t fpus;
    uint16_t dummy2;
    uint16_t fptag;
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];
};

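/* FXSAVE/FXRSTOR memory image (512 bytes, must be 16-byte aligned) */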
struct fpxstate {
    uint16_t fpuc;
    uint16_t fpus;
    uint16_t fptag;
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

static struct fpxstate fpx1 __attribute__((aligned(16)));

static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
426    asm volatile ("frstor %0" : "=m" (*fp));
}

static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

435    asm volatile ("fsave %0" : : "m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}

static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
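    /* fxsave/fxrstor use the abridged tag format: one bit per register,
       1 = valid, 0 = empty. env->fptags stores 1 = empty, hence the
       final ^ 0xff. */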
    fptag = 0;
    for(i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
477    asm volatile ("fxrstor %0" : "=m" (*fp));
}

static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

486    asm volatile ("fxsave %0" : : "m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag ^ 0xff;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}

static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    int selector;

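    /* SYSCALL semantics: the new CS selector comes from MSR_STAR[47:32]
       (SS is CS + 8), the target rip from LSTAR (64-bit mode) or CSTAR
       (compatibility mode), and FMASK selects the rflags bits to clear.
       In legacy mode the target eip is the low 32 bits of STAR. */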
    selector = (env->star >> 32) & 0xffff;
#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int code64;

        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~env->fmask;
        if (code64)
            env->eip = env->lstar;
        else
            env->eip = env->cstar;
    } else
#endif
    {
        env->regs[R_ECX] = (uint32_t)kenv->next_eip;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
        env->eip = (uint32_t)env->star;
    }
    return 2;
}

#ifdef CONFIG_PROFILER

#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

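    /* fold the high bits of the pc into the low ones to spread the
       entries over the hash table */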
    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);
    pr = &pc_rec_hash[h];
    for(;;) {
        r = *pr;
        if (r == NULL)
            break;
        if (r->pc == pc) {
            r->count++;
            return;
        }
        pr = &r->next;
    }
    r = malloc(sizeof(PCRecord));
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}

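/* qsort comparator: sorts the records by decreasing execution count */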
static int pc_rec_cmp(const void *p1, const void *p2)
{
    PCRecord *r1 = *(PCRecord **)p1;
    PCRecord *r2 = *(PCRecord **)p2;
    if (r1->count < r2->count)
        return 1;
    else if (r1->count == r2->count)
        return 0;
    else
        return -1;
}

static void kqemu_record_flush(void)
{
    PCRecord *r, *r_next;
    int h;

    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
            r_next = r->next;
            free(r);
        }
        pc_rec_hash[h] = NULL;
    }
    nb_pc_records = 0;
}

void kqemu_record_dump(void)
{
    PCRecord **pr, *r;
    int i, h;
    FILE *f;
    int64_t total, sum;

    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
    i = 0;
    total = 0;
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
            pr[i++] = r;
            total += r->count;
        }
    }
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);

    f = fopen("/tmp/kqemu.stats", "w");
    if (!f) {
        perror("/tmp/kqemu.stats");
        exit(1);
    }
    fprintf(f, "total: %" PRId64 "\n", total);
    sum = 0;
    for(i = 0; i < nb_pc_records; i++) {
        r = pr[i];
        sum += r->count;
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
                r->pc,
                r->count,
                (double)r->count / (double)total * 100.0,
                (double)sum / (double)total * 100.0);
    }
    fclose(f);
    free(pr);

    kqemu_record_flush();
}
#endif

static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
                                  const SegmentCache *sc)
{
    ksc->selector = sc->selector;
    ksc->flags = sc->flags;
    ksc->limit = sc->limit;
    ksc->base = sc->base;
}

static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->selector = ksc->selector;
    sc->flags = ksc->flags;
    sc->limit = ksc->limit;
    sc->base = ksc->base;
}

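/* Run the guest with the kqemu kernel module until it has to exit to
   user space. Returns 0 when execution was interrupted (e.g. by a host
   signal), 1 when an exception or interrupt must be handled by QEMU,
   and 2 when execution must continue in the software emulator. */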
int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    LOG_INT("kqemu: cpu_exec: enter\n");
    LOG_INT_STATE(env);
    for(i = 0; i < CPU_NB_REGS; i++)
        kenv->regs[i] = env->regs[i];
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for(i = 0; i < 6; i++)
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    kqemu_reset_modified_ram_pages();

    if (env->cpuid_features & CPUID_FXSR)
        restore_native_fp_fxrstor(env);
    else
        restore_native_fp_frstor(env);

#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif
    if (env->cpuid_features & CPUID_FXSR)
        save_native_fp_fxsave(env);
    else
        save_native_fp_fsave(env);

    for(i = 0; i < CPU_NB_REGS; i++)
        env->regs[i] = kenv->regs[i];
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for(i = 0; i < 6; i++)
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for(i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    if (kenv->nb_modified_ram_pages > 0) {
        for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
            unsigned long addr;
            addr = modified_ram_pages[i];
            tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
        }
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that in real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
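    /* CR0.MP/EM/TS occupy bits 1..3; shifting left by (HF_MP_SHIFT - 1)
       lines them up with the HF_MP/HF_EM/HF_TS hflag bits */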
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK)
        env->hflags |= HF_OSFXSR_MASK;
    else
        env->hflags &= ~HF_OSFXSR_MASK;

    LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    } else
    if ((ret & 0xff00) == KQEMU_RET_INT) {
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
        LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
        LOG_INT_STATE(env);
        return 1;
    } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
        LOG_INT("kqemu: exception v=%02x e=%04x:\n",
                env->exception_index, env->error_code);
        LOG_INT_STATE(env);
        return 1;
    } else if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
        LOG_INT_STATE(env);
        return 0;
    } else if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
        LOG_INT_STATE(env);
        return 2;
    } else {
        cpu_dump_state(env, stderr, fprintf, 0);
        fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
        exit(1);
    }
    return 0;
}

void kqemu_cpu_interrupt(CPUState *env)
{
#if defined(_WIN32)
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and return successfully */
    CancelIo(kqemu_fd);
#endif
}

/*
   QEMU paravirtualization interface. The current interface only allows
   the IF and IOPL flags to be modified when running in kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/

#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
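/* A sketch of the guest-side usage (hypothetical guest code; map_phys()
   stands for whatever mechanism the guest kernel uses to map a physical
   page):

       volatile uint32_t *qpi = map_phys(QPI_COMM_PAGE_PHYS_ADDR);
       uint32_t flags;

       flags = qpi[0];               // read the current IF/IOPL bits
       qpi[0] = flags & ~IF_MASK;    // e.g. mask interrupts
       asm volatile ("pushf; popf"); // force an exit from the current TB

   Only 32-bit accesses carry information; byte and word accesses are
   ignored by the handlers below. */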

static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}

static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}

static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *env;

    env = cpu_single_env;
    if (!env)
        return 0;
    return env->eflags & (IF_MASK | IOPL_MASK);
}

/* Note: after writing to this address, the guest code must make sure
   it exits the current TB. pushf/popf can be used for that
   purpose. */
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    CPUState *env;

    env = cpu_single_env;
    if (!env)
        return;
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
        (val & (IF_MASK | IOPL_MASK));
}

static CPUReadMemoryFunc *qpi_mem_read[3] = {
    qpi_mem_readb,
    qpi_mem_readw,
    qpi_mem_readl,
};

static CPUWriteMemoryFunc *qpi_mem_write[3] = {
    qpi_mem_writeb,
    qpi_mem_writew,
    qpi_mem_writel,
};

static void qpi_init(void)
{
    kqemu_comm_base = 0xff000000 | 1;
    qpi_io_memory = cpu_register_io_memory(qpi_mem_read,
                                           qpi_mem_write, NULL);
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
                                 0x1000, qpi_io_memory);
}
#endif