Add a timer test (time.flat) to the x86_64 KVM user tests.  It calibrates
the TSC against the PIT and then runs the PIT and local APIC timers in
periodic mode, timestamping their interrupts with the TSC.

Signed-off-by: Marcelo Tosatti

Index: qemu-kvm/kvm/user/config-x86-common.mak
===================================================================
--- qemu-kvm.orig/kvm/user/config-x86-common.mak
+++ qemu-kvm/kvm/user/config-x86-common.mak
@@ -56,6 +56,9 @@ $(TEST_DIR)/tsc.flat: $(cstart.o) $(TEST
 $(TEST_DIR)/apic.flat: $(cstart.o) $(TEST_DIR)/apic.o $(TEST_DIR)/vm.o \
                        $(TEST_DIR)/print.o
 
+$(TEST_DIR)/time.flat: $(cstart.o) $(TEST_DIR)/time.o $(TEST_DIR)/vm.o \
+                       $(TEST_DIR)/print.o
+
 $(TEST_DIR)/realmode.flat: $(TEST_DIR)/realmode.o
 	$(CC) -m32 -nostdlib -o $@ -Wl,-T,$(TEST_DIR)/realmode.lds $^
 
Index: qemu-kvm/kvm/user/config-x86_64.mak
===================================================================
--- qemu-kvm.orig/kvm/user/config-x86_64.mak
+++ qemu-kvm/kvm/user/config-x86_64.mak
@@ -7,6 +7,7 @@ CFLAGS += -D__x86_64__
 tests = $(TEST_DIR)/access.flat $(TEST_DIR)/irq.flat $(TEST_DIR)/sieve.flat \
       $(TEST_DIR)/simple.flat $(TEST_DIR)/stringio.flat \
       $(TEST_DIR)/memtest1.flat $(TEST_DIR)/emulator.flat \
-      $(TEST_DIR)/hypercall.flat $(TEST_DIR)/apic.flat
+      $(TEST_DIR)/hypercall.flat $(TEST_DIR)/apic.flat \
+      $(TEST_DIR)/time.flat
 
 include config-x86-common.mak
Index: qemu-kvm/kvm/user/test/x86/io.h
===================================================================
--- /dev/null
+++ qemu-kvm/kvm/user/test/x86/io.h
@@ -0,0 +1,39 @@
+/* Port I/O helpers for the x86 tests. */
+
+static inline void outb(unsigned char val, unsigned short port)
+{
+    asm volatile("outb %0, %w1": : "a"(val), "Nd" (port));
+}
+
+static inline void outw(unsigned short val, unsigned short port)
+{
+    asm volatile("outw %0, %w1": : "a"(val), "Nd" (port));
+}
+
+/* val is 32 bits wide so that %0 names %eax, the only source outl accepts */
+static inline void outl(unsigned int val, unsigned short port)
+{
+    asm volatile("outl %0, %w1": : "a"(val), "Nd" (port));
+}
+
+static inline unsigned char inb(unsigned short port)
+{
+    unsigned char val;
+    asm volatile("inb %w1, %0": "=a"(val) : "Nd" (port));
+    return val;
+}
+
+/* unsigned, like inb/inl, so callers do not sign-extend the port data */
+static inline unsigned short inw(unsigned short port)
+{
+    unsigned short val;
+    asm volatile("inw %w1, %0": "=a"(val) : "Nd" (port));
+    return val;
+}
+
+static inline unsigned int inl(unsigned short port)
+{
+    unsigned int val;
+    asm volatile("inl %w1, %0": "=a"(val) : "Nd" (port));
+    return val;
+}
Index: qemu-kvm/kvm/user/test/x86/time.c
===================================================================
--- /dev/null
+++ qemu-kvm/kvm/user/test/x86/time.c
@@ -0,0 +1,838 @@
+/*
+ * Timer tests: exercise the PIT and local APIC timer clockevents and
+ * timestamp their interrupts with the TSC clocksource.
+ */
+
+#include "libcflat.h"
+#include "apic.h"
+#include "vm.h"
+#include "io.h"
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+static void *g_apic;
+static void *g_ioapic;
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned u32;
+typedef unsigned long ulong;
+typedef unsigned long long u64;
+
+typedef u64 ns_t;
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+typedef struct {
+    unsigned short offset0;
+    unsigned short selector;
+    unsigned short ist : 3;
+    unsigned short : 5;
+    unsigned short type : 4;
+    unsigned short : 1;
+    unsigned short dpl : 2;
+    unsigned short p : 1;
+    unsigned short offset1;
+#ifdef __x86_64__
+    unsigned offset2;
+    unsigned reserved;
+#endif
+} idt_entry_t;
+
+typedef struct {
+    ulong rflags;
+    ulong cs;
+    ulong rip;
+    ulong func;
+    ulong regs[sizeof(ulong)*2];
+} isr_regs_t;
+
+#ifdef __x86_64__
+#  define R "r"
+#else
+#  define R "e"
+#endif
+
+/*
+ * Common interrupt entry point.  The per-vector thunk built by
+ * set_idt_entry() stores the C handler address on the stack and jumps
+ * here; this code saves the general purpose registers, calls the handler
+ * with the stack pointer as its argument, restores the registers, drops
+ * the handler slot and returns from the interrupt.
+ */
+extern char isr_entry_point[];
+
+asm (
+    "isr_entry_point: \n"
+#ifdef __x86_64__
+    "push %r15 \n\t"
+    "push %r14 \n\t"
+    "push %r13 \n\t"
+    "push %r12 \n\t"
+    "push %r11 \n\t"
+    "push %r10 \n\t"
+    "push %r9  \n\t"
+    "push %r8  \n\t"
+#endif
+    "push %"R "di \n\t"
+    "push %"R "si \n\t"
+    "push %"R "bp \n\t"
+    "push %"R "sp \n\t"
+    "push %"R "bx \n\t"
+    "push %"R "dx \n\t"
+    "push %"R "cx \n\t"
+    "push %"R "ax \n\t"
+#ifdef __x86_64__
+    "mov %rsp, %rdi \n\t"
+    "callq *8*16(%rsp) \n\t"
+#else
+    "push %esp \n\t"
+    "calll *4+4*8(%esp) \n\t"
+    "add $4, %esp \n\t"
+#endif
+    "pop %"R "ax \n\t"
+    "pop %"R "cx \n\t"
+    "pop %"R "dx \n\t"
+    "pop %"R "bx \n\t"
+    "pop %"R "bp \n\t"    /* discards the slot pushed for the stack pointer */
+    "pop %"R "bp \n\t"
+    "pop %"R "si \n\t"
+    "pop %"R "di \n\t"
+#ifdef __x86_64__
+    "pop %r8  \n\t"
+    "pop %r9  \n\t"
+    "pop %r10 \n\t"
+    "pop %r11 \n\t"
+    "pop %r12 \n\t"
+    "pop %r13 \n\t"
+    "pop %r14 \n\t"
+    "pop %r15 \n\t"
+#endif
+#ifdef __x86_64__
+    "add $8, %rsp \n\t"
+    "iretq \n\t"
+#else
+    "add $4, %esp \n\t"
+    "iretl \n\t"
+#endif
+    );
+
+static idt_entry_t idt[256];
+
+static int g_fail;
+static int g_tests;
+
+/* Print a PASS/FAIL line for one check and count failures. */
+static void report(const char *msg, int pass)
+{
+    ++g_tests;
+    printf("%s: %s\n", msg, (pass ? "PASS" : "FAIL"));
+    if (!pass)
+        ++g_fail;
+}
+
+static u32 apic_read(unsigned reg)
+{
+    return *(volatile u32 *)(g_apic + reg);
+}
+
+static void apic_write(unsigned reg, u32 val)
+{
+    *(volatile u32 *)(g_apic + reg) = val;
+}
+
+static void test_lapic_existence(void)
+{
+    u32 lvr;
+
+    lvr = apic_read(APIC_LVR);
+    printf("apic version: %x\n", lvr);
+    report("apic existence", (u16)lvr == 0x14);
+}
+
+static u16 read_cs(void)
+{
+    u16 v;
+
+    asm("mov %%cs, %0" : "=rm"(v));
+    return v;
+}
+
+static void init_idt(void)
+{
+    struct {
+        u16 limit;
+        ulong idt;
+    } __attribute__((packed)) idt_ptr = {
+        sizeof(idt_entry_t) * 256 - 1,
+        (ulong)&idt,
+    };
+
+    asm volatile("lidt %0" : : "m"(idt_ptr));
+}
+
+/*
+ * Install an interrupt gate for @vec.  A small thunk is allocated that
+ * places @func on the stack and jumps to isr_entry_point, and the gate
+ * points at that thunk.
+ */
+static void set_idt_entry(unsigned vec, void (*func)(isr_regs_t *regs))
+{
+    u8 *thunk = vmalloc(50);
+    ulong ptr = (ulong)thunk;
+    idt_entry_t ent = {
+        .offset0 = ptr,
+        .selector = read_cs(),
+        .ist = 0,
+        .type = 14,
+        .dpl = 0,
+        .p = 1,
+        .offset1 = ptr >> 16,
+#ifdef __x86_64__
+        .offset2 = ptr >> 32,
+#endif
+    };
+#ifdef __x86_64__
+    /* sub $8, %rsp */
+    *thunk++ = 0x48; *thunk++ = 0x83; *thunk++ = 0xec; *thunk++ = 0x08;
+    /* mov $func_low, %(rsp) */
+    *thunk++ = 0xc7; *thunk++ = 0x04; *thunk++ = 0x24;
+    *(u32 *)thunk = (ulong)func; thunk += 4;
+    /* mov $func_high, %(rsp+4) */
+    *thunk++ = 0xc7; *thunk++ = 0x44; *thunk++ = 0x24; *thunk++ = 0x04;
+    *(u32 *)thunk = (ulong)func >> 32; thunk += 4;
+    /* jmp isr_entry_point */
+    *thunk ++ = 0xe9;
+    *(u32 *)thunk = (ulong)isr_entry_point - (ulong)(thunk + 4);
+#else
+    /* push $func */
+    *thunk++ = 0x68;
+    *(u32 *)thunk = (ulong)func; thunk += 4;
+    /* jmp isr_entry_point */
+    *thunk ++ = 0xe9;
+    *(u32 *)thunk = (ulong)isr_entry_point - (ulong)(thunk + 4);
+#endif
+    idt[vec] = ent;
+}
+
+/* "memory" clobbers stop the compiler caching memory across cli/sti. */
+static void irq_disable(void)
+{
+    asm volatile("cli" : : : "memory");
+}
+
+static void irq_enable(void)
+{
+    asm volatile("sti" : : : "memory");
+}
+
+static void eoi(void)
+{
+    apic_write(APIC_EOI, 0);
+}
+
+static volatile int ipi_count;    /* written from self_ipi_isr() */
+
+static void self_ipi_isr(isr_regs_t *regs)
+{
+    ++ipi_count;
+    eoi();
+}
+
+static void test_self_ipi(void)
+{
+    int vec = 0xf1;
+
+    set_idt_entry(vec, self_ipi_isr);
+    irq_enable();
+    apic_write(APIC_ICR,
+               APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | vec);
+    asm volatile ("nop");
+    report("self ipi", ipi_count == 1);
+}
+
+static void ioapic_write_reg(unsigned reg, u32 value)
+{
+    *(volatile u32 *)g_ioapic = reg;
+    *(volatile u32 *)(g_ioapic + 0x10) = value;
+}
+
+typedef struct {
+    u8 vector;
+    u8 delivery_mode:3;
+    u8 dest_mode:1;
+    u8 delivery_status:1;
+    u8 polarity:1;
+    u8 remote_irr:1;
+    u8 trig_mode:1;
+    u8 mask:1;
+    u8 reserve:7;
+    u8 reserved[4];
+    u8 dest_id;
+} ioapic_redir_entry_t;
+
+static void ioapic_write_redir(unsigned line, ioapic_redir_entry_t e)
+{
+    ioapic_write_reg(0x10 + line * 2 + 0, ((u32 *)&e)[0]);
+    ioapic_write_reg(0x10 + line * 2 + 1, ((u32 *)&e)[1]);
+}
+
+static void set_ioapic_redir(unsigned line, unsigned vec)
+{
+    ioapic_redir_entry_t e = {
+        .vector = vec,
+        .delivery_mode = 0,
+        .trig_mode = 0,
+    };
+
+    ioapic_write_redir(line, e);
+}
+
+static void enable_apic(void)
+{
+    apic_write(0xf0, 0x1ff); /* spurious vector register */
+}
+
+/* interrupt handlers */
+
+struct int_table {
+    void (*func)(isr_regs_t *regs);
+    void (*irq_handler)(void *irq_priv);
+    void *irq_priv;
+};
+
+static struct int_table int_handlers[];
+
+#define decl_irq_handler(N) \
+static void timer_int_##N(isr_regs_t *regs) { \
+    struct int_table *t = &int_handlers[N]; \
+    t->irq_handler(t->irq_priv); \
+    eoi(); \
+}
+
+void set_irq_handler(int vec, void (*func)(void *irq_priv), void *irq_priv);
+
+void hlt(void) { asm volatile("hlt" : : : "memory"); }
+
+#define NS_FREQ 1000000000ULL
+
+#define ns2cyc(ns) (((ns) * cpu_hz) / NS_FREQ)
+#define cyc2ns(cyc) (((cyc) * NS_FREQ) / cpu_hz)
+
+/* 64-bit constants: the products must not be computed in int */
+#define us_to_ns(n) (1000ULL * (n))
+#define ms_to_ns(n) (1000000ULL * (n))
+#define s_to_ns(n) (1000000000ULL * (n))
+
+#define ns_to_ms(x) ((x) / 1000000)
+
+#define udelay(n) nsdelay(us_to_ns(n))
+#define mdelay(n) nsdelay(ms_to_ns(n))
+#define sdelay(n) nsdelay(s_to_ns(n))
+
+u64 cpu_hz;
+
+u64 rdtsc(void)
+{
+    unsigned a, d;
+
+    asm volatile("rdtsc" : "=a"(a), "=d"(d));
+    return a | (u64)d << 32;
+}
+
+void wrtsc(u64 tsc)
+{
+    unsigned a = tsc, d = tsc >> 32;
+
+    asm volatile("wrmsr" : : "a"(a), "d"(d), "c"(0x10));
+}
+
+/*
+ * Busy-wait for @ns nanoseconds using the TSC.  The comparison is done in
+ * cycles: converting the raw TSC value to nanoseconds would overflow 64
+ * bits once the counter passes 2^64 / NS_FREQ cycles.
+ */
+void nsdelay(u64 ns)
+{
+    u64 start = rdtsc();
+    u64 cycles = ns2cyc(ns);
+
+    do {
+        __asm__ volatile ("nop");
+    } while (rdtsc() - start < cycles);
+}
+
+struct clocksource {
+    char *name;
+    int (*init) (void);
+    u64 (*read) (void);
+    u64 freq;
+};
+
+/*
+ * Return the clocksource count in nanoseconds.  Whole seconds and the
+ * remainder are converted separately so the multiplication by NS_FREQ
+ * does not overflow 64 bits for large counter values.
+ */
+u64 clocksource_read(struct clocksource *clock)
+{
+    u64 val = clock->read();
+
+    return (val / clock->freq) * NS_FREQ +
+           ((val % clock->freq) * NS_FREQ) / clock->freq;
+}
+
+enum clockevt_type { CLOCKEVT_PERIODIC, CLOCKEVT_ONESHOT, };
+
+struct clockevent {
+    char *name;
+    u64 (*init) (int vec);
+    int (*arm) (u64 count, enum clockevt_type);
+    void (*cancel)(void);
+    u64 (*remain) (void);
+
+    u64 freq;
+    unsigned vec;
+};
+
+void clock_arm(struct clockevent *clockevt, enum clockevt_type type,
+               u64 period)
+{
+    u64 count = (period * clockevt->freq) / NS_FREQ;
+
+    clockevt->arm(count, type);
+}
+
+/* -------- TSC clocksource ------------- */
+
+int tsc_init(void) { printf("%s\n", __func__); return 0; }
+u64 tsc_read(void) { return rdtsc(); }
+
+struct clocksource tsc = {
+    .name = "tsc",
+    .init = tsc_init,
+    .read = tsc_read,
+};
+
+/* --------- ACPI clocksource ----------- */
+
+#define ACPI_PORT 0xb008
+#define ACPI_FREQ 3579545
+
+int acpi_init(void) { printf("%s\n", __func__); return 0; }
+u64 acpi_read(void) { return inl(ACPI_PORT); }
+
+struct clocksource acpi = {
+    .name = "acpi",
+    .init = acpi_init,
+    .read = acpi_read,
+    .freq = ACPI_FREQ,
+};
+
+//struct clocksource *clocksources[] = { &tsc, &acpi, };
+struct clocksource *clocksources[] = { &tsc };
+
+/* --------- LAPIC clockevent ---------- */
+
+#define aprint(reg) printf("%s = %x\n", #reg , apic_read(reg))
+
+static void dummy(void *irq_priv)
+{
+}
+
+/*
+ * Route the LAPIC timer to @vec and estimate its frequency by letting it
+ * count down from 0xffffffff for one second of TSC time.
+ */
+u64 lapic_timer_init(int vec)
+{
+    u64 hz;
+
+    set_irq_handler(vec, dummy, NULL);
+    apic_write(APIC_LVTT, vec);
+    apic_write(APIC_TDCR, 0xB); /* divide by 1 */
+    apic_write(APIC_TMICT, 0xffffffff);
+    sdelay(1);
+    hz = 0xffffffff - apic_read(APIC_TMCCT);
+    printf("%s: detected %lld Hz timer\n", __func__, hz);
+    return hz;
+}
+
+int lapic_timer_arm(u64 period, enum clockevt_type type)
+{
+    if (type == CLOCKEVT_PERIODIC)
+        apic_write(APIC_LVTT, apic_read(APIC_LVTT) | 1 << 17);
+    /* divide count */
+    apic_write(APIC_TDCR, 0xB);
+    /* initial count */
+    apic_write(APIC_TMICT, period);
+    return 0;
+}
+
+void lapic_timer_cancel(void)
+{
+    apic_write(APIC_LVTT, apic_read(APIC_LVTT) & ~(1 << 17)); /* one-shot */
+    apic_write(APIC_TMICT, 0);
+}
+
+u64 lapic_timer_remain(void)
+{
+    return apic_read(APIC_TMCCT);
+}
+
+struct clockevent lapic_timer = {
+    .name = "lapic",
+    .init = lapic_timer_init,
+    .arm = lapic_timer_arm,
+    .cancel = lapic_timer_cancel,
+    .remain = lapic_timer_remain,
+};
+
+/* ---------- PIT clockevent --------- */
+
+#define PIT_FREQ 1193181
+#define PIT_CNT_0 0x40
+#define PIT_CNT_1 0x41
+#define PIT_CNT_2 0x42
+#define PIT_TCW 0x43
+
+/* Return the current count of PIT channel 0. */
+u64 pit_timer_remain(void)
+{
+    u8 lo, hi;
+
+    /*
+     * Latch channel 0, then read the low byte followed by the high byte
+     * in separate statements: the operands of '|' have no defined
+     * evaluation order.
+     */
+    outb(0x00, PIT_TCW);
+    lo = inb(PIT_CNT_0);
+    hi = inb(PIT_CNT_0);
+    return lo | hi << 8;
+}
+
+u64 pit_timer_init(int vec)
+{
+    set_ioapic_redir(0, vec);
+    /* mask LINT0, int is coming through IO-APIC */
+    apic_write(APIC_LVT0, 1 << 16);
+    return PIT_FREQ;
+}
+
+int pit_timer_arm(u64 period, enum clockevt_type type)
+{
+    unsigned char ctrl_word = 0x30;
+
+    if (type == CLOCKEVT_PERIODIC)
+        ctrl_word |= 0x4;
+    outb(ctrl_word, PIT_TCW);
+    outb(period & 0xff, PIT_CNT_0);
+    outb((period & 0xff00) >> 8, PIT_CNT_0);
+    return 0;
+}
+
+void pit_timer_cancel(void)
+{
+    unsigned char ctrl_word = 0x30;
+    outb(ctrl_word, PIT_TCW);
+    outb(0, PIT_CNT_0);
+    outb(0, PIT_CNT_0);
+}
+
+struct clockevent pit_timer = {
+    .name = "pit",
+    .init = pit_timer_init,
+    .arm = pit_timer_arm,
+    .cancel = pit_timer_cancel,
+    .remain = pit_timer_remain,
+};
+
+
+#define NR_CLOCKEVENTS 2
+
+/* clockevent initialization */
+struct clockevent *clockevents[NR_CLOCKEVENTS] = {
+    &pit_timer, &lapic_timer,
+};
+
+decl_irq_handler(0)
+decl_irq_handler(1)
+
+static struct int_table int_handlers[NR_CLOCKEVENTS] = {
+    { .func = timer_int_0 }, { .func = timer_int_1 },
+};
+
+#define TIMER_VEC_BASE 0x90
+
+/* Register @func(@irq_priv) as the handler for timer vector @vec. */
+void set_irq_handler(int vec, void (*func)(void *irq_priv), void *irq_priv)
+{
+    int int_table_idx = vec - TIMER_VEC_BASE;
+
+    if (int_table_idx < 0 || int_table_idx >= NR_CLOCKEVENTS) {
+        printf("%s invalid vec\n", __func__);
+        return;
+    }
+
+    int_handlers[int_table_idx].irq_handler = func;
+    int_handlers[int_table_idx].irq_priv = irq_priv;
+}
+
+void init_interrupts(void)
+{
+    int i;
+
+    for (i = 0; i < NR_CLOCKEVENTS; i++) {
+        int vec = TIMER_VEC_BASE+i;
+
+        set_idt_entry(vec, int_handlers[i].func);
+    }
+}
+
+int init_clockevents(void)
+{
+    int i;
+    int vec = TIMER_VEC_BASE;
+
+    for (i=0; i < ARRAY_SIZE(clockevents); i++) {
+        u64 freq = clockevents[i]->init(vec);
+        clockevents[i]->freq = freq;
+        clockevents[i]->vec = vec;
+        vec++;
+    }
+    return 0;
+}
+
+/*
+ *
+ * TODO:
+ * 1. test every divisor possible
+ * 3. monotonicity of clocksources
+ * 4. tests to mimic Linux calibration sites
+ * 5. SMP
+ *
+ */
+
+/* actual tests */
+
+#define TIME_TABLE_SZ 100
+struct time_table {
+    u64 val[TIME_TABLE_SZ];
+    int idx;
+    struct clocksource *source;
+    u64 period;
+};
+
+/* Store the current clocksource time in the next slot; wraps at the end. */
+void time_table_record(struct time_table *t)
+{
+    t->val[t->idx] = clocksource_read(t->source);
+    t->idx++;
+    if (t->idx >= TIME_TABLE_SZ)
+        t->idx = 0;
+}
+
+/*
+ * Print the average interval between recorded interrupts as a percentage
+ * of the programmed period.
+ */
+void inspect_table(struct time_table *t)
+{
+    int i;
+    int samples = 0;
+    int percent_avg = 0;
+
+    for (i = 1; i < t->idx; i++) {
+        u64 fire_period = t->val[i] - t->val[i-1];
+        /* FIXME: handle clock wraparound */
+        if (t->val[i] < t->val[i-1])
+            break;
+        percent_avg += (fire_period*100) / t->period;
+        samples++;
+    }
+
+    /* fewer than two usable timestamps: nothing to average, avoid x / 0 */
+    if (samples == 0) {
+        printf(" %lld ms no samples\n", ns_to_ms(t->period));
+        return;
+    }
+
+    percent_avg /= samples;
+    printf(" %lld ms percent_off_avg = %d\n", ns_to_ms(t->period), percent_avg);
+}
+
+static void timer_int_record(void *irq_priv)
+{
+    time_table_record(irq_priv);
+}
+
+/*
+ * Arm @clockevt periodically, timestamp each interrupt with @source while
+ * halting 50 times, then print the statistics.
+ */
+void test_periodic_one_clock(struct clockevent *clockevt,
+                             struct clocksource *source, u64 period)
+{
+    int i;
+    struct time_table *t = vmalloc(sizeof(struct time_table));
+
+    t->idx = 0;
+    t->period = period;
+    t->source = source;
+
+    clockevt->cancel();
+    set_irq_handler(clockevt->vec, timer_int_record, t);
+
+    clock_arm(clockevt, CLOCKEVT_PERIODIC, period);
+
+    for (i=0;i<50;i++)
+        hlt();
+
+    clockevt->cancel();
+    inspect_table(t);
+    vfree(t);
+}
+
+static int periodic_freqs[] = { 1, 2, 10, 15, 20, 50, 100, 200 };
+
+void test_periodic_events(void)
+{
+    int i, n, x;
+
+
+    for (x = 0; x < ARRAY_SIZE(clocksources); x++) {
+        struct clocksource *clocksource = clocksources[x];
+
+        for (i = 0; i < ARRAY_SIZE(clockevents); i++) {
+            struct clockevent *clockevt = clockevents[i];
+
+            printf("clockevent = %s clocksource = %s\n", clockevt->name,
+                   clocksource->name);
+
+            for (n = 0; n < ARRAY_SIZE(periodic_freqs); n++)
+                test_periodic_one_clock(clockevt, clocksource,
+                                        ms_to_ns(periodic_freqs[n]));
+        }
+    }
+}
+
+static void int_handler_reinject(void *irq_priv)
+{
+    int *ints = (int *)irq_priv;
+    *ints += 1;
+}
+
+/*
+ * Run each clockevent at a 1 ms period while interrupts are disabled for
+ * 100 ms, then re-enable them and print the number of interrupts handled.
+ */
+void test_reinjection(void)
+{
+    int i;
+    u64 period = ms_to_ns(1);
+
+    for (i = 0; i < ARRAY_SIZE(clockevents); i++) {
+        struct clockevent *clockevt = clockevents[i];
+        int ints = 0;
+
+        printf("clockevent = %s\n", clockevt->name);
+
+        clockevt->cancel();
+        set_irq_handler(clockevt->vec, int_handler_reinject, &ints);
+
+        clock_arm(clockevt, CLOCKEVT_PERIODIC, period);
+        irq_disable();
+        mdelay(100);
+        irq_enable();
+        printf("irqoff delay=100 ints=%d\n", ints);
+
+        clockevt->cancel();
+    }
+}
+
+/* early calibration with PIT to detect TSC frequency, which is necessary
+ * to find lapic frequency.
+ */
+volatile int timer_isr;
+static void timer_int_handler(void *irq_priv)
+{
+    timer_isr++;
+}
+
+void early_calibrate_cpu_hz(void)
+{
+    u64 t1, t2;
+    int ints_per_sec = (PIT_FREQ/0xffff)+1;
+
+    timer_isr = 0;
+    t1 = rdtsc();
+    do {
+        pit_timer.arm(0xffff, CLOCKEVT_ONESHOT);
+        __asm__ volatile ("hlt");
+    } while (timer_isr < ints_per_sec);
+    t2 = rdtsc();
+    /*
+     * The loop covers ints_per_sec * 0xffff PIT ticks, which is a bit
+     * more than one second; scale the cycle count down to PIT_FREQ
+     * ticks.
+     */
+    cpu_hz = ((t2 - t1) * PIT_FREQ) / ((u64)ints_per_sec * 0xffff);
+    printf("detected %lld MHz cpu\n", cpu_hz / 1000000);
+    tsc.freq = cpu_hz;
+}
+
+void early_calibrate(void)
+{
+    pit_timer.init(TIMER_VEC_BASE);
+    set_irq_handler(TIMER_VEC_BASE, timer_int_handler, NULL);
+    early_calibrate_cpu_hz();
+}
+
+int main()
+{
+    setup_vm();
+    init_interrupts();
+
+    g_apic = vmap(0xfee00000, 0x1000);
+    g_ioapic = vmap(0xfec00000, 0x1000);
+
+    test_lapic_existence();
+
+    enable_apic();
+    init_idt();
+
+    test_self_ipi();
+
+    early_calibrate();
+    init_clockevents();
+
+    test_reinjection();
+    test_periodic_events();
+
+    return g_fail != 0;
+}
+
+
+#if 0
+void calibrate(struct clocksource *csource, struct clockevent *cevent)
+{
+    u64 t1, t2;
+    int ints_per_sec = (cevent->freq / c
+
+    csource->wait();
+    t1 = clocksource_read(csource);
+    do {
+        cevent->arm(count, CLOCKEVT_ONESHOT);
+        __asm__ volatile ("hlt");
+    } while (timer_isr < ints_per_sec);
+    t2 = clocksource_read(csource);
+
+    printf("");
+}
+#endif
+
-- 