This not only eases using the cached value in assembly code, but also
improves the generated code resulting from such reads in C.

Signed-off-by: Jan Beulich

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -64,7 +64,6 @@
 #include
 
 DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
-DEFINE_PER_CPU(unsigned long, cr4);
 
 static void default_idle(void);
 void (*pm_idle) (void) __read_mostly = default_idle;
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -645,7 +645,7 @@ void __init noreturn __start_xen(unsigne
     parse_video_info();
 
     rdmsrl(MSR_EFER, this_cpu(efer));
-    asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
+    asm volatile ( "mov %%cr4,%0" : "=r" (get_cpu_info()->cr4) );
 
     /* We initialise the serial devices very early so we can get debugging. */
     ns16550.io_base = 0x3f8;
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -41,8 +41,8 @@ struct cpu_info {
     unsigned int processor_id;
     struct vcpu *current_vcpu;
     unsigned long per_cpu_offset;
+    unsigned long cr4;
     /* get_stack_bottom() must be 16-byte aligned */
-    unsigned long __pad_for_stack_bottom;
 };
 
 static inline struct cpu_info *get_cpu_info(void)
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -328,8 +328,6 @@ static inline unsigned long read_cr2(voi
     return cr2;
 }
 
-DECLARE_PER_CPU(unsigned long, cr4);
-
 static inline void raw_write_cr4(unsigned long val)
 {
     asm volatile ( "mov %0,%%cr4" : : "r" (val) );
@@ -337,12 +335,12 @@ static inline void raw_write_cr4(unsigne
 }
 
 static inline unsigned long read_cr4(void)
 {
-    return this_cpu(cr4);
+    return get_cpu_info()->cr4;
 }
 
 static inline void write_cr4(unsigned long val)
 {
-    this_cpu(cr4) = val;
+    get_cpu_info()->cr4 = val;
     raw_write_cr4(val);
 }