Pileus Git - ~andy/linux/commitdiff
x86: Introduce x86_cpuinit.early_percpu_clock_init hook
author Igor Mammedov <imammedo@redhat.com>
Tue, 7 Feb 2012 14:52:44 +0000 (15:52 +0100)
committer Avi Kivity <avi@redhat.com>
Mon, 5 Mar 2012 12:57:32 +0000 (14:57 +0200)
When a kvm guest uses kvmclock, it may hang on vcpu hot-plug.
This is caused by an overflow in pvclock_get_nsec_offset,

    u64 delta = tsc - shadow->tsc_timestamp;

which in turn is caused by undefined values read from the percpu
hv_clock, which hasn't been initialized yet.
The uninitialized clock on the cpu being booted is accessed from
   start_secondary
    -> smp_callin
      ->  smp_store_cpu_info
        -> identify_secondary_cpu
          -> mtrr_ap_init
            -> mtrr_restore
              -> stop_machine_from_inactive_cpu
                -> queue_stop_cpus_work
                  ...
                    -> sched_clock
                      -> kvm_clock_read
which is well before the x86_cpuinit.setup_percpu_clockev call in
start_secondary, where the percpu clock is initialized.

This patch introduces a hook that allows the per-cpu clock to be
set up/initialized early, avoiding the overflow caused by reading
  - undefined values
  - old values if cpu was offlined and then onlined again

Another possible early user of this clock source is ftrace, which
accesses it to get timestamps for ring buffer entries. So even if
mtrr_ap_init were moved from identify_secondary_cpu to after
x86_cpuinit.setup_percpu_clockev in start_secondary, ftrace
could still cause the same overflow/hang on cpu hot-plug.

More complete description of the problem:
  https://lkml.org/lkml/2012/2/2/101

Credits to Marcelo Tosatti <mtosatti@redhat.com> for the hook idea.

Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
arch/x86/include/asm/x86_init.h
arch/x86/kernel/kvmclock.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/x86_init.c

index 517d4767ffdda95e249cc5c173eb008a439cd346..5d0afac2962cf0510207b62ed60c5428619429a4 100644 (file)
@@ -145,9 +145,11 @@ struct x86_init_ops {
 /**
  * struct x86_cpuinit_ops - platform specific cpu hotplug setups
  * @setup_percpu_clockev:      set up the per cpu clock event device
+ * @early_percpu_clock_init:   early init of the per cpu clock event device
  */
 struct x86_cpuinit_ops {
        void (*setup_percpu_clockev)(void);
+       void (*early_percpu_clock_init)(void);
        void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
 };
 
index 44842d756b29fa9b2705fb8c57189fe5d28b5732..ca4e735adc5472f6b863d7fbd3f84b3e5b184144 100644 (file)
@@ -144,8 +144,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
         * we shouldn't fail.
         */
        WARN_ON(kvm_register_clock("secondary cpu clock"));
-       /* ok, done with our trickery, call native */
-       setup_secondary_APIC_clock();
 }
 #endif
 
@@ -194,7 +192,7 @@ void __init kvmclock_init(void)
        x86_platform.get_wallclock = kvm_get_wallclock;
        x86_platform.set_wallclock = kvm_set_wallclock;
 #ifdef CONFIG_X86_LOCAL_APIC
-       x86_cpuinit.setup_percpu_clockev =
+       x86_cpuinit.early_percpu_clock_init =
                kvm_setup_secondary_clock;
 #endif
        machine_ops.shutdown  = kvm_shutdown;
index 66d250c00d115bbaae4c7ab0917ce9c0dfe89643..a05d6fd5e06d5167f0afb9b06aad11dd9c145ba0 100644 (file)
@@ -255,6 +255,7 @@ notrace static void __cpuinit start_secondary(void *unused)
         * most necessary things.
         */
        cpu_init();
+       x86_cpuinit.early_percpu_clock_init();
        preempt_disable();
        smp_callin();
 
index 947a06ccc67305d014dd558a68be75f0f8a34c11..6f2ec53deed0b9a477bcf9bdd630b025e227c009 100644 (file)
@@ -91,6 +91,7 @@ struct x86_init_ops x86_init __initdata = {
 };
 
 struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
+       .early_percpu_clock_init        = x86_init_noop,
        .setup_percpu_clockev           = setup_secondary_APIC_clock,
        .fixup_cpu_id                   = x86_default_fixup_cpu_id,
 };