Merge branch 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Jan 2009 19:44:09 +0000 (11:44 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Jan 2009 19:44:09 +0000 (11:44 -0800)
diff --combined arch/arm/kernel/smp.c

index 019237d21622ba2cde3669d4915dca02a590ea26,bd905c0a73651f85d54fffb25146b50d7d267368..55fa7ff96a3e7aaf654d30c64677b1a4d666ee4b
--- 1/arch/arm/kernel/smp.c
--- 2/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@@ -33,16 -33,6 +33,6 @@@
   #include <asm/tlbflush.h>
   #include <asm/ptrace.h>
   
- /*
-  * bitmask of present and online CPUs.
-  * The present bitmask indicates that the CPU is physically present.
-  * The online bitmask indicates that the CPU is up and running.
-  */
- cpumask_t cpu_possible_map;
- EXPORT_SYMBOL(cpu_possible_map);
- cpumask_t cpu_online_map;
- EXPORT_SYMBOL(cpu_online_map);
- 
   /*
    * as from 2.5, kernels no longer have an init_tasks structure
    * so we need some other way of telling a new secondary core
@@@ -181,7 -171,7 +171,7 @@@ int __cpuexit __cpu_disable(void
         /*
          * Stop the local timer for this CPU.
          */
- -      local_timer_stop(cpu);
+ +      local_timer_stop();
   
         /*
          * Flush user cache and TLB mappings, and then remove this CPU
@@@ -284,7 -274,7 +274,7 @@@ asmlinkage void __cpuinit secondary_sta
         /*
          * Setup local timer for this CPU.
          */
- -      local_timer_setup(cpu);
+ +      local_timer_setup();
   
         calibrate_delay();
   
diff --combined arch/arm/mach-at91/at91rm9200_time.c

index d140eae53ded281bcc9f33fc6b991b1086c5fd55,72f51d39202c7a22d49a393657255d283b339889..1ff1bda0a894a4ce313ecb0896eb1011bd4a2413
--- 1/arch/arm/mach-at91/at91rm9200_time.c
--- 2/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@@ -141,15 -141,6 +141,15 @@@ clkevt32k_next_event(unsigned long delt
         /* Use "raw" primitives so we behave correctly on RT kernels. */
         raw_local_irq_save(flags);
   
+ +      /*
+ +       * According to Thomas Gleixner irqs are already disabled here.  Simply
+ +       * removing raw_local_irq_save above (and the matching
+ +       * raw_local_irq_restore) was not accepted.  See
+ +       * http://thread.gmane.org/gmane.linux.ports.arm.kernel/41174
+ +       * So for now (2008-11-20) just warn once if irqs were not disabled ...
+ +       */
+ +      WARN_ON_ONCE(!raw_irqs_disabled_flags(flags));
+ +
         /* The alarm IRQ uses absolute time (now+delta), not the relative
          * time (delta) in our calling convention.  Like all clockevents
          * using such "match" hardware, we have a race to defend against.
@@@ -178,7 -169,6 +178,6 @@@ static struct clock_event_device clkev
         .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
         .shift          = 32,
         .rating         = 150,
-       .cpumask        = CPU_MASK_CPU0,
         .set_next_event = clkevt32k_next_event,
         .set_mode       = clkevt32k_mode,
   };
@@@ -206,7 -196,7 +205,7 @@@ void __init at91rm9200_timer_init(void
         clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
         clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
         clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
-       clkevt.cpumask = cpumask_of_cpu(0);
+       clkevt.cpumask = cpumask_of(0);
         clockevents_register_device(&clkevt);
   
         /* register clocksource */
diff --combined arch/arm/mach-pxa/time.c

index 0016241585190e3770de17ac6e02b0db57547a2f,bf3c9a4aad509fc8fde9a6858f38e65e14bf60b9..95656a72268dd9f1a1ee7177c4ca5286d9d0a570
--- 1/arch/arm/mach-pxa/time.c
--- 2/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@@ -22,8 -22,8 +22,8 @@@
   #include <asm/div64.h>
   #include <asm/mach/irq.h>
   #include <asm/mach/time.h>
+ +#include <mach/hardware.h>
   #include <mach/pxa-regs.h>
- -#include <asm/mach-types.h>
   
   /*
    * This is PXA's sched_clock implementation. This has a resolution
@@@ -122,7 -122,6 +122,6 @@@ static struct clock_event_device ckevt_
         .features       = CLOCK_EVT_FEAT_ONESHOT,
         .shift          = 32,
         .rating         = 200,
-       .cpumask        = CPU_MASK_CPU0,
         .set_next_event = pxa_osmr0_set_next_event,
         .set_mode       = pxa_osmr0_set_mode,
   };
@@@ -150,11 -149,18 +149,11 @@@ static struct irqaction pxa_ost0_irq = 
   
   static void __init pxa_timer_init(void)
   {
- -      unsigned long clock_tick_rate;
+ +      unsigned long clock_tick_rate = get_clock_tick_rate();
   
         OIER = 0;
         OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
   
- -      if (cpu_is_pxa25x())
- -              clock_tick_rate = 3686400;
- -      else if (machine_is_mainstone())
- -              clock_tick_rate = 3249600;
- -      else
- -              clock_tick_rate = 3250000;
- -
         set_oscr2ns_scale(clock_tick_rate);
   
         ckevt_pxa_osmr0.mult =
@@@ -163,6 -169,7 +162,7 @@@
                 clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
         ckevt_pxa_osmr0.min_delta_ns =
                 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
+       ckevt_pxa_osmr0.cpumask = cpumask_of(0);
   
         cksrc_pxa_oscr0.mult =
                 clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --combined arch/arm/mach-realview/core.c

index 5f1d55963cedb8e7b4d3cbd0b6e71a24c84b10c9,b07cb9b7adb15d5f4a6580ecc9315a401184ae25..bd2aa4f16141d72895e655f63efe75cf6ffcd298
--- 1/arch/arm/mach-realview/core.c
--- 2/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@@ -28,14 -28,11 +28,14 @@@
   #include <linux/clocksource.h>
   #include <linux/clockchips.h>
   #include <linux/io.h>
+ +#include <linux/smc911x.h>
   
+ +#include <asm/clkdev.h>
   #include <asm/system.h>
   #include <mach/hardware.h>
   #include <asm/irq.h>
   #include <asm/leds.h>
+ +#include <asm/mach-types.h>
   #include <asm/hardware/arm_timer.h>
   #include <asm/hardware/icst307.h>
   
@@@ -52,7 -49,7 +52,7 @@@
   
   #define REALVIEW_REFCOUNTER   (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
   
- -/* used by entry-macro.S */
+ +/* used by entry-macro.S and platsmp.c */
   void __iomem *gic_cpu_base_addr;
   
   /*
@@@ -127,29 -124,6 +127,29 @@@ int realview_flash_register(struct reso
         return platform_device_register(&realview_flash_device);
   }
   
+ +static struct smc911x_platdata realview_smc911x_platdata = {
+ +      .flags          = SMC911X_USE_32BIT,
+ +      .irq_flags      = IRQF_SHARED,
+ +      .irq_polarity   = 1,
+ +};
+ +
+ +static struct platform_device realview_eth_device = {
+ +      .name           = "smc911x",
+ +      .id             = 0,
+ +      .num_resources  = 2,
+ +};
+ +
+ +int realview_eth_register(const char *name, struct resource *res)
+ +{
+ +      if (name)
+ +              realview_eth_device.name = name;
+ +      realview_eth_device.resource = res;
+ +      if (strcmp(realview_eth_device.name, "smc911x") == 0)
+ +              realview_eth_device.dev.platform_data = &realview_smc911x_platdata;
+ +
+ +      return platform_device_register(&realview_eth_device);
+ +}
+ +
   static struct resource realview_i2c_resource = {
         .start          = REALVIEW_I2C_BASE,
         .end            = REALVIEW_I2C_BASE + SZ_4K - 1,
@@@ -203,14 -177,9 +203,14 @@@ static const struct icst307_params real
   static void realview_oscvco_set(struct clk *clk, struct icst307_vco vco)
   {
         void __iomem *sys_lock = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LOCK_OFFSET;
- -      void __iomem *sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET;
+ +      void __iomem *sys_osc;
         u32 val;
   
+ +      if (machine_is_realview_pb1176())
+ +              sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC0_OFFSET;
+ +      else
+ +              sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET;
+ +
         val = readl(sys_osc) & ~0x7ffff;
         val |= vco.v | (vco.r << 9) | (vco.s << 16);
   
@@@ -219,59 -188,12 +219,59 @@@
         writel(0, sys_lock);
   }
   
- -struct clk realview_clcd_clk = {
- -      .name   = "CLCDCLK",
+ +static struct clk oscvco_clk = {
         .params = &realview_oscvco_params,
         .setvco = realview_oscvco_set,
   };
   
+ +/*
+ + * These are fixed clocks.
+ + */
+ +static struct clk ref24_clk = {
+ +      .rate   = 24000000,
+ +};
+ +
+ +static struct clk_lookup lookups[] = {
+ +      {       /* UART0 */
+ +              .dev_id         = "dev:f1",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* UART1 */
+ +              .dev_id         = "dev:f2",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* UART2 */
+ +              .dev_id         = "dev:f3",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* UART3 */
+ +              .dev_id         = "fpga:09",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* KMI0 */
+ +              .dev_id         = "fpga:06",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* KMI1 */
+ +              .dev_id         = "fpga:07",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* MMC0 */
+ +              .dev_id         = "fpga:05",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* EB:CLCD */
+ +              .dev_id         = "dev:20",
+ +              .clk            = &oscvco_clk,
+ +      }, {    /* PB:CLCD */
+ +              .dev_id         = "issp:20",
+ +              .clk            = &oscvco_clk,
+ +      }
+ +};
+ +
+ +static int __init clk_init(void)
+ +{
+ +      int i;
+ +
+ +      for (i = 0; i < ARRAY_SIZE(lookups); i++)
+ +              clkdev_add(&lookups[i]);
+ +      return 0;
+ +}
+ +arch_initcall(clk_init);
+ +
   /*
    * CLCD support.
    */
@@@ -304,30 -226,7 +304,30 @@@ static struct clcd_panel vga = 
         .width          = -1,
         .height         = -1,
         .tim2           = TIM2_BCD | TIM2_IPC,
- -      .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+ +      .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+ +      .bpp            = 16,
+ +};
+ +
+ +static struct clcd_panel xvga = {
+ +      .mode           = {
+ +              .name           = "XVGA",
+ +              .refresh        = 60,
+ +              .xres           = 1024,
+ +              .yres           = 768,
+ +              .pixclock       = 15748,
+ +              .left_margin    = 152,
+ +              .right_margin   = 48,
+ +              .upper_margin   = 23,
+ +              .lower_margin   = 3,
+ +              .hsync_len      = 104,
+ +              .vsync_len      = 4,
+ +              .sync           = 0,
+ +              .vmode          = FB_VMODE_NONINTERLACED,
+ +      },
+ +      .width          = -1,
+ +      .height         = -1,
+ +      .tim2           = TIM2_BCD | TIM2_IPC,
+ +      .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
         .bpp            = 16,
   };
   
@@@ -350,7 -249,7 +350,7 @@@ static struct clcd_panel sanyo_3_8_in 
         .width          = -1,
         .height         = -1,
         .tim2           = TIM2_BCD,
- -      .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+ +      .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
         .bpp            = 16,
   };
   
@@@ -373,7 -272,7 +373,7 @@@ static struct clcd_panel sanyo_2_5_in 
         .width          = -1,
         .height         = -1,
         .tim2           = TIM2_IVS | TIM2_IHS | TIM2_IPC,
- -      .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+ +      .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
         .bpp            = 16,
   };
   
@@@ -396,7 -295,7 +396,7 @@@ static struct clcd_panel epson_2_2_in 
         .width          = -1,
         .height         = -1,
         .tim2           = TIM2_BCD | TIM2_IPC,
- -      .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+ +      .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
         .bpp            = 16,
   };
   
@@@ -409,15 -308,9 +409,15 @@@
   static struct clcd_panel *realview_clcd_panel(void)
   {
         void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET;
- -      struct clcd_panel *panel = &vga;
+ +      struct clcd_panel *vga_panel;
+ +      struct clcd_panel *panel;
         u32 val;
   
+ +      if (machine_is_realview_eb())
+ +              vga_panel = &vga;
+ +      else
+ +              vga_panel = &xvga;
+ +
         val = readl(sys_clcd) & SYS_CLCD_ID_MASK;
         if (val == SYS_CLCD_ID_SANYO_3_8)
                 panel = &sanyo_3_8_in;
@@@ -426,11 -319,11 +426,11 @@@
         else if (val == SYS_CLCD_ID_EPSON_2_2)
                 panel = &epson_2_2_in;
         else if (val == SYS_CLCD_ID_VGA)
- -              panel = &vga;
+ +              panel = vga_panel;
         else {
                 printk(KERN_ERR "CLCD: unknown LCD panel ID 0x%08x, using VGA\n",
                         val);
- -              panel = &vga;
+ +              panel = vga_panel;
         }
   
         return panel;
@@@ -465,18 -358,12 +465,18 @@@ static void realview_clcd_enable(struc
         writel(val, sys_clcd);
   }
   
- -static unsigned long framesize = SZ_1M;
- -
   static int realview_clcd_setup(struct clcd_fb *fb)
   {
+ +      unsigned long framesize;
         dma_addr_t dma;
   
+ +      if (machine_is_realview_eb())
+ +              /* VGA, 16bpp */
+ +              framesize = 640 * 480 * 2;
+ +      else
+ +              /* XVGA, 16bpp */
+ +              framesize = 1024 * 768 * 2;
+ +
         fb->panel               = realview_clcd_panel();
   
         fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
@@@ -624,7 -511,7 +624,7 @@@ static struct clock_event_device timer0
         .set_mode       = timer_set_mode,
         .set_next_event = timer_set_next_event,
         .rating         = 300,
-       .cpumask        = CPU_MASK_ALL,
+       .cpumask        = cpu_all_mask,
   };
   
   static void __init realview_clockevents_init(unsigned int timer_irq)
@@@ -701,7 -588,7 +701,7 @@@ void __init realview_timer_init(unsigne
          * The dummy clock device has to be registered before the main device
          * so that the latter will broadcast the clock events
          */
- -      local_timer_setup(smp_processor_id());
+ +      local_timer_setup();
   #endif
   
         /* 
diff --combined arch/arm/mach-realview/localtimer.c

index 9019ef2e56115ac72f5b359bd2da1d68999a5d7b,504961ef343c2ef0f0e6be6bc052b997d90405fd..67d6d9cc68b2a693b5edc2a89aa8d78125e294d3
--- 1/arch/arm/mach-realview/localtimer.c
--- 2/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@@ -38,14 -38,18 +38,14 @@@ void local_timer_interrupt(void
   
   #ifdef CONFIG_LOCAL_TIMERS
   
- -#define TWD_BASE(cpu) (twd_base_addr + (cpu) * twd_size)
- -
   /* set up by the platform code */
- -void __iomem *twd_base_addr;
- -unsigned int twd_size;
+ +void __iomem *twd_base;
   
   static unsigned long mpcore_timer_rate;
   
   static void local_timer_set_mode(enum clock_event_mode mode,
                                  struct clock_event_device *clk)
   {
- -      void __iomem *base = TWD_BASE(smp_processor_id());
         unsigned long ctrl;
   
         switch(mode) {
@@@ -64,16 -68,17 +64,16 @@@
                 ctrl = 0;
         }
   
- -      __raw_writel(ctrl, base + TWD_TIMER_CONTROL);
+ +      __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
   }
   
   static int local_timer_set_next_event(unsigned long evt,
                                       struct clock_event_device *unused)
   {
- -      void __iomem *base = TWD_BASE(smp_processor_id());
- -      unsigned long ctrl = __raw_readl(base + TWD_TIMER_CONTROL);
+ +      unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
   
- -      __raw_writel(evt, base + TWD_TIMER_COUNTER);
- -      __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, base + TWD_TIMER_CONTROL);
+ +      __raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
+ +      __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL);
   
         return 0;
   }
@@@ -86,16 -91,19 +86,16 @@@
    */
   int local_timer_ack(void)
   {
- -      void __iomem *base = TWD_BASE(smp_processor_id());
- -
- -      if (__raw_readl(base + TWD_TIMER_INTSTAT)) {
- -              __raw_writel(1, base + TWD_TIMER_INTSTAT);
+ +      if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) {
+ +              __raw_writel(1, twd_base + TWD_TIMER_INTSTAT);
                 return 1;
         }
   
         return 0;
   }
   
- -static void __cpuinit twd_calibrate_rate(unsigned int cpu)
+ +static void __cpuinit twd_calibrate_rate(void)
   {
- -      void __iomem *base = TWD_BASE(cpu);
         unsigned long load, count;
         u64 waitjiffies;
   
@@@ -116,15 -124,15 +116,15 @@@
                 waitjiffies += 5;
   
                                  /* enable, no interrupt or reload */
- -              __raw_writel(0x1, base + TWD_TIMER_CONTROL);
+ +              __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL);
   
                                  /* maximum value */
- -              __raw_writel(0xFFFFFFFFU, base + TWD_TIMER_COUNTER);
+ +              __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
   
                 while (get_jiffies_64() < waitjiffies)
                         udelay(10);
   
- -              count = __raw_readl(base + TWD_TIMER_COUNTER);
+ +              count = __raw_readl(twd_base + TWD_TIMER_COUNTER);
   
                 mpcore_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
   
@@@ -134,19 -142,18 +134,19 @@@
   
         load = mpcore_timer_rate / HZ;
   
- -      __raw_writel(load, base + TWD_TIMER_LOAD);
+ +      __raw_writel(load, twd_base + TWD_TIMER_LOAD);
   }
   
   /*
    * Setup the local clock events for a CPU.
    */
- -void __cpuinit local_timer_setup(unsigned int cpu)
+ +void __cpuinit local_timer_setup(void)
   {
+ +      unsigned int cpu = smp_processor_id();
         struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
         unsigned long flags;
   
- -      twd_calibrate_rate(cpu);
+ +      twd_calibrate_rate();
   
         clk->name               = "local_timer";
         clk->features           = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
@@@ -154,7 -161,7 +154,7 @@@
         clk->set_mode           = local_timer_set_mode;
         clk->set_next_event     = local_timer_set_next_event;
         clk->irq                = IRQ_LOCALTIMER;
-       clk->cpumask            = cpumask_of_cpu(cpu);
+       clk->cpumask            = cpumask_of(cpu);
         clk->shift              = 20;
         clk->mult               = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
         clk->max_delta_ns       = clockevent_delta2ns(0xffffffff, clk);
@@@ -171,9 -178,9 +171,9 @@@
   /*
    * take a local timer down
    */
- -void __cpuexit local_timer_stop(unsigned int cpu)
+ +void __cpuexit local_timer_stop(void)
   {
- -      __raw_writel(0, TWD_BASE(cpu) + TWD_TIMER_CONTROL);
+ +      __raw_writel(0, twd_base + TWD_TIMER_CONTROL);
   }
   
   #else /* CONFIG_LOCAL_TIMERS */
@@@ -183,9 -190,8 +183,9 @@@ static void dummy_timer_set_mode(enum c
   {
   }
   
- -void __cpuinit local_timer_setup(unsigned int cpu)
+ +void __cpuinit local_timer_setup(void)
   {
+ +      unsigned int cpu = smp_processor_id();
         struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
   
         clk->name               = "dummy_timer";
@@@ -193,7 -199,7 +193,7 @@@
         clk->rating             = 200;
         clk->set_mode           = dummy_timer_set_mode;
         clk->broadcast          = smp_timer_broadcast;
-       clk->cpumask            = cpumask_of_cpu(cpu);
+       clk->cpumask            = cpumask_of(cpu);
   
         clockevents_register_device(clk);
   }
diff --combined arch/arm/mach-sa1100/time.c

index 8c5e727f3b751ffb0e87b176402ae75adb276cfc,1cac4ac0b4b89e7af0dc56116817d163744a542c..711c0295c66f1710de34e20f3d9a88c2eeb99609
--- 1/arch/arm/mach-sa1100/time.c
--- 2/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@@ -2,8 -2,8 +2,8 @@@
    * linux/arch/arm/mach-sa1100/time.c
    *
    * Copyright (C) 1998 Deborah Wallach.
- - * Twiddles  (C) 1999         Hugo Fiennes <hugo@empeg.com>
- - * 
+ + * Twiddles  (C) 1999 Hugo Fiennes <hugo@empeg.com>
+ + *
    * 2000/03/29 (C) Nicolas Pitre <nico@cam.org>
    *    Rewritten: big cleanup, much simpler, better HZ accuracy.
    *
@@@ -73,7 -73,6 +73,6 @@@ static struct clock_event_device ckevt_
         .features       = CLOCK_EVT_FEAT_ONESHOT,
         .shift          = 32,
         .rating         = 200,
-       .cpumask        = CPU_MASK_CPU0,
         .set_next_event = sa1100_osmr0_set_next_event,
         .set_mode       = sa1100_osmr0_set_mode,
   };
@@@ -110,6 -109,7 +109,7 @@@ static void __init sa1100_timer_init(vo
                 clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
         ckevt_sa1100_osmr0.min_delta_ns =
                 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
+       ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
   
         cksrc_sa1100_oscr.mult =
                 clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --combined arch/arm/mach-versatile/core.c

index df25aa138509c95aec6dfcb391f8409de5b182a8,a3f1933434e261d604c20f665a8b2b6dde450b0b..1c43494f5c422092d713a860e1623e83078b9d05
--- 1/arch/arm/mach-versatile/core.c
--- 2/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@@ -31,7 -31,6 +31,7 @@@
   #include <linux/cnt32_to_63.h>
   #include <linux/io.h>
   
+ +#include <asm/clkdev.h>
   #include <asm/system.h>
   #include <mach/hardware.h>
   #include <asm/irq.h>
@@@ -374,60 -373,22 +374,60 @@@ static const struct icst307_params vers
   
   static void versatile_oscvco_set(struct clk *clk, struct icst307_vco vco)
   {
- -      void __iomem *sys_lock = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_LOCK_OFFSET;
- -      void __iomem *sys_osc = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSCCLCD_OFFSET;
+ +      void __iomem *sys = __io_address(VERSATILE_SYS_BASE);
+ +      void __iomem *sys_lock = sys + VERSATILE_SYS_LOCK_OFFSET;
         u32 val;
   
- -      val = readl(sys_osc) & ~0x7ffff;
+ +      val = readl(sys + clk->oscoff) & ~0x7ffff;
         val |= vco.v | (vco.r << 9) | (vco.s << 16);
   
         writel(0xa05f, sys_lock);
- -      writel(val, sys_osc);
+ +      writel(val, sys + clk->oscoff);
         writel(0, sys_lock);
   }
   
- -static struct clk versatile_clcd_clk = {
- -      .name   = "CLCDCLK",
+ +static struct clk osc4_clk = {
         .params = &versatile_oscvco_params,
- -      .setvco = versatile_oscvco_set,
+ +      .oscoff = VERSATILE_SYS_OSCCLCD_OFFSET,
+ +      .setvco = versatile_oscvco_set,
+ +};
+ +
+ +/*
+ + * These are fixed clocks.
+ + */
+ +static struct clk ref24_clk = {
+ +      .rate   = 24000000,
+ +};
+ +
+ +static struct clk_lookup lookups[] __initdata = {
+ +      {       /* UART0 */
+ +              .dev_id         = "dev:f1",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* UART1 */
+ +              .dev_id         = "dev:f2",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* UART2 */
+ +              .dev_id         = "dev:f3",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* UART3 */
+ +              .dev_id         = "fpga:09",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* KMI0 */
+ +              .dev_id         = "fpga:06",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* KMI1 */
+ +              .dev_id         = "fpga:07",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* MMC0 */
+ +              .dev_id         = "fpga:05",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* MMC1 */
+ +              .dev_id         = "fpga:0b",
+ +              .clk            = &ref24_clk,
+ +      }, {    /* CLCD */
+ +              .dev_id         = "dev:20",
+ +              .clk            = &osc4_clk,
+ +      }
   };
   
   /*
@@@ -825,8 -786,7 +825,8 @@@ void __init versatile_init(void
   {
         int i;
   
- -      clk_register(&versatile_clcd_clk);
+ +      for (i = 0; i < ARRAY_SIZE(lookups); i++)
+ +              clkdev_add(&lookups[i]);
   
         platform_device_register(&versatile_flash_device);
         platform_device_register(&versatile_i2c_device);
@@@ -1005,7 -965,7 +1005,7 @@@ static void __init versatile_timer_init
         timer0_clockevent.min_delta_ns =
                 clockevent_delta2ns(0xf, &timer0_clockevent);
   
-       timer0_clockevent.cpumask = cpumask_of_cpu(0);
+       timer0_clockevent.cpumask = cpumask_of(0);
         clockevents_register_device(&timer0_clockevent);
   }
   
diff --combined arch/powerpc/kernel/smp.c

index 8ac3f721d2359e20b09476fbe85aafc7c161f97d,d1165566f06487a6860e8d14e3586f7230d6b66d..65484b2200b36add20b48dc38caf3f80971c4d29
--- 1/arch/powerpc/kernel/smp.c
--- 2/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@@ -57,15 -57,12 +57,11 @@@
   #define DBG(fmt...)
   #endif
   
- -int smp_hw_index[NR_CPUS];
   struct thread_info *secondary_ti;
   
- cpumask_t cpu_possible_map = CPU_MASK_NONE;
- cpumask_t cpu_online_map = CPU_MASK_NONE;
   DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
   DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
   
- EXPORT_SYMBOL(cpu_online_map);
- EXPORT_SYMBOL(cpu_possible_map);
   EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
   EXPORT_PER_CPU_SYMBOL(cpu_core_map);
   
@@@ -122,65 -119,6 +118,65 @@@ void smp_message_recv(int msg
         }
   }
   
+ +static irqreturn_t call_function_action(int irq, void *data)
+ +{
+ +      generic_smp_call_function_interrupt();
+ +      return IRQ_HANDLED;
+ +}
+ +
+ +static irqreturn_t reschedule_action(int irq, void *data)
+ +{
+ +      /* we just need the return path side effect of checking need_resched */
+ +      return IRQ_HANDLED;
+ +}
+ +
+ +static irqreturn_t call_function_single_action(int irq, void *data)
+ +{
+ +      generic_smp_call_function_single_interrupt();
+ +      return IRQ_HANDLED;
+ +}
+ +
+ +static irqreturn_t debug_ipi_action(int irq, void *data)
+ +{
+ +      smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+ +      return IRQ_HANDLED;
+ +}
+ +
+ +static irq_handler_t smp_ipi_action[] = {
+ +      [PPC_MSG_CALL_FUNCTION] =  call_function_action,
+ +      [PPC_MSG_RESCHEDULE] = reschedule_action,
+ +      [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+ +      [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+ +};
+ +
+ +const char *smp_ipi_name[] = {
+ +      [PPC_MSG_CALL_FUNCTION] =  "ipi call function",
+ +      [PPC_MSG_RESCHEDULE] = "ipi reschedule",
+ +      [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+ +      [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+ +};
+ +
+ +/* optional function to request ipi, for controllers with >= 4 ipis */
+ +int smp_request_message_ipi(int virq, int msg)
+ +{
+ +      int err;
+ +
+ +      if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+ +              return -EINVAL;
+ +      }
+ +#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+ +      if (msg == PPC_MSG_DEBUGGER_BREAK) {
+ +              return 1;
+ +      }
+ +#endif
+ +      err = request_irq(virq, smp_ipi_action[msg], IRQF_DISABLED|IRQF_PERCPU,
+ +                        smp_ipi_name[msg], 0);
+ +      WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
+ +              virq, smp_ipi_name[msg], err);
+ +
+ +      return err;
+ +}
+ +
   void smp_send_reschedule(int cpu)
   {
         if (likely(smp_ops))
@@@ -466,7 -404,8 +462,7 @@@ out
   static struct device_node *cpu_to_l2cache(int cpu)
   {
         struct device_node *np;
- -      const phandle *php;
- -      phandle ph;
+ +      struct device_node *cache;
   
         if (!cpu_present(cpu))
                 return NULL;
@@@ -475,11 -414,13 +471,11 @@@
         if (np == NULL)
                 return NULL;
   
- -      php = of_get_property(np, "l2-cache", NULL);
- -      if (php == NULL)
- -              return NULL;
- -      ph = *php;
+ +      cache = of_find_next_cache_node(np);
+ +
         of_node_put(np);
   
- -      return of_find_node_by_phandle(ph);
+ +      return cache;
   }
   
   /* Activate a secondary processor. */
diff --combined arch/powerpc/kernel/time.c

index e1f3a51404292ee3c422295e47292da9b7c22256,6f39d35d6f556f51fa8db03a3fd43a683f0da798..99f1ddd68582e9628456f8d8e7f8fe3fb0c5b25f
--- 1/arch/powerpc/kernel/time.c
--- 2/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@@ -164,6 -164,8 +164,6 @@@ static u64 tb_to_ns_scale __read_mostly
   static unsigned tb_to_ns_shift __read_mostly;
   static unsigned long boot_tb __read_mostly;
   
- -static struct gettimeofday_struct do_gtod;
- -
   extern struct timezone sys_tz;
   static long timezone_offset;
   
@@@ -413,9 -415,31 +413,9 @@@ void udelay(unsigned long usecs
   }
   EXPORT_SYMBOL(udelay);
   
- -
- -/*
- - * There are two copies of tb_to_xs and stamp_xsec so that no
- - * lock is needed to access and use these values in
- - * do_gettimeofday.  We alternate the copies and as long as a
- - * reasonable time elapses between changes, there will never
- - * be inconsistent values.  ntpd has a minimum of one minute
- - * between updates.
- - */
   static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
                                u64 new_tb_to_xs)
   {
- -      unsigned temp_idx;
- -      struct gettimeofday_vars *temp_varp;
- -
- -      temp_idx = (do_gtod.var_idx == 0);
- -      temp_varp = &do_gtod.vars[temp_idx];
- -
- -      temp_varp->tb_to_xs = new_tb_to_xs;
- -      temp_varp->tb_orig_stamp = new_tb_stamp;
- -      temp_varp->stamp_xsec = new_stamp_xsec;
- -      smp_mb();
- -      do_gtod.varp = temp_varp;
- -      do_gtod.var_idx = temp_idx;
- -
         /*
          * tb_update_count is used to allow the userspace gettimeofday code
          * to assure itself that it sees a consistent view of the tb_to_xs and
@@@ -432,7 -456,6 +432,7 @@@
         vdso_data->tb_to_xs = new_tb_to_xs;
         vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
         vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+ +      vdso_data->stamp_xtime = xtime;
         smp_wmb();
         ++(vdso_data->tb_update_count);
   }
@@@ -491,7 -514,9 +491,7 @@@ static int __init iSeries_tb_recal(void
                                 tb_ticks_per_sec   = new_tb_ticks_per_sec;
                                 calc_cputime_factors();
                                 div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
- -                              do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
                                 tb_to_xs = divres.result_low;
- -                              do_gtod.varp->tb_to_xs = tb_to_xs;
                                 vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
                                 vdso_data->tb_to_xs = tb_to_xs;
                         }
@@@ -844,7 -869,7 +844,7 @@@ static void register_decrementer_clocke
         struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
   
         *dec = decrementer_clockevent;
-       dec->cpumask = cpumask_of_cpu(cpu);
+       dec->cpumask = cpumask_of(cpu);
   
         printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
                dec->name, dec->mult, dec->shift, cpu);
@@@ -963,6 -988,15 +963,6 @@@ void __init time_init(void
                 sys_tz.tz_dsttime = 0;
           }
   
- -      do_gtod.varp = &do_gtod.vars[0];
- -      do_gtod.var_idx = 0;
- -      do_gtod.varp->tb_orig_stamp = tb_last_jiffy;
- -      __get_cpu_var(last_jiffy) = tb_last_jiffy;
- -      do_gtod.varp->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
- -      do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
- -      do_gtod.varp->tb_to_xs = tb_to_xs;
- -      do_gtod.tb_to_us = tb_to_us;
- -
         vdso_data->tb_orig_stamp = tb_last_jiffy;
         vdso_data->tb_update_count = 0;
         vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
diff --combined arch/powerpc/platforms/pseries/xics.c

index f7a69021b7bf8ed9832a94527a48b632dc2477ee,424b335a71c83e360b59bfdebd7939d62070b360..84e058f1e1cc805516157d9a2c503168149e2084
--- 1/arch/powerpc/platforms/pseries/xics.c
--- 2/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@@ -332,7 -332,7 +332,7 @@@ static void xics_eoi_lpar(unsigned int 
         lpar_xirr_info_set((0xff << 24) | irq);
   }
   
- static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
   {
         unsigned int irq;
         int status;
@@@ -579,7 -579,7 +579,7 @@@ static void xics_update_irq_servers(voi
         int i, j;
         struct device_node *np;
         u32 ilen;
- -      const u32 *ireg, *isize;
+ +      const u32 *ireg;
         u32 hcpuid;
   
         /* Find the server numbers for the boot cpu. */
@@@ -607,6 -607,11 +607,6 @@@
                 }
         }
   
- -      /* get the bit size of server numbers */
- -      isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
- -      if (isize)
- -              interrupt_server_size = *isize;
- -
         of_node_put(np);
   }
   
@@@ -677,7 -682,6 +677,7 @@@ void __init xics_init_IRQ(void
         struct device_node *np;
         u32 indx = 0;
         int found = 0;
+ +      const u32 *isize;
   
         ppc64_boot_msg(0x20, "XICS Init");
   
@@@ -697,26 -701,6 +697,26 @@@
         if (found == 0)
                 return;
   
+ +      /* get the bit size of server numbers */
+ +      found = 0;
+ +
+ +      for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
+ +              isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+ +
+ +              if (!isize)
+ +                      continue;
+ +
+ +              if (!found) {
+ +                      interrupt_server_size = *isize;
+ +                      found = 1;
+ +              } else if (*isize != interrupt_server_size) {
+ +                      printk(KERN_WARNING "XICS: "
+ +                             "mismatched ibm,interrupt-server#-size\n");
+ +                      interrupt_server_size = max(*isize,
+ +                                                  interrupt_server_size);
+ +              }
+ +      }
+ +
         xics_update_irq_servers();
         xics_init_host();
   
@@@ -744,18 -728,9 +744,18 @@@ static void xics_set_cpu_priority(unsig
   /* Have the calling processor join or leave the specified global queue */
   static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
   {
- -      int status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE,
- -              (1UL << interrupt_server_size) - 1 - gserver, join);
- -      WARN_ON(status < 0);
+ +      int index;
+ +      int status;
+ +
+ +      if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+ +              return;
+ +
+ +      index = (1UL << interrupt_server_size) - 1 - gserver;
+ +
+ +      status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+ +
+ +      WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+ +           GLOBAL_INTERRUPT_QUEUE, index, join, status);
   }
   
   void xics_setup_cpu(void)
@@@ -870,7 -845,7 +870,7 @@@ void xics_migrate_irqs_away(void
   
                 /* Reset affinity to all cpus */
                 irq_desc[virq].affinity = CPU_MASK_ALL;
-               desc->chip->set_affinity(virq, CPU_MASK_ALL);
+               desc->chip->set_affinity(virq, cpu_all_mask);
   unlock:
                 spin_unlock_irqrestore(&desc->lock, flags);
         }
diff --combined arch/powerpc/sysdev/mpic.c

index c82babb70074963bf4c37f3ead2d0feb4be9bf56,5d7f9f0c93c323ba6955203496c61d7979fa7ff9..3e0d89dcdba2a5b4950a63a9cd79534f8e3702cf
--- 1/arch/powerpc/sysdev/mpic.c
--- 2/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@@ -661,6 -661,17 +661,6 @@@ static inline void mpic_eoi(struct mpi
         (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
   }
   
- -#ifdef CONFIG_SMP
- -static irqreturn_t mpic_ipi_action(int irq, void *data)
- -{
- -      long ipi = (long)data;
- -
- -      smp_message_recv(ipi);
- -
- -      return IRQ_HANDLED;
- -}
- -#endif /* CONFIG_SMP */
- -
   /*
    * Linux descriptor level callbacks
    */
@@@ -806,7 -817,7 +806,7 @@@ static void mpic_end_ipi(unsigned int i
   
   #endif /* CONFIG_SMP */
   
- void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
   {
         struct mpic *mpic = mpic_from_irq(irq);
         unsigned int src = mpic_irq_to_hw(irq);
@@@ -818,7 -829,7 +818,7 @@@
         } else {
                 cpumask_t tmp;
   
-               cpus_and(tmp, cpumask, cpu_online_map);
+               cpumask_and(&tmp, cpumask, cpu_online_mask);
   
                 mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
                                mpic_physmask(cpus_addr(tmp)[0]));
@@@ -1537,7 -1548,13 +1537,7 @@@ unsigned int mpic_get_mcirq(void
   void mpic_request_ipis(void)
   {
         struct mpic *mpic = mpic_primary;
- -      long i, err;
- -      static char *ipi_names[] = {
- -              "IPI0 (call function)",
- -              "IPI1 (reschedule)",
- -              "IPI2 (call function single)",
- -              "IPI3 (debugger break)",
- -      };
+ +      int i;
         BUG_ON(mpic == NULL);
   
         printk(KERN_INFO "mpic: requesting IPIs ... \n");
@@@ -1546,10 -1563,17 +1546,10 @@@
                 unsigned int vipi = irq_create_mapping(mpic->irqhost,
                                                        mpic->ipi_vecs[0] + i);
                 if (vipi == NO_IRQ) {
- -                      printk(KERN_ERR "Failed to map IPI %ld\n", i);
- -                      break;
- -              }
- -              err = request_irq(vipi, mpic_ipi_action,
- -                                IRQF_DISABLED|IRQF_PERCPU,
- -                                ipi_names[i], (void *)i);
- -              if (err) {
- -                      printk(KERN_ERR "Request of irq %d for IPI %ld failed\n",
- -                             vipi, i);
- -                      break;
+ +                      printk(KERN_ERR "Failed to map %s\n", smp_ipi_name[i]);
+ +                      continue;
                 }
+ +              smp_request_message_ipi(vipi, i);
         }
   }
   
diff --combined arch/s390/Kconfig

index 8152fefc97b919469f8da42ea2c9b967dd55f049,b4aa5869c7f9c5b9eef1ce41bd56d0b7eeb4677b..19577aeffd7b42799eb576d4f882fcc969a28ae2
--- 1/arch/s390/Kconfig
--- 2/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@@ -43,9 -43,6 +43,9 @@@ config GENERIC_HWEIGH
   config GENERIC_TIME
         def_bool y
   
+ +config GENERIC_TIME_VSYSCALL
+ +      def_bool y
+ +
   config GENERIC_CLOCKEVENTS
         def_bool y
   
@@@ -69,20 -66,16 +69,21 @@@ config PGST
         bool
         default y if KVM
   
+ +config VIRT_CPU_ACCOUNTING
+ +      def_bool y
+ +
   mainmenu "Linux Kernel Configuration"
   
   config S390
         def_bool y
+ +      select USE_GENERIC_SMP_HELPERS if SMP
+ +      select HAVE_FUNCTION_TRACER
         select HAVE_OPROFILE
         select HAVE_KPROBES
         select HAVE_KRETPROBES
         select HAVE_KVM if 64BIT
         select HAVE_ARCH_TRACEHOOK
+       select INIT_ALL_POSSIBLE
   
   source "init/Kconfig"
   
@@@ -233,14 -226,6 +234,14 @@@ config MARCH_Z9_10
           Class (z9 BC). The kernel will be slightly faster but will not
           work on older machines such as the z990, z890, z900, and z800.
   
+ +config MARCH_Z10
+ +      bool "IBM System z10"
+ +      help
+ +        Select this to enable optimizations for IBM System z10. The
+ +        kernel will be slightly faster but will not work on older
+ +        machines such as the z990, z890, z900, z800, z9-109, z9-ec
+ +        and z9-bc.
+ +
   endchoice
   
   config PACK_STACK
@@@ -359,6 -344,16 +360,6 @@@ config QDI
   
           If unsure, say Y.
   
- -config QDIO_DEBUG
- -      bool "Extended debugging information"
- -      depends on QDIO
- -      help
- -        Say Y here to get extended debugging output in
- -          /sys/kernel/debug/s390dbf/qdio...
- -        Warning: this option reduces the performance of the QDIO module.
- -
- -        If unsure, say N.
- -
   config CHSC_SCH
         tristate "Support for CHSC subchannels"
         help
@@@ -472,9 -467,22 +473,9 @@@ config PAGE_STATE
          hypervisor. The ESSA instruction is used to do the state
           changes between a page that has content and the unused state.
   
- -config VIRT_TIMER
- -      bool "Virtual CPU timer support"
- -      help
- -        This provides a kernel interface for virtual CPU timers.
- -        Default is disabled.
- -
- -config VIRT_CPU_ACCOUNTING
- -      bool "Base user process accounting on virtual cpu timer"
- -      depends on VIRT_TIMER
- -      help
- -        Select this option to use CPU timer deltas to do user
- -        process accounting.
- -
   config APPLDATA_BASE
         bool "Linux - VM Monitor Stream, base infrastructure"
- -      depends on PROC_FS && VIRT_TIMER=y
+ +      depends on PROC_FS
         help
           This provides a kernel interface for creating and updating z/VM APPLDATA
           monitor records. The monitor records are updated at certain time
diff --combined arch/s390/kernel/smp.c

index 6fc78541dc57fee528a712f1cc3a259db97e23de,f03914b8ed2f85942416266fa15758a20bb0b5eb..3ed5c7a83c6c477e242ab411de06f7af35121f7a
--- 1/arch/s390/kernel/smp.c
--- 2/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@@ -20,9 -20,6 +20,9 @@@
    * cpu_number_map in other architectures.
    */
   
+ +#define KMSG_COMPONENT "cpu"
+ +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+ +
   #include <linux/module.h>
   #include <linux/init.h>
   #include <linux/mm.h>
@@@ -55,12 -52,6 +55,6 @@@
   struct _lowcore *lowcore_ptr[NR_CPUS];
   EXPORT_SYMBOL(lowcore_ptr);
   
- cpumask_t cpu_online_map = CPU_MASK_NONE;
- EXPORT_SYMBOL(cpu_online_map);
- 
- cpumask_t cpu_possible_map = CPU_MASK_ALL;
- EXPORT_SYMBOL(cpu_possible_map);
- 
   static struct task_struct *current_set[NR_CPUS];
   
   static u8 smp_cpu_type;
@@@ -80,6 -71,159 +74,6 @@@ static DEFINE_PER_CPU(struct cpu, cpu_d
   
   static void smp_ext_bitcall(int, ec_bit_sig);
   
- -/*
- - * Structure and data for __smp_call_function_map(). This is designed to
- - * minimise static memory requirements. It also looks cleaner.
- - */
- -static DEFINE_SPINLOCK(call_lock);
- -
- -struct call_data_struct {
- -      void (*func) (void *info);
- -      void *info;
- -      cpumask_t started;
- -      cpumask_t finished;
- -      int wait;
- -};
- -
- -static struct call_data_struct *call_data;
- -
- -/*
- - * 'Call function' interrupt callback
- - */
- -static void do_call_function(void)
- -{
- -      void (*func) (void *info) = call_data->func;
- -      void *info = call_data->info;
- -      int wait = call_data->wait;
- -
- -      cpu_set(smp_processor_id(), call_data->started);
- -      (*func)(info);
- -      if (wait)
- -              cpu_set(smp_processor_id(), call_data->finished);;
- -}
- -
- -static void __smp_call_function_map(void (*func) (void *info), void *info,
- -                                  int wait, cpumask_t map)
- -{
- -      struct call_data_struct data;
- -      int cpu, local = 0;
- -
- -      /*
- -       * Can deadlock when interrupts are disabled or if in wrong context.
- -       */
- -      WARN_ON(irqs_disabled() || in_irq());
- -
- -      /*
- -       * Check for local function call. We have to have the same call order
- -       * as in on_each_cpu() because of machine_restart_smp().
- -       */
- -      if (cpu_isset(smp_processor_id(), map)) {
- -              local = 1;
- -              cpu_clear(smp_processor_id(), map);
- -      }
- -
- -      cpus_and(map, map, cpu_online_map);
- -      if (cpus_empty(map))
- -              goto out;
- -
- -      data.func = func;
- -      data.info = info;
- -      data.started = CPU_MASK_NONE;
- -      data.wait = wait;
- -      if (wait)
- -              data.finished = CPU_MASK_NONE;
- -
- -      call_data = &data;
- -
- -      for_each_cpu_mask(cpu, map)
- -              smp_ext_bitcall(cpu, ec_call_function);
- -
- -      /* Wait for response */
- -      while (!cpus_equal(map, data.started))
- -              cpu_relax();
- -      if (wait)
- -              while (!cpus_equal(map, data.finished))
- -                      cpu_relax();
- -out:
- -      if (local) {
- -              local_irq_disable();
- -              func(info);
- -              local_irq_enable();
- -      }
- -}
- -
- -/*
- - * smp_call_function:
- - * @func: the function to run; this must be fast and non-blocking
- - * @info: an arbitrary pointer to pass to the function
- - * @wait: if true, wait (atomically) until function has completed on other CPUs
- - *
- - * Run a function on all other CPUs.
- - *
- - * You must not call this function with disabled interrupts, from a
- - * hardware interrupt handler or from a bottom half.
- - */
- -int smp_call_function(void (*func) (void *info), void *info, int wait)
- -{
- -      cpumask_t map;
- -
- -      spin_lock(&call_lock);
- -      map = cpu_online_map;
- -      cpu_clear(smp_processor_id(), map);
- -      __smp_call_function_map(func, info, wait, map);
- -      spin_unlock(&call_lock);
- -      return 0;
- -}
- -EXPORT_SYMBOL(smp_call_function);
- -
- -/*
- - * smp_call_function_single:
- - * @cpu: the CPU where func should run
- - * @func: the function to run; this must be fast and non-blocking
- - * @info: an arbitrary pointer to pass to the function
- - * @wait: if true, wait (atomically) until function has completed on other CPUs
- - *
- - * Run a function on one processor.
- - *
- - * You must not call this function with disabled interrupts, from a
- - * hardware interrupt handler or from a bottom half.
- - */
- -int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- -                           int wait)
- -{
- -      spin_lock(&call_lock);
- -      __smp_call_function_map(func, info, wait, cpumask_of_cpu(cpu));
- -      spin_unlock(&call_lock);
- -      return 0;
- -}
- -EXPORT_SYMBOL(smp_call_function_single);
- -
- -/**
- - * smp_call_function_mask(): Run a function on a set of other CPUs.
- - * @mask: The set of cpus to run on.  Must not include the current cpu.
- - * @func: The function to run. This must be fast and non-blocking.
- - * @info: An arbitrary pointer to pass to the function.
- - * @wait: If true, wait (atomically) until function has completed on other CPUs.
- - *
- - * Returns 0 on success, else a negative status code.
- - *
- - * If @wait is true, then returns once @func has returned; otherwise
- - * it returns just before the target cpu calls @func.
- - *
- - * You must not call this function with disabled interrupts or from a
- - * hardware interrupt handler or from a bottom half handler.
- - */
- -int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
- -                         int wait)
- -{
- -      spin_lock(&call_lock);
- -      cpu_clear(smp_processor_id(), mask);
- -      __smp_call_function_map(func, info, wait, mask);
- -      spin_unlock(&call_lock);
- -      return 0;
- -}
- -EXPORT_SYMBOL(smp_call_function_mask);
- -
   void smp_send_stop(void)
   {
         int cpu, rc;
@@@ -121,10 -265,7 +115,10 @@@ static void do_ext_call_interrupt(__u1
         bits = xchg(&S390_lowcore.ext_call_fast, 0);
   
         if (test_bit(ec_call_function, &bits))
- -              do_call_function();
+ +              generic_smp_call_function_interrupt();
+ +
+ +      if (test_bit(ec_call_function_single, &bits))
+ +              generic_smp_call_function_single_interrupt();
   }
   
   /*
@@@ -141,19 -282,6 +135,19 @@@ static void smp_ext_bitcall(int cpu, ec
                 udelay(10);
   }
   
+ +void arch_send_call_function_ipi(cpumask_t mask)
+ +{
+ +      int cpu;
+ +
+ +      for_each_cpu_mask(cpu, mask)
+ +              smp_ext_bitcall(cpu, ec_call_function);
+ +}
+ +
+ +void arch_send_call_function_single_ipi(int cpu)
+ +{
+ +      smp_ext_bitcall(cpu, ec_call_function_single);
+ +}
+ +
   #ifndef CONFIG_64BIT
   /*
    * this function sends a 'purge tlb' signal to another CPU.
@@@ -254,8 -382,8 +248,8 @@@ static void __init smp_get_save_area(un
         if (ipl_info.type != IPL_TYPE_FCP_DUMP)
                 return;
         if (cpu >= NR_CPUS) {
- -              printk(KERN_WARNING "Registers for cpu %i not saved since dump "
- -                     "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS);
+ +              pr_warning("CPU %i exceeds the maximum %i and is excluded from "
+ +                         "the dump\n", cpu, NR_CPUS - 1);
                 return;
         }
         zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL);
@@@ -428,7 -556,7 +422,7 @@@ static void __init smp_detect_cpus(void
         }
   out:
         kfree(info);
- -      printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus);
+ +      pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
         get_online_cpus();
         __smp_rescan_cpus();
         put_online_cpus();
@@@ -444,17 -572,19 +438,17 @@@ int __cpuinit start_secondary(void *cpu
         preempt_disable();
         /* Enable TOD clock interrupts on the secondary cpu. */
         init_cpu_timer();
- -#ifdef CONFIG_VIRT_TIMER
         /* Enable cpu timer interrupts on the secondary cpu. */
         init_cpu_vtimer();
- -#endif
         /* Enable pfault pseudo page faults on this cpu. */
         pfault_init();
   
         /* call cpu notifiers */
         notify_cpu_starting(smp_processor_id());
         /* Mark this cpu as online */
- -      spin_lock(&call_lock);
+ +      ipi_call_lock();
         cpu_set(smp_processor_id(), cpu_online_map);
- -      spin_unlock(&call_lock);
+ +      ipi_call_unlock();
         /* Switch on interrupts */
         local_irq_enable();
         /* Print info about this processor */
@@@ -503,15 -633,18 +497,15 @@@ static int __cpuinit smp_alloc_lowcore(
   
                 save_area = get_zeroed_page(GFP_KERNEL);
                 if (!save_area)
- -                      goto out_save_area;
+ +                      goto out;
                 lowcore->extended_save_area_addr = (u32) save_area;
         }
   #endif
         lowcore_ptr[cpu] = lowcore;
         return 0;
   
- -#ifndef CONFIG_64BIT
- -out_save_area:
- -      free_page(panic_stack);
- -#endif
   out:
+ +      free_page(panic_stack);
         free_pages(async_stack, ASYNC_ORDER);
         free_pages((unsigned long) lowcore, lc_order);
         return -ENOMEM;
@@@ -551,8 -684,12 +545,8 @@@ int __cpuinit __cpu_up(unsigned int cpu
   
         ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]),
                                    cpu, sigp_set_prefix);
- -      if (ccode) {
- -              printk("sigp_set_prefix failed for cpu %d "
- -                     "with condition code %d\n",
- -                     (int) cpu, (int) ccode);
+ +      if (ccode)
                 return -EIO;
- -      }
   
         idle = current_set[cpu];
         cpu_lowcore = lowcore_ptr[cpu];
@@@ -635,7 -772,7 +629,7 @@@ void __cpu_die(unsigned int cpu
         while (!smp_cpu_not_running(cpu))
                 cpu_relax();
         smp_free_lowcore(cpu);
- -      printk(KERN_INFO "Processor %d spun down\n", cpu);
+ +      pr_info("Processor %d stopped\n", cpu);
   }
   
   void cpu_die(void)
diff --combined arch/s390/kernel/time.c

index 5be981a36c3ecadcb66db97b0bd3d88834d1081e,f5bd141c84434b7a5376b134eeee75a684507575..d649600df5b9a887c17f6770ad878e8820c51d92
--- 1/arch/s390/kernel/time.c
--- 2/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@@ -12,9 -12,6 +12,9 @@@
    *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
    */
   
+ +#define KMSG_COMPONENT "time"
+ +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+ +
   #include <linux/errno.h>
   #include <linux/module.h>
   #include <linux/sched.h>
@@@ -23,8 -20,6 +23,8 @@@
   #include <linux/string.h>
   #include <linux/mm.h>
   #include <linux/interrupt.h>
+ +#include <linux/cpu.h>
+ +#include <linux/stop_machine.h>
   #include <linux/time.h>
   #include <linux/sysdev.h>
   #include <linux/delay.h>
@@@ -41,7 -36,6 +41,7 @@@
   #include <asm/delay.h>
   #include <asm/s390_ext.h>
   #include <asm/div64.h>
+ +#include <asm/vdso.h>
   #include <asm/irq.h>
   #include <asm/irq_regs.h>
   #include <asm/timer.h>
@@@ -160,7 -154,7 +160,7 @@@ void init_cpu_timer(void
         cd->min_delta_ns        = 1;
         cd->max_delta_ns        = LONG_MAX;
         cd->rating              = 400;
-       cd->cpumask             = cpumask_of_cpu(cpu);
+       cd->cpumask             = cpumask_of(cpu);
         cd->set_next_event      = s390_next_event;
         cd->set_mode            = s390_set_mode;
   
@@@ -229,36 -223,6 +229,36 @@@ static struct clocksource clocksource_t
   };
   
   
+ +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+ +{
+ +      if (clock != &clocksource_tod)
+ +              return;
+ +
+ +      /* Make userspace gettimeofday spin until we're done. */
+ +      ++vdso_data->tb_update_count;
+ +      smp_wmb();
+ +      vdso_data->xtime_tod_stamp = clock->cycle_last;
+ +      vdso_data->xtime_clock_sec = xtime.tv_sec;
+ +      vdso_data->xtime_clock_nsec = xtime.tv_nsec;
+ +      vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
+ +      vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+ +      smp_wmb();
+ +      ++vdso_data->tb_update_count;
+ +}
+ +
+ +extern struct timezone sys_tz;
+ +
+ +void update_vsyscall_tz(void)
+ +{
+ +      /* Make userspace gettimeofday spin until we're done. */
+ +      ++vdso_data->tb_update_count;
+ +      smp_wmb();
+ +      vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ +      vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ +      smp_wmb();
+ +      ++vdso_data->tb_update_count;
+ +}
+ +
   /*
    * Initialize the TOD clock and the CPU timer of
    * the boot cpu.
@@@ -289,8 -253,10 +289,8 @@@ void __init time_init(void
   
         /* Enable TOD clock interrupts on the boot cpu. */
         init_cpu_timer();
- -
- -#ifdef CONFIG_VIRT_TIMER
+ +      /* Enable cpu timer interrupts on the boot cpu. */
         vtime_init();
- -#endif
   }
   
   /*
@@@ -322,8 -288,8 +322,8 @@@ static unsigned long long adjust_time(u
         }
         sched_clock_base_cc += delta;
         if (adjust.offset != 0) {
- -              printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
- -                     adjust.offset);
+ +              pr_notice("The ETR interface has adjusted the clock "
+ +                        "by %li microseconds\n", adjust.offset);
                 adjust.modes = ADJ_OFFSET_SINGLESHOT;
                 do_adjtimex(&adjust);
         }
@@@ -394,15 -360,6 +394,15 @@@ static void enable_sync_clock(void
         atomic_set_mask(0x80000000, sw_ptr);
   }
   
+ +/* Single threaded workqueue used for etr and stp sync events */
+ +static struct workqueue_struct *time_sync_wq;
+ +
+ +static void __init time_init_wq(void)
+ +{
+ +      if (!time_sync_wq)
+ +              time_sync_wq = create_singlethread_workqueue("timesync");
+ +}
+ +
   /*
    * External Time Reference (ETR) code.
    */
@@@ -468,7 -425,6 +468,7 @@@ static struct timer_list etr_timer
   
   static void etr_timeout(unsigned long dummy);
   static void etr_work_fn(struct work_struct *work);
+ +static DEFINE_MUTEX(etr_work_mutex);
   static DECLARE_WORK(etr_work, etr_work_fn);
   
   /*
@@@ -484,8 -440,8 +484,8 @@@ static void etr_reset(void
                 etr_tolec = get_clock();
                 set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
         } else if (etr_port0_online || etr_port1_online) {
- -              printk(KERN_WARNING "Running on non ETR capable "
- -                     "machine, only local mode available.\n");
+ +              pr_warning("The real or virtual hardware system does "
+ +                         "not provide an ETR interface\n");
                 etr_port0_online = etr_port1_online = 0;
         }
   }
@@@ -496,18 -452,17 +496,18 @@@ static int __init etr_init(void
   
         if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
                 return 0;
+ +      time_init_wq();
         /* Check if this machine has the steai instruction. */
         if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
                 etr_steai_available = 1;
         setup_timer(&etr_timer, etr_timeout, 0UL);
         if (etr_port0_online) {
                 set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- -              schedule_work(&etr_work);
+ +              queue_work(time_sync_wq, &etr_work);
         }
         if (etr_port1_online) {
                 set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- -              schedule_work(&etr_work);
+ +              queue_work(time_sync_wq, &etr_work);
         }
         return 0;
   }
@@@ -534,7 -489,7 +534,7 @@@ void etr_switch_to_local(void
         if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
                 disable_sync_clock(NULL);
         set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
- -      schedule_work(&etr_work);
+ +      queue_work(time_sync_wq, &etr_work);
   }
   
   /*
@@@ -550,7 -505,7 +550,7 @@@ void etr_sync_check(void
         if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
                 disable_sync_clock(NULL);
         set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
- -      schedule_work(&etr_work);
+ +      queue_work(time_sync_wq, &etr_work);
   }
   
   /*
@@@ -574,13 -529,13 +574,13 @@@ static void etr_timing_alert(struct etr
                  * Both ports are not up-to-date now.
                  */
                 set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
- -      schedule_work(&etr_work);
+ +      queue_work(time_sync_wq, &etr_work);
   }
   
   static void etr_timeout(unsigned long dummy)
   {
         set_bit(ETR_EVENT_UPDATE, &etr_events);
- -      schedule_work(&etr_work);
+ +      queue_work(time_sync_wq, &etr_work);
   }
   
   /*
@@@ -687,16 -642,14 +687,16 @@@ static int etr_aib_follows(struct etr_a
   }
   
   struct clock_sync_data {
+ +      atomic_t cpus;
         int in_sync;
         unsigned long long fixup_cc;
+ +      int etr_port;
+ +      struct etr_aib *etr_aib;
   };
   
- -static void clock_sync_cpu_start(void *dummy)
+ +static void clock_sync_cpu(struct clock_sync_data *sync)
   {
- -      struct clock_sync_data *sync = dummy;
- -
+ +      atomic_dec(&sync->cpus);
         enable_sync_clock();
         /*
          * This looks like a busy wait loop but it isn't. etr_sync_cpus
@@@ -722,35 -675,39 +722,35 @@@
         fixup_clock_comparator(sync->fixup_cc);
   }
   
- -static void clock_sync_cpu_end(void *dummy)
- -{
- -}
- -
   /*
    * Sync the TOD clock using the port referred to by aibp. This port
    * has to be enabled and the other port has to be disabled. The
    * last eacr update has to be more than 1.6 seconds in the past.
    */
- -static int etr_sync_clock(struct etr_aib *aib, int port)
+ +static int etr_sync_clock(void *data)
   {
- -      struct etr_aib *sync_port;
- -      struct clock_sync_data etr_sync;
+ +      static int first;
         unsigned long long clock, old_clock, delay, delta;
- -      int follows;
+ +      struct clock_sync_data *etr_sync;
+ +      struct etr_aib *sync_port, *aib;
+ +      int port;
         int rc;
   
- -      /* Check if the current aib is adjacent to the sync port aib. */
- -      sync_port = (port == 0) ? &etr_port0 : &etr_port1;
- -      follows = etr_aib_follows(sync_port, aib, port);
- -      memcpy(sync_port, aib, sizeof(*aib));
- -      if (!follows)
- -              return -EAGAIN;
+ +      etr_sync = data;
   
- -      /*
- -       * Catch all other cpus and make them wait until we have
- -       * successfully synced the clock. smp_call_function will
- -       * return after all other cpus are in etr_sync_cpu_start.
- -       */
- -      memset(&etr_sync, 0, sizeof(etr_sync));
- -      preempt_disable();
- -      smp_call_function(clock_sync_cpu_start, &etr_sync, 0);
- -      local_irq_disable();
+ +      if (xchg(&first, 1) == 1) {
+ +              /* Slave */
+ +              clock_sync_cpu(etr_sync);
+ +              return 0;
+ +      }
+ +
+ +      /* Wait until all other cpus entered the sync function. */
+ +      while (atomic_read(&etr_sync->cpus) != 0)
+ +              cpu_relax();
+ +
+ +      port = etr_sync->etr_port;
+ +      aib = etr_sync->etr_aib;
+ +      sync_port = (port == 0) ? &etr_port0 : &etr_port1;
         enable_sync_clock();
   
         /* Set clock to next OTE. */
@@@ -767,16 -724,16 +767,16 @@@
                 delay = (unsigned long long)
                         (aib->edf2.etv - sync_port->edf2.etv) << 32;
                 delta = adjust_time(old_clock, clock, delay);
- -              etr_sync.fixup_cc = delta;
+ +              etr_sync->fixup_cc = delta;
                 fixup_clock_comparator(delta);
                 /* Verify that the clock is properly set. */
                 if (!etr_aib_follows(sync_port, aib, port)) {
                         /* Didn't work. */
                         disable_sync_clock(NULL);
- -                      etr_sync.in_sync = -EAGAIN;
+ +                      etr_sync->in_sync = -EAGAIN;
                         rc = -EAGAIN;
                 } else {
- -                      etr_sync.in_sync = 1;
+ +                      etr_sync->in_sync = 1;
                         rc = 0;
                 }
         } else {
@@@ -784,33 -741,12 +784,33 @@@
                 __ctl_clear_bit(0, 29);
                 __ctl_clear_bit(14, 21);
                 disable_sync_clock(NULL);
- -              etr_sync.in_sync = -EAGAIN;
+ +              etr_sync->in_sync = -EAGAIN;
                 rc = -EAGAIN;
         }
- -      local_irq_enable();
- -      smp_call_function(clock_sync_cpu_end, NULL, 0);
- -      preempt_enable();
+ +      xchg(&first, 0);
+ +      return rc;
+ +}
+ +
+ +static int etr_sync_clock_stop(struct etr_aib *aib, int port)
+ +{
+ +      struct clock_sync_data etr_sync;
+ +      struct etr_aib *sync_port;
+ +      int follows;
+ +      int rc;
+ +
+ +      /* Check if the current aib is adjacent to the sync port aib. */
+ +      sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+ +      follows = etr_aib_follows(sync_port, aib, port);
+ +      memcpy(sync_port, aib, sizeof(*aib));
+ +      if (!follows)
+ +              return -EAGAIN;
+ +      memset(&etr_sync, 0, sizeof(etr_sync));
+ +      etr_sync.etr_aib = aib;
+ +      etr_sync.etr_port = port;
+ +      get_online_cpus();
+ +      atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
+ +      rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map);
+ +      put_online_cpus();
         return rc;
   }
   
@@@ -967,7 -903,7 +967,7 @@@ static void etr_update_eacr(struct etr_
   }
   
   /*
- - * ETR tasklet. In this function you'll find the main logic. In
+ + * ETR work. In this function you'll find the main logic. In
    * particular this is the only function that calls etr_update_eacr(),
    * it "controls" the etr control register.
    */
@@@ -978,9 -914,6 +978,9 @@@ static void etr_work_fn(struct work_str
         struct etr_aib aib;
         int sync_port;
   
+ +      /* prevent multiple execution. */
+ +      mutex_lock(&etr_work_mutex);
+ +
         /* Create working copy of etr_eacr. */
         eacr = etr_eacr;
   
@@@ -996,7 -929,7 +996,7 @@@
                 del_timer_sync(&etr_timer);
                 etr_update_eacr(eacr);
                 clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
- -              return;
+ +              goto out_unlock;
         }
   
         /* Store aib to get the current ETR status word. */
@@@ -1083,7 -1016,7 +1083,7 @@@
             eacr.es || sync_port < 0) {
                 etr_update_eacr(eacr);
                 etr_set_tolec_timeout(now);
- -              return;
+ +              goto out_unlock;
         }
   
         /*
@@@ -1103,7 -1036,7 +1103,7 @@@
         etr_update_eacr(eacr);
         set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
         if (now < etr_tolec + (1600000 << 12) ||
- -          etr_sync_clock(&aib, sync_port) != 0) {
+ +          etr_sync_clock_stop(&aib, sync_port) != 0) {
                 /* Sync failed. Try again in 1/2 second. */
                 eacr.es = 0;
                 etr_update_eacr(eacr);
@@@ -1111,8 -1044,6 +1111,8 @@@
                 etr_set_sync_timeout();
         } else
                 etr_set_tolec_timeout(now);
+ +out_unlock:
+ +      mutex_unlock(&etr_work_mutex);
   }
   
   /*
@@@ -1194,13 -1125,13 +1194,13 @@@ static ssize_t etr_online_store(struct 
                         return count;   /* Nothing to do. */
                 etr_port0_online = value;
                 set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- -              schedule_work(&etr_work);
+ +              queue_work(time_sync_wq, &etr_work);
         } else {
                 if (etr_port1_online == value)
                         return count;   /* Nothing to do. */
                 etr_port1_online = value;
                 set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- -              schedule_work(&etr_work);
+ +              queue_work(time_sync_wq, &etr_work);
         }
         return count;
   }
@@@ -1401,7 -1332,6 +1401,7 @@@ static struct stp_sstpi stp_info
   static void *stp_page;
   
   static void stp_work_fn(struct work_struct *work);
+ +static DEFINE_MUTEX(stp_work_mutex);
   static DECLARE_WORK(stp_work, stp_work_fn);
   
   static int __init early_parse_stp(char *p)
@@@ -1426,8 -1356,7 +1426,8 @@@ static void __init stp_reset(void
         if (rc == 0)
                 set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
         else if (stp_online) {
- -              printk(KERN_WARNING "Running on non STP capable machine.\n");
+ +              pr_warning("The real or virtual hardware system does "
+ +                         "not provide an STP interface\n");
                 free_bootmem((unsigned long) stp_page, PAGE_SIZE);
                 stp_page = NULL;
                 stp_online = 0;
@@@ -1436,12 -1365,8 +1436,12 @@@
   
   static int __init stp_init(void)
   {
- -      if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online)
- -              schedule_work(&stp_work);
+ +      if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ +              return 0;
+ +      time_init_wq();
+ +      if (!stp_online)
+ +              return 0;
+ +      queue_work(time_sync_wq, &stp_work);
         return 0;
   }
   
@@@ -1458,7 -1383,7 +1458,7 @@@ arch_initcall(stp_init)
   static void stp_timing_alert(struct stp_irq_parm *intparm)
   {
         if (intparm->tsc || intparm->lac || intparm->tcpc)
- -              schedule_work(&stp_work);
+ +              queue_work(time_sync_wq, &stp_work);
   }
   
   /*
@@@ -1472,7 -1397,7 +1472,7 @@@ void stp_sync_check(void
         if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
                 return;
         disable_sync_clock(NULL);
- -      schedule_work(&stp_work);
+ +      queue_work(time_sync_wq, &stp_work);
   }
   
   /*
@@@ -1486,34 -1411,46 +1486,34 @@@ void stp_island_check(void
         if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
                 return;
         disable_sync_clock(NULL);
- -      schedule_work(&stp_work);
+ +      queue_work(time_sync_wq, &stp_work);
   }
   
- -/*
- - * STP tasklet. Check for the STP state and take over the clock
- - * synchronization if the STP clock source is usable.
- - */
- -static void stp_work_fn(struct work_struct *work)
+ +
+ +static int stp_sync_clock(void *data)
   {
- -      struct clock_sync_data stp_sync;
+ +      static int first;
         unsigned long long old_clock, delta;
+ +      struct clock_sync_data *stp_sync;
         int rc;
   
- -      if (!stp_online) {
- -              chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
- -              return;
- -      }
+ +      stp_sync = data;
   
- -      rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
- -      if (rc)
- -              return;
+ +      if (xchg(&first, 1) == 1) {
+ +              /* Slave */
+ +              clock_sync_cpu(stp_sync);
+ +              return 0;
+ +      }
   
- -      rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
- -      if (rc || stp_info.c == 0)
- -              return;
+ +      /* Wait until all other cpus entered the sync function. */
+ +      while (atomic_read(&stp_sync->cpus) != 0)
+ +              cpu_relax();
   
- -      /*
- -       * Catch all other cpus and make them wait until we have
- -       * successfully synced the clock. smp_call_function will
- -       * return after all other cpus are in clock_sync_cpu_start.
- -       */
- -      memset(&stp_sync, 0, sizeof(stp_sync));
- -      preempt_disable();
- -      smp_call_function(clock_sync_cpu_start, &stp_sync, 0);
- -      local_irq_disable();
         enable_sync_clock();
   
         set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
         if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
- -              schedule_work(&etr_work);
+ +              queue_work(time_sync_wq, &etr_work);
   
         rc = 0;
         if (stp_info.todoff[0] || stp_info.todoff[1] ||
@@@ -1532,49 -1469,16 +1532,49 @@@
         }
         if (rc) {
                 disable_sync_clock(NULL);
- -              stp_sync.in_sync = -EAGAIN;
+ +              stp_sync->in_sync = -EAGAIN;
                 clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
                 if (etr_port0_online || etr_port1_online)
- -                      schedule_work(&etr_work);
+ +                      queue_work(time_sync_wq, &etr_work);
         } else
- -              stp_sync.in_sync = 1;
+ +              stp_sync->in_sync = 1;
+ +      xchg(&first, 0);
+ +      return 0;
+ +}
+ +
+ +/*
+ + * STP work. Check for the STP state and take over the clock
+ + * synchronization if the STP clock source is usable.
+ + */
+ +static void stp_work_fn(struct work_struct *work)
+ +{
+ +      struct clock_sync_data stp_sync;
+ +      int rc;
+ +
+ +      /* prevent multiple execution. */
+ +      mutex_lock(&stp_work_mutex);
+ +
+ +      if (!stp_online) {
+ +              chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ +              goto out_unlock;
+ +      }
+ +
+ +      rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+ +      if (rc)
+ +              goto out_unlock;
+ +
+ +      rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+ +      if (rc || stp_info.c == 0)
+ +              goto out_unlock;
+ +
+ +      memset(&stp_sync, 0, sizeof(stp_sync));
+ +      get_online_cpus();
+ +      atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ +      stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map);
+ +      put_online_cpus();
   
- -      local_irq_enable();
- -      smp_call_function(clock_sync_cpu_end, NULL, 0);
- -      preempt_enable();
+ +out_unlock:
+ +      mutex_unlock(&stp_work_mutex);
   }
   
   /*
@@@ -1683,7 -1587,7 +1683,7 @@@ static ssize_t stp_online_store(struct 
         if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
                 return -EOPNOTSUPP;
         stp_online = value;
- -      schedule_work(&stp_work);
+ +      queue_work(time_sync_wq, &stp_work);
         return count;
   }
   
diff --combined arch/sparc/kernel/irq_64.c

index a3ea2bcb95de6a39ebb7bf297ee2355f507f178c,4aaf18e83c8c24766c4a18ae5a3716f7a6249402..cab8e02868716d691a38b9bad239ec754dd39134
--- 1/arch/sparc/kernel/irq_64.c
--- 2/arch/sparc64/kernel/irq.c
+++ b/arch/sparc/kernel/irq_64.c
@@@ -312,7 -312,8 +312,8 @@@ static void sun4u_irq_enable(unsigned i
         }
   }
   
- static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+ static void sun4u_set_affinity(unsigned int virt_irq,
+                              const struct cpumask *mask)
   {
         sun4u_irq_enable(virt_irq);
   }
@@@ -362,7 -363,8 +363,8 @@@ static void sun4v_irq_enable(unsigned i
                        ino, err);
   }
   
- static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+ static void sun4v_set_affinity(unsigned int virt_irq,
+                              const struct cpumask *mask)
   {
         unsigned int ino = virt_irq_table[virt_irq].dev_ino;
         unsigned long cpuid = irq_choose_cpu(virt_irq);
@@@ -429,7 -431,8 +431,8 @@@ static void sun4v_virq_enable(unsigned 
                        dev_handle, dev_ino, err);
   }
   
- static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+ static void sun4v_virt_set_affinity(unsigned int virt_irq,
+                                   const struct cpumask *mask)
   {
         unsigned long cpuid, dev_handle, dev_ino;
         int err;
@@@ -775,69 -778,6 +778,69 @@@ void do_softirq(void
         local_irq_restore(flags);
   }
   
+ +static void unhandled_perf_irq(struct pt_regs *regs)
+ +{
+ +      unsigned long pcr, pic;
+ +
+ +      read_pcr(pcr);
+ +      read_pic(pic);
+ +
+ +      write_pcr(0);
+ +
+ +      printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
+ +             smp_processor_id());
+ +      printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
+ +             smp_processor_id(), pcr, pic);
+ +}
+ +
+ +/* Almost a direct copy of the powerpc PMC code.  */
+ +static DEFINE_SPINLOCK(perf_irq_lock);
+ +static void *perf_irq_owner_caller; /* mostly for debugging */
+ +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
+ +
+ +/* Invoked from level 15 PIL handler in trap table.  */
+ +void perfctr_irq(int irq, struct pt_regs *regs)
+ +{
+ +      clear_softint(1 << irq);
+ +      perf_irq(regs);
+ +}
+ +
+ +int register_perfctr_intr(void (*handler)(struct pt_regs *))
+ +{
+ +      int ret;
+ +
+ +      if (!handler)
+ +              return -EINVAL;
+ +
+ +      spin_lock(&perf_irq_lock);
+ +      if (perf_irq != unhandled_perf_irq) {
+ +              printk(KERN_WARNING "register_perfctr_intr: "
+ +                     "perf IRQ busy (reserved by caller %p)\n",
+ +                     perf_irq_owner_caller);
+ +              ret = -EBUSY;
+ +              goto out;
+ +      }
+ +
+ +      perf_irq_owner_caller = __builtin_return_address(0);
+ +      perf_irq = handler;
+ +
+ +      ret = 0;
+ +out:
+ +      spin_unlock(&perf_irq_lock);
+ +
+ +      return ret;
+ +}
+ +EXPORT_SYMBOL_GPL(register_perfctr_intr);
+ +
+ +void release_perfctr_intr(void (*handler)(struct pt_regs *))
+ +{
+ +      spin_lock(&perf_irq_lock);
+ +      perf_irq_owner_caller = NULL;
+ +      perf_irq = unhandled_perf_irq;
+ +      spin_unlock(&perf_irq_lock);
+ +}
+ +EXPORT_SYMBOL_GPL(release_perfctr_intr);
+ +
   #ifdef CONFIG_HOTPLUG_CPU
   void fixup_irqs(void)
   {
@@@ -851,7 -791,7 +854,7 @@@
                     !(irq_desc[irq].status & IRQ_PER_CPU)) {
                         if (irq_desc[irq].chip->set_affinity)
                                 irq_desc[irq].chip->set_affinity(irq,
-                                       irq_desc[irq].affinity);
+                                       &irq_desc[irq].affinity);
                 }
                 spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
         }
diff --combined arch/sparc/kernel/of_device_64.c

index 46e231f7c5ce2c37cf51b6111a0175f9dcc4280e,df2efb7fc14c864b37fb0273adaadb7be4cf975e..322046cdf85f2298da755944d381537c585188be
--- 1/arch/sparc/kernel/of_device_64.c
--- 2/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc/kernel/of_device_64.c
@@@ -780,7 -780,7 +780,7 @@@ out
         if (nid != -1) {
                 cpumask_t numa_mask = node_to_cpumask(nid);
   
-               irq_set_affinity(irq, numa_mask);
+               irq_set_affinity(irq, &numa_mask);
         }
   
         return irq;
@@@ -811,20 -811,20 +811,20 @@@ static struct of_device * __init scan_o
   
         irq = of_get_property(dp, "interrupts", &len);
         if (irq) {
- -              memcpy(op->irqs, irq, len);
                 op->num_irqs = len / 4;
+ +
+ +              /* Prevent overrunning the op->irqs[] array.  */
+ +              if (op->num_irqs > PROMINTR_MAX) {
+ +                      printk(KERN_WARNING "%s: Too many irqs (%d), "
+ +                             "limiting to %d.\n",
+ +                             dp->full_name, op->num_irqs, PROMINTR_MAX);
+ +                      op->num_irqs = PROMINTR_MAX;
+ +              }
+ +              memcpy(op->irqs, irq, op->num_irqs * 4);
         } else {
                 op->num_irqs = 0;
         }
   
- -      /* Prevent overrunning the op->irqs[] array.  */
- -      if (op->num_irqs > PROMINTR_MAX) {
- -              printk(KERN_WARNING "%s: Too many irqs (%d), "
- -                     "limiting to %d.\n",
- -                     dp->full_name, op->num_irqs, PROMINTR_MAX);
- -              op->num_irqs = PROMINTR_MAX;
- -      }
- -
         build_device_resources(op, parent);
         for (i = 0; i < op->num_irqs; i++)
                 op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]);
diff --combined arch/sparc/kernel/pci_msi.c

index 2e680f34f727fa61f5defef92e63b8144e365d70,0d0cd815e83e505b24e4b12a14f276fa37be892a..0d0cd815e83e505b24e4b12a14f276fa37be892a
--- 1/arch/sparc/kernel/pci_msi.c
--- 2/arch/sparc64/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@@ -288,7 -288,7 +288,7 @@@ static int bringup_one_msi_queue(struc
         if (nid != -1) {
                 cpumask_t numa_mask = node_to_cpumask(nid);
   
-               irq_set_affinity(irq, numa_mask);
+               irq_set_affinity(irq, &numa_mask);
         }
         err = request_irq(irq, sparc64_msiq_interrupt, 0,
                           "MSIQ",
diff --combined arch/sparc/kernel/smp_32.c

index e396c1f17a922deaef7b03a0751d06f1345770fd,1e5ac4e282e1285030aaa43380b91c01b7bcff48..1e5ac4e282e1285030aaa43380b91c01b7bcff48
--- 1/arch/sparc/kernel/smp_32.c
--- 2/arch/sparc/kernel/smp.c
+++ b/arch/sparc/kernel/smp_32.c
@@@ -39,8 -39,6 +39,6 @@@ volatile unsigned long cpu_callin_map[N
   unsigned char boot_cpu_id = 0;
   unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
   
- cpumask_t cpu_online_map = CPU_MASK_NONE;
- cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
   cpumask_t smp_commenced_mask = CPU_MASK_NONE;
   
   /* The only guaranteed locking primitive available on all Sparc
@@@ -334,7 -332,7 +332,7 @@@ void __init smp_setup_cpu_possible_map(
         instance = 0;
         while (!cpu_find_by_instance(instance, NULL, &mid)) {
                 if (mid < NR_CPUS) {
-                       cpu_set(mid, phys_cpu_present_map);
+                       cpu_set(mid, cpu_possible_map);
                         cpu_set(mid, cpu_present_map);
                 }
                 instance++;
@@@ -354,7 -352,7 +352,7 @@@ void __init smp_prepare_boot_cpu(void
   
         current_thread_info()->cpu = cpuid;
         cpu_set(cpuid, cpu_online_map);
-       cpu_set(cpuid, phys_cpu_present_map);
+       cpu_set(cpuid, cpu_possible_map);
   }
   
   int __cpuinit __cpu_up(unsigned int cpu)
diff --combined arch/sparc/kernel/smp_64.c

index bfe99d82d458702d32bf52a863cf00345e04a8e2,a97b8822c22ca029acc8ea1fe0fc6cc90c88efd0..46329799f3462bb4002024558be74f204e679f07
--- 1/arch/sparc/kernel/smp_64.c
--- 2/arch/sparc64/kernel/smp.c
+++ b/arch/sparc/kernel/smp_64.c
@@@ -49,14 -49,10 +49,10 @@@
   
   int sparc64_multi_core __read_mostly;
   
- cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
- cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
   DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
   cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
         { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
   
- EXPORT_SYMBOL(cpu_possible_map);
- EXPORT_SYMBOL(cpu_online_map);
   EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
   EXPORT_SYMBOL(cpu_core_map);
   
@@@ -163,7 -159,7 +159,7 @@@ static inline long get_delta (long *rt
         for (i = 0; i < NUM_ITERS; i++) {
                 t0 = tick_ops->get_tick();
                 go[MASTER] = 1;
- -              membar_storeload();
+ +              membar_safe("#StoreLoad");
                 while (!(tm = go[SLAVE]))
                         rmb();
                 go[SLAVE] = 0;
@@@ -257,7 -253,7 +253,7 @@@ static void smp_synchronize_one_tick(in
   
         /* now let the client proceed into his loop */
         go[MASTER] = 0;
- -      membar_storeload();
+ +      membar_safe("#StoreLoad");
   
         spin_lock_irqsave(&itc_sync_lock, flags);
         {
@@@ -267,7 -263,7 +263,7 @@@
                         go[MASTER] = 0;
                         wmb();
                         go[SLAVE] = tick_ops->get_tick();
- -                      membar_storeload();
+ +                      membar_safe("#StoreLoad");
                 }
         }
         spin_unlock_irqrestore(&itc_sync_lock, flags);
@@@ -773,7 -769,7 +769,7 @@@ static void xcall_deliver(u64 data0, u6
   
         /* Setup the initial cpu list.  */
         cnt = 0;
- -      for_each_cpu_mask_nr(i, *mask) {
+ +      for_each_cpu(i, mask) {
                 if (i == this_cpu || !cpu_online(i))
                         continue;
                 cpu_list[cnt++] = i;
@@@ -1122,6 -1118,7 +1118,6 @@@ void smp_capture(void
                        smp_processor_id());
   #endif
                 penguins_are_doing_time = 1;
- -              membar_storestore_loadstore();
                 atomic_inc(&smp_capture_registry);
                 smp_cross_call(&xcall_capture, 0, 0, 0);
                 while (atomic_read(&smp_capture_registry) != ncpus)
@@@ -1141,13 -1138,13 +1137,13 @@@ void smp_release(void
                        smp_processor_id());
   #endif
                 penguins_are_doing_time = 0;
- -              membar_storeload_storestore();
+ +              membar_safe("#StoreLoad");
                 atomic_dec(&smp_capture_registry);
         }
   }
   
- -/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
- - * can service tlb flush xcalls...
+ +/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
+ + * set, so they can service tlb flush xcalls...
    */
   extern void prom_world(int);
   
@@@ -1160,7 -1157,7 +1156,7 @@@ void smp_penguin_jailcell(int irq, stru
         __asm__ __volatile__("flushw");
         prom_world(1);
         atomic_inc(&smp_capture_registry);
- -      membar_storeload_storestore();
+ +      membar_safe("#StoreLoad");
         while (penguins_are_doing_time)
                 rmb();
         atomic_dec(&smp_capture_registry);
diff --combined arch/sparc/kernel/sparc_ksyms_32.c

index a4d45fc29b21e4ac9c9c069bdab7b8c277c2f6f5,32d11a5fe3a86f9e5f60acfefff09a2dd0d98c41..e1e97639231b208e8b01ee99373771020b49ec2f
--- 1/arch/sparc/kernel/sparc_ksyms_32.c
--- 2/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms_32.c
@@@ -61,6 -61,7 +61,6 @@@ extern void (*bzero_1page)(void *)
   extern void *__bzero(void *, size_t);
   extern void *__memscan_zero(void *, size_t);
   extern void *__memscan_generic(void *, int, size_t);
- -extern int __memcmp(const void *, const void *, __kernel_size_t);
   extern int __strncmp(const char *, const char *, __kernel_size_t);
   
   extern int __ashrdi3(int, int);
@@@ -112,17 -113,15 +112,13 @@@ EXPORT_PER_CPU_SYMBOL(__cpu_data)
   #ifdef CONFIG_SMP
   /* IRQ implementation. */
   EXPORT_SYMBOL(synchronize_irq);
- 
- /* CPU online map and active count. */
- EXPORT_SYMBOL(cpu_online_map);
- EXPORT_SYMBOL(phys_cpu_present_map);
   #endif
   
   EXPORT_SYMBOL(__udelay);
   EXPORT_SYMBOL(__ndelay);
   EXPORT_SYMBOL(rtc_lock);
- -#ifdef CONFIG_SUN_AUXIO
   EXPORT_SYMBOL(set_auxio);
   EXPORT_SYMBOL(get_auxio);
- -#endif
   EXPORT_SYMBOL(io_remap_pfn_range);
   
   #ifndef CONFIG_SMP
@@@ -210,6 -209,7 +206,6 @@@ EXPORT_SYMBOL(bzero_1page)
   EXPORT_SYMBOL(__bzero);
   EXPORT_SYMBOL(__memscan_zero);
   EXPORT_SYMBOL(__memscan_generic);
- -EXPORT_SYMBOL(__memcmp);
   EXPORT_SYMBOL(__strncmp);
   EXPORT_SYMBOL(__memmove);
   
diff --combined arch/sparc/kernel/time_64.c

index 141da375909129dea0ab0fffc9d5359d85e9a039,9df8f095a8b11a59e448bec001ac6aec852c480c..9df8f095a8b11a59e448bec001ac6aec852c480c
--- 1/arch/sparc/kernel/time_64.c
--- 2/arch/sparc64/kernel/time.c
+++ b/arch/sparc/kernel/time_64.c
@@@ -763,7 -763,7 +763,7 @@@ void __devinit setup_sparc64_timer(void
         sevt = &__get_cpu_var(sparc64_events);
   
         memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
-       sevt->cpumask = cpumask_of_cpu(smp_processor_id());
+       sevt->cpumask = cpumask_of(smp_processor_id());
   
         clockevents_register_device(sevt);
   }
diff --combined arch/x86/Kconfig

index 0f44add3e0b7850e676cf8cc45d8514ebc94a5ea,0ca2eb7573cd21a2d7298371345d2c32bbc0b0b4..249d1e0824b523e0a1e51a613a86a497649cc089
--- 1/arch/x86/Kconfig
--- 2/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -19,8 -19,6 +19,8 @@@ config X86_6
   config X86
         def_bool y
         select HAVE_AOUT if X86_32
+ +      select HAVE_READQ
+ +      select HAVE_WRITEQ
         select HAVE_UNSTABLE_SCHED_CLOCK
         select HAVE_IDE
         select HAVE_OPROFILE
@@@ -92,10 -90,6 +92,10 @@@ config GENERIC_IOMA
   config GENERIC_BUG
         def_bool y
         depends on BUG
+ +      select GENERIC_BUG_RELATIVE_POINTERS if X86_64
+ +
+ +config GENERIC_BUG_RELATIVE_POINTERS
+ +      bool
   
   config GENERIC_HWEIGHT
         def_bool y
@@@ -250,19 -244,16 +250,19 @@@ config X86_HAS_BOOT_CPU_I
   config SPARSE_IRQ
         bool "Support sparse irq numbering"
         depends on PCI_MSI || HT_IRQ
- -      default y
         help
- -        This enables support for sparse irq, esp for msi/msi-x. You may need
- -        if you have lots of cards supports msi-x installed.
+ +        This enables support for sparse irqs. This is useful for distro
+ +        kernels that want to define a high CONFIG_NR_CPUS value but still
+ +        want to have low kernel memory footprint on smaller machines.
   
- -        If you don't know what to do here, say Y.
+ +        ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
+ +          out the irq_desc[] array in a more NUMA-friendly way. )
+ +
+ +        If you don't know what to do here, say N.
   
   config NUMA_MIGRATE_IRQ_DESC
         bool "Move irq desc when changing irq smp_affinity"
- -      depends on SPARSE_IRQ && SMP
+ +      depends on SPARSE_IRQ && NUMA
         default n
         help
           This enables moving irq_desc to cpu/node that irq will use handled.
@@@ -273,13 -264,21 +273,13 @@@ config X86_FIND_SMP_CONFI
         def_bool y
         depends on X86_MPPARSE || X86_VOYAGER
   
- -if ACPI
   config X86_MPPARSE
- -      def_bool y
- -      bool "Enable MPS table"
+ +      bool "Enable MPS table" if ACPI
+ +      default y
         depends on X86_LOCAL_APIC
         help
           For old smp systems that do not have proper acpi support. Newer systems
           (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
- -endif
- -
- -if !ACPI
- -config X86_MPPARSE
- -      def_bool y
- -      depends on X86_LOCAL_APIC
- -endif
   
   choice
         prompt "Subarchitecture Type"
@@@ -501,7 -500,7 +501,7 @@@ config HPET_TIME
            The HPET provides a stable time base on SMP
            systems, unlike the TSC, but it is more expensive to access,
            as it is off-chip.  You can find the HPET spec at
- -         <http://www.intel.com/hardwaredesign/hpetspec.htm>.
+ +         <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>.
   
            You can safely choose Y here.  However, HPET will only be
            activated if the platform and the BIOS support this feature.
@@@ -588,7 -587,7 +588,7 @@@ config AMD_IOMM
   
   # need this always selected by IOMMU for the VIA workaround
   config SWIOTLB
- -      bool
+ +      def_bool y if X86_64
         help
           Support for software bounce buffers used on x86-64 systems
           which don't have a hardware IOMMU (e.g. the current generation
@@@ -601,19 -600,20 +601,20 @@@ config IOMMU_HELPE
   
   config MAXSMP
         bool "Configure Maximum number of SMP Processors and NUMA Nodes"
-       depends on X86_64 && SMP && BROKEN
+       depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+       select CPUMASK_OFFSTACK
         default n
         help
           Configure maximum number of CPUS and NUMA Nodes for this architecture.
           If unsure, say N.
   
   config NR_CPUS
-       int "Maximum number of CPUs (2-512)" if !MAXSMP
-       range 2 512
-       depends on SMP
+       int "Maximum number of CPUs" if SMP && !MAXSMP
+       range 2 512 if SMP && !MAXSMP
+       default "1" if !SMP
         default "4096" if MAXSMP
-       default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
-       default "8"
+       default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+       default "8" if SMP
         help
           This allows you to specify the maximum number of CPUs which this
           kernel will support.  The maximum supported value is 512 and the
@@@ -679,30 -679,6 +680,30 @@@ config X86_VISWS_API
         def_bool y
         depends on X86_32 && X86_VISWS
   
+ +config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+ +      bool "Reroute for broken boot IRQs"
+ +      default n
+ +      depends on X86_IO_APIC
+ +      help
+ +        This option enables a workaround that fixes a source of
+ +        spurious interrupts. This is recommended when threaded
+ +        interrupt handling is used on systems where the generation of
+ +        superfluous "boot interrupts" cannot be disabled.
+ +
+ +        Some chipsets generate a legacy INTx "boot IRQ" when the IRQ
+ +        entry in the chipset's IO-APIC is masked (as, e.g. the RT
+ +        kernel does during interrupt handling). On chipsets where this
+ +        boot IRQ generation cannot be disabled, this workaround keeps
+ +        the original IRQ line masked so that only the equivalent "boot
+ +        IRQ" is delivered to the CPUs. The workaround also tells the
+ +        kernel to set up the IRQ handler on the boot IRQ line. In this
+ +        way only one interrupt is delivered to the kernel. Otherwise
+ +        the spurious second interrupt may cause the kernel to bring
+ +        down (vital) interrupt lines.
+ +
+ +        Only affects "broken" chipsets. Interrupt sharing may be
+ +        increased on these systems.
+ +
   config X86_MCE
         bool "Machine Check Exception"
         depends on !X86_VOYAGER
@@@ -999,37 -975,24 +1000,37 @@@ config X86_PA
   config ARCH_PHYS_ADDR_T_64BIT
          def_bool X86_64 || X86_PAE
   
+ +config DIRECT_GBPAGES
+ +      bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
+ +      default y
+ +      depends on X86_64
+ +      help
+ +        Allow the kernel linear mapping to use 1GB pages on CPUs that
+ +        support it. This can improve the kernel's performance a tiny bit by
+ +        reducing TLB pressure. If in doubt, say "Y".
+ +
   # Common NUMA Features
   config NUMA
- -      bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
+ +      bool "Numa Memory Allocation and Scheduler Support"
         depends on SMP
         depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
         default n if X86_PC
         default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
         help
           Enable NUMA (Non Uniform Memory Access) support.
+ +
           The kernel will try to allocate memory used by a CPU on the
           local memory controller of the CPU and add some more
           NUMA awareness to the kernel.
   
- -        For 32-bit this is currently highly experimental and should be only
- -        used for kernel development. It might also cause boot failures.
- -        For 64-bit this is recommended on all multiprocessor Opteron systems.
- -        If the system is EM64T, you should say N unless your system is
- -        EM64T NUMA.
+ +        For 64-bit this is recommended if the system is Intel Core i7
+ +        (or later), AMD Opteron, or EM64T NUMA.
+ +
+ +        For 32-bit this is only needed on (rare) 32-bit-only platforms
+ +        that support NUMA topologies, such as NUMAQ / Summit, or if you
+ +        boot a 32-bit kernel on a 64-bit NUMA platform.
+ +
+ +        Otherwise, you should say N.
   
   comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
         depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
@@@ -1549,10 -1512,6 +1550,10 @@@ config ARCH_ENABLE_MEMORY_HOTPLU
         def_bool y
         depends on X86_64 || (X86_32 && HIGHMEM)
   
+ +config ARCH_ENABLE_MEMORY_HOTREMOVE
+ +      def_bool y
+ +      depends on MEMORY_HOTPLUG
+ +
   config HAVE_ARCH_EARLY_PFN_TO_NID
         def_bool X86_64
         depends on NUMA
diff --combined arch/x86/include/asm/irq.h

index 28e409fc73f3df33e4c6b2cbde5e99e0433e0fa1,4bb732e45a85f5f1296452f31c34b043783b8ecd..592688ed04d33462d172936d56dff55fc6e52eac
--- 1/arch/x86/include/asm/irq.h
--- 2/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@@ -31,9 -31,13 +31,9 @@@ static inline int irq_canonicalize(int 
   # endif
   #endif
   
- -#ifdef CONFIG_IRQBALANCE
- -extern int irqbalance_disable(char *str);
- -#endif
- -
   #ifdef CONFIG_HOTPLUG_CPU
   #include <linux/cpumask.h>
- extern void fixup_irqs(cpumask_t map);
+ extern void fixup_irqs(void);
   #endif
   
   extern unsigned int do_IRQ(struct pt_regs *regs);
@@@ -42,5 -46,6 +42,6 @@@ extern void native_init_IRQ(void)
   
   /* Interrupt vector management */
   extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+ extern int vector_used_by_percpu_irq(unsigned int vector);
   
   #endif /* _ASM_X86_IRQ_H */
diff --combined arch/x86/kernel/apic.c

index b5229affb95397cf8dbab2f49ab823497f8ba531,b9019271af62a01607dd835534394fd6beb3e516..6b7f824db16003a89dd2fca8786c130459e26dda
--- 1/arch/x86/kernel/apic.c
--- 2/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@@ -30,7 -30,6 +30,7 @@@
   #include <linux/module.h>
   #include <linux/dmi.h>
   #include <linux/dmar.h>
+ +#include <linux/ftrace.h>
   
   #include <asm/atomic.h>
   #include <asm/smp.h>
@@@ -119,8 -118,6 +119,6 @@@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_o
   
   int first_system_vector = 0xfe;
   
- char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
- 
   /*
    * Debug level, exported for io_apic.c
    */
@@@ -142,7 -139,7 +140,7 @@@ static int lapic_next_event(unsigned lo
                             struct clock_event_device *evt);
   static void lapic_timer_setup(enum clock_event_mode mode,
                               struct clock_event_device *evt);
- static void lapic_timer_broadcast(cpumask_t mask);
+ static void lapic_timer_broadcast(const cpumask_t *mask);
   static void apic_pm_activate(void);
   
   /*
@@@ -455,7 -452,7 +453,7 @@@ static void lapic_timer_setup(enum cloc
   /*
    * Local APIC timer broadcast function
    */
- static void lapic_timer_broadcast(cpumask_t mask)
+ static void lapic_timer_broadcast(const cpumask_t *mask)
   {
   #ifdef CONFIG_SMP
         send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@@ -471,7 -468,7 +469,7 @@@ static void __cpuinit setup_APIC_timer(
         struct clock_event_device *levt = &__get_cpu_var(lapic_events);
   
         memcpy(levt, &lapic_clockevent, sizeof(*levt));
-       levt->cpumask = cpumask_of_cpu(smp_processor_id());
+       levt->cpumask = cpumask_of(smp_processor_id());
   
         clockevents_register_device(levt);
   }
@@@ -778,7 -775,11 +776,7 @@@ static void local_apic_timer_interrupt(
         /*
          * the NMI deadlock-detector uses this.
          */
- -#ifdef CONFIG_X86_64
- -      add_pda(apic_timer_irqs, 1);
- -#else
- -      per_cpu(irq_stat, cpu).apic_timer_irqs++;
- -#endif
+ +      inc_irq_stat(apic_timer_irqs);
   
         evt->event_handler(evt);
   }
@@@ -791,7 -792,7 +789,7 @@@
    * [ if a single-CPU system runs an SMP kernel then we call the local
    *   interrupt as well. Thus we cannot inline the local irq ... ]
    */
- -void smp_apic_timer_interrupt(struct pt_regs *regs)
+ +void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
   {
         struct pt_regs *old_regs = set_irq_regs(regs);
   
@@@ -805,7 -806,9 +803,7 @@@
          * Besides, if we don't timer interrupts ignore the global
          * interrupt lock, which is the WrongThing (tm) to do.
          */
- -#ifdef CONFIG_X86_64
         exit_idle();
- -#endif
         irq_enter();
         local_apic_timer_interrupt();
         irq_exit();
@@@ -1663,7 -1666,9 +1661,7 @@@ void smp_spurious_interrupt(struct pt_r
   {
         u32 v;
   
- -#ifdef CONFIG_X86_64
         exit_idle();
- -#endif
         irq_enter();
         /*
          * Check if this really is a spurious interrupt and ACK it
@@@ -1674,11 -1679,14 +1672,11 @@@
         if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
                 ack_APIC_irq();
   
- -#ifdef CONFIG_X86_64
- -      add_pda(irq_spurious_count, 1);
- -#else
+ +      inc_irq_stat(irq_spurious_count);
+ +
         /* see sw-dev-man vol 3, chapter 7.4.13.5 */
         pr_info("spurious APIC interrupt on CPU#%d, "
                 "should never happen.\n", smp_processor_id());
- -      __get_cpu_var(irq_stat).irq_spurious_count++;
- -#endif
         irq_exit();
   }
   
@@@ -1689,7 -1697,9 +1687,7 @@@ void smp_error_interrupt(struct pt_reg
   {
         u32 v, v1;
   
- -#ifdef CONFIG_X86_64
         exit_idle();
- -#endif
         irq_enter();
         /* First tickle the hardware, only then report what went on. -- REW */
         v = apic_read(APIC_ESR);
@@@ -1807,28 -1817,32 +1805,32 @@@ void disconnect_bsp_APIC(int virt_wire_
   void __cpuinit generic_processor_info(int apicid, int version)
   {
         int cpu;
-       cpumask_t tmp_map;
   
         /*
          * Validate version
          */
         if (version == 0x0) {
                 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
-                       "fixing up to 0x10. (tell your hw vendor)\n",
-                       version);
+                          "fixing up to 0x10. (tell your hw vendor)\n",
+                               version);
                 version = 0x10;
         }
         apic_version[apicid] = version;
   
-       if (num_processors >= NR_CPUS) {
-               pr_warning("WARNING: NR_CPUS limit of %i reached."
-                       "  Processor ignored.\n", NR_CPUS);
+       if (num_processors >= nr_cpu_ids) {
+               int max = nr_cpu_ids;
+               int thiscpu = max + disabled_cpus;
+ 
+               pr_warning(
+                       "ACPI: NR_CPUS/possible_cpus limit of %i reached."
+                       "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+ 
+               disabled_cpus++;
                 return;
         }
   
         num_processors++;
-       cpus_complement(tmp_map, cpu_present_map);
-       cpu = first_cpu(tmp_map);
+       cpu = cpumask_next_zero(-1, cpu_present_mask);
   
         physid_set(apicid, phys_cpu_present_map);
         if (apicid == boot_cpu_physical_apicid) {
@@@ -1878,8 -1892,8 +1880,8 @@@
         }
   #endif
   
-       cpu_set(cpu, cpu_possible_map);
-       cpu_set(cpu, cpu_present_map);
+       set_cpu_possible(cpu, true);
+       set_cpu_present(cpu, true);
   }
   
   #ifdef CONFIG_X86_64
@@@ -2081,7 -2095,7 +2083,7 @@@ __cpuinit int apic_is_clustered_box(voi
         bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
         bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
   
-       for (i = 0; i < NR_CPUS; i++) {
+       for (i = 0; i < nr_cpu_ids; i++) {
                 /* are we being called early in kernel startup? */
                 if (bios_cpu_apicid) {
                         id = bios_cpu_apicid[i];
diff --combined arch/x86/kernel/cpu/intel_cacheinfo.c

index 68b5d8681cbb54597edf9021721745dbab19cb5c,fb7f946cb65ec139e6ec9c5ada4900c757482df3..c6ecda64f5f125b1ba358edac6d9e5693414b020
--- 1/arch/x86/kernel/cpu/intel_cacheinfo.c
--- 2/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@@ -534,31 -534,16 +534,16 @@@ static void __cpuinit free_cache_attrib
         per_cpu(cpuid4_info, cpu) = NULL;
   }
   
- static int __cpuinit detect_cache_attributes(unsigned int cpu)
+ static void get_cpu_leaves(void *_retval)
   {
-       struct _cpuid4_info     *this_leaf;
-       unsigned long           j;
-       int                     retval;
-       cpumask_t               oldmask;
- 
-       if (num_cache_leaves == 0)
-               return -ENOENT;
- 
-       per_cpu(cpuid4_info, cpu) = kzalloc(
-           sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-       if (per_cpu(cpuid4_info, cpu) == NULL)
-               return -ENOMEM;
- 
-       oldmask = current->cpus_allowed;
-       retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-       if (retval)
-               goto out;
+       int j, *retval = _retval, cpu = smp_processor_id();
   
         /* Do cpuid and store the results */
         for (j = 0; j < num_cache_leaves; j++) {
+               struct _cpuid4_info *this_leaf;
                 this_leaf = CPUID4_INFO_IDX(cpu, j);
-               retval = cpuid4_cache_lookup(j, this_leaf);
-               if (unlikely(retval < 0)) {
+               *retval = cpuid4_cache_lookup(j, this_leaf);
+               if (unlikely(*retval < 0)) {
                         int i;
   
                         for (i = 0; i < j; i++)
@@@ -567,9 -552,21 +552,21 @@@
                 }
                 cache_shared_cpu_map_setup(cpu, j);
         }
-       set_cpus_allowed_ptr(current, &oldmask);
+ }
+ 
+ static int __cpuinit detect_cache_attributes(unsigned int cpu)
+ {
+       int                     retval;
+ 
+       if (num_cache_leaves == 0)
+               return -ENOENT;
+ 
+       per_cpu(cpuid4_info, cpu) = kzalloc(
+           sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+       if (per_cpu(cpuid4_info, cpu) == NULL)
+               return -ENOMEM;
   
- out:
+       smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
         if (retval) {
                 kfree(per_cpu(cpuid4_info, cpu));
                 per_cpu(cpuid4_info, cpu) = NULL;
@@@ -626,8 -623,8 +623,8 @@@ static ssize_t show_shared_cpu_map_func
                 cpumask_t *mask = &this_leaf->shared_cpu_map;
   
                 n = type?
-                       cpulist_scnprintf(buf, len-2, *mask):
-                       cpumask_scnprintf(buf, len-2, *mask);
+                       cpulist_scnprintf(buf, len-2, mask) :
+                       cpumask_scnprintf(buf, len-2, mask);
                 buf[n++] = '\n';
                 buf[n] = '\0';
         }
@@@ -644,17 -641,20 +641,17 @@@ static inline ssize_t show_shared_cpu_l
         return show_shared_cpu_map_func(leaf, 1, buf);
   }
   
- -static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
- -      switch(this_leaf->eax.split.type) {
- -          case CACHE_TYPE_DATA:
+ +static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+ +{
+ +      switch (this_leaf->eax.split.type) {
+ +      case CACHE_TYPE_DATA:
                 return sprintf(buf, "Data\n");
- -              break;
- -          case CACHE_TYPE_INST:
+ +      case CACHE_TYPE_INST:
                 return sprintf(buf, "Instruction\n");
- -              break;
- -          case CACHE_TYPE_UNIFIED:
+ +      case CACHE_TYPE_UNIFIED:
                 return sprintf(buf, "Unified\n");
- -              break;
- -          default:
+ +      default:
                 return sprintf(buf, "Unknown\n");
- -              break;
         }
   }
   
diff --combined arch/x86/kernel/cpu/mcheck/mce_amd_64.c

index 748c8f9e7a0527d1e6f6298852113995f033350b,a1de80f368f13898043b4b93ea4599a9122d9838..a5a5e0530370b6c007e484dc981f110f8b310dbd
--- 1/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
--- 2/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@@ -83,34 -83,41 +83,41 @@@ static DEFINE_PER_CPU(unsigned char, ba
    * CPU Initialization
    */
   
+ struct thresh_restart {
+       struct threshold_block *b;
+       int reset;
+       u16 old_limit;
+ };
+ 
   /* must be called with correct cpu affinity */
- static void threshold_restart_bank(struct threshold_block *b,
-                                  int reset, u16 old_limit)
+ static long threshold_restart_bank(void *_tr)
   {
+       struct thresh_restart *tr = _tr;
         u32 mci_misc_hi, mci_misc_lo;
   
-       rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+       rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
   
-       if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
-               reset = 1;      /* limit cannot be lower than err count */
+       if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+               tr->reset = 1;  /* limit cannot be lower than err count */
   
-       if (reset) {            /* reset err count and overflow bit */
+       if (tr->reset) {                /* reset err count and overflow bit */
                 mci_misc_hi =
                     (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
-                   (THRESHOLD_MAX - b->threshold_limit);
-       } else if (old_limit) { /* change limit w/o reset */
+                   (THRESHOLD_MAX - tr->b->threshold_limit);
+       } else if (tr->old_limit) {     /* change limit w/o reset */
                 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
-                   (old_limit - b->threshold_limit);
+                   (tr->old_limit - tr->b->threshold_limit);
                 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
                     (new_count & THRESHOLD_MAX);
         }
   
-       b->interrupt_enable ?
+       tr->b->interrupt_enable ?
             (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
             (mci_misc_hi &= ~MASK_INT_TYPE_HI);
   
         mci_misc_hi |= MASK_COUNT_EN_HI;
-       wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+       wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+       return 0;
   }
   
   /* cpu init entry point, called from mce.c with preempt off */
@@@ -120,6 -127,7 +127,7 @@@ void __cpuinit mce_amd_feature_init(str
         unsigned int cpu = smp_processor_id();
         u8 lvt_off;
         u32 low = 0, high = 0, address = 0;
+       struct thresh_restart tr;
   
         for (bank = 0; bank < NR_BANKS; ++bank) {
                 for (block = 0; block < NR_BLOCKS; ++block) {
@@@ -162,7 -170,10 +170,10 @@@
                         wrmsr(address, low, high);
   
                         threshold_defaults.address = address;
-                       threshold_restart_bank(&threshold_defaults, 0, 0);
+                       tr.b = &threshold_defaults;
+                       tr.reset = 0;
+                       tr.old_limit = 0;
+                       threshold_restart_bank(&tr);
                 }
         }
   }
@@@ -237,7 -248,7 +248,7 @@@ asmlinkage void mce_threshold_interrupt
                 }
         }
   out:
- -      add_pda(irq_threshold_count, 1);
+ +      inc_irq_stat(irq_threshold_count);
         irq_exit();
   }
   
@@@ -251,20 -262,6 +262,6 @@@ struct threshold_attr 
         ssize_t(*store) (struct threshold_block *, const char *, size_t count);
   };
   
- static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
-                                          cpumask_t *newmask)
- {
-       *oldmask = current->cpus_allowed;
-       cpus_clear(*newmask);
-       cpu_set(cpu, *newmask);
-       set_cpus_allowed_ptr(current, newmask);
- }
- 
- static void affinity_restore(const cpumask_t *oldmask)
- {
-       set_cpus_allowed_ptr(current, oldmask);
- }
- 
   #define SHOW_FIELDS(name)                                           \
   static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
   {                                                                   \
@@@ -277,15 -274,16 +274,16 @@@ static ssize_t store_interrupt_enable(s
                                       const char *buf, size_t count)
   {
         char *end;
-       cpumask_t oldmask, newmask;
+       struct thresh_restart tr;
         unsigned long new = simple_strtoul(buf, &end, 0);
         if (end == buf)
                 return -EINVAL;
         b->interrupt_enable = !!new;
   
-       affinity_set(b->cpu, &oldmask, &newmask);
-       threshold_restart_bank(b, 0, 0);
-       affinity_restore(&oldmask);
+       tr.b = b;
+       tr.reset = 0;
+       tr.old_limit = 0;
+       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
   
         return end - buf;
   }
@@@ -294,8 -292,7 +292,7 @@@ static ssize_t store_threshold_limit(st
                                      const char *buf, size_t count)
   {
         char *end;
-       cpumask_t oldmask, newmask;
-       u16 old;
+       struct thresh_restart tr;
         unsigned long new = simple_strtoul(buf, &end, 0);
         if (end == buf)
                 return -EINVAL;
@@@ -303,34 -300,36 +300,36 @@@
                 new = THRESHOLD_MAX;
         if (new < 1)
                 new = 1;
-       old = b->threshold_limit;
+       tr.old_limit = b->threshold_limit;
         b->threshold_limit = new;
+       tr.b = b;
+       tr.reset = 0;
   
-       affinity_set(b->cpu, &oldmask, &newmask);
-       threshold_restart_bank(b, 0, old);
-       affinity_restore(&oldmask);
+       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
   
         return end - buf;
   }
   
- static ssize_t show_error_count(struct threshold_block *b, char *buf)
+ static long local_error_count(void *_b)
   {
-       u32 high, low;
-       cpumask_t oldmask, newmask;
-       affinity_set(b->cpu, &oldmask, &newmask);
+       struct threshold_block *b = _b;
+       u32 low, high;
+ 
         rdmsr(b->address, low, high);
-       affinity_restore(&oldmask);
-       return sprintf(buf, "%x\n",
-                      (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+       return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+ }
+ 
+ static ssize_t show_error_count(struct threshold_block *b, char *buf)
+ {
+       return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
   }
   
   static ssize_t store_error_count(struct threshold_block *b,
                                  const char *buf, size_t count)
   {
-       cpumask_t oldmask, newmask;
-       affinity_set(b->cpu, &oldmask, &newmask);
-       threshold_restart_bank(b, 1, 0);
-       affinity_restore(&oldmask);
+       struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+ 
+       work_on_cpu(b->cpu, threshold_restart_bank, &tr);
         return 1;
   }
   
@@@ -463,12 -462,19 +462,19 @@@ out_free
         return err;
   }
   
+ static long local_allocate_threshold_blocks(void *_bank)
+ {
+       unsigned int *bank = _bank;
+ 
+       return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+                                        MSR_IA32_MC0_MISC + *bank * 4);
+ }
+ 
   /* symlinks sibling shared banks to first core.  first core owns dir/files. */
   static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
   {
         int i, err = 0;
         struct threshold_bank *b = NULL;
-       cpumask_t oldmask, newmask;
         char name[32];
   
         sprintf(name, "threshold_bank%i", bank);
@@@ -519,11 -525,7 +525,7 @@@
   
         per_cpu(threshold_banks, cpu)[bank] = b;
   
-       affinity_set(cpu, &oldmask, &newmask);
-       err = allocate_threshold_blocks(cpu, bank, 0,
-                                       MSR_IA32_MC0_MISC + bank * 4);
-       affinity_restore(&oldmask);
- 
+       err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
         if (err)
                 goto out_free;
   
diff --combined arch/x86/kernel/genx2apic_uv_x.c

index dece17289731b951c4189bd5f66db0a919f91197,0e88be11227df8cc0e9b72f4eb74af2928fffb16..b193e082f6ce41041a2a15503bb103c9e4871994
--- 1/arch/x86/kernel/genx2apic_uv_x.c
--- 2/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@@ -10,7 -10,6 +10,7 @@@
   
   #include <linux/kernel.h>
   #include <linux/threads.h>
+ +#include <linux/cpu.h>
   #include <linux/cpumask.h>
   #include <linux/string.h>
   #include <linux/ctype.h>
@@@ -18,9 -17,6 +18,9 @@@
   #include <linux/sched.h>
   #include <linux/module.h>
   #include <linux/hardirq.h>
+ +#include <linux/timer.h>
+ +#include <linux/proc_fs.h>
+ +#include <asm/current.h>
   #include <asm/smp.h>
   #include <asm/ipi.h>
   #include <asm/genapic.h>
@@@ -79,16 -75,15 +79,15 @@@ EXPORT_SYMBOL(sn_rtc_cycles_per_second)
   
   /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
   
- static cpumask_t uv_target_cpus(void)
+ static const struct cpumask *uv_target_cpus(void)
   {
-       return cpumask_of_cpu(0);
+       return cpumask_of(0);
   }
   
- static cpumask_t uv_vector_allocation_domain(int cpu)
+ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
   {
-       cpumask_t domain = CPU_MASK_NONE;
-       cpu_set(cpu, domain);
-       return domain;
+       cpumask_clear(retmask);
+       cpumask_set_cpu(cpu, retmask);
   }
   
   int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@@ -127,28 -122,37 +126,37 @@@ static void uv_send_IPI_one(int cpu, in
         uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
   }
   
- static void uv_send_IPI_mask(cpumask_t mask, int vector)
+ static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
   {
         unsigned int cpu;
   
-       for_each_possible_cpu(cpu)
-               if (cpu_isset(cpu, mask))
+       for_each_cpu(cpu, mask)
+               uv_send_IPI_one(cpu, vector);
+ }
+ 
+ static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+ {
+       unsigned int cpu;
+       unsigned int this_cpu = smp_processor_id();
+ 
+       for_each_cpu(cpu, mask)
+               if (cpu != this_cpu)
                         uv_send_IPI_one(cpu, vector);
   }
   
   static void uv_send_IPI_allbutself(int vector)
   {
-       cpumask_t mask = cpu_online_map;
- 
-       cpu_clear(smp_processor_id(), mask);
+       unsigned int cpu;
+       unsigned int this_cpu = smp_processor_id();
   
-       if (!cpus_empty(mask))
-               uv_send_IPI_mask(mask, vector);
+       for_each_online_cpu(cpu)
+               if (cpu != this_cpu)
+                       uv_send_IPI_one(cpu, vector);
   }
   
   static void uv_send_IPI_all(int vector)
   {
-       uv_send_IPI_mask(cpu_online_map, vector);
+       uv_send_IPI_mask(cpu_online_mask, vector);
   }
   
   static int uv_apic_id_registered(void)
@@@ -160,7 -164,7 +168,7 @@@ static void uv_init_apic_ldr(void
   {
   }
   
- static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
   {
         int cpu;
   
@@@ -168,13 -172,30 +176,30 @@@
          * We're using fixed IRQ delivery, can only return one phys APIC ID.
          * May as well be the first.
          */
-       cpu = first_cpu(cpumask);
+       cpu = cpumask_first(cpumask);
         if ((unsigned)cpu < nr_cpu_ids)
                 return per_cpu(x86_cpu_to_apicid, cpu);
         else
                 return BAD_APICID;
   }
   
+ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                                             const struct cpumask *andmask)
+ {
+       int cpu;
+ 
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       for_each_cpu_and(cpu, cpumask, andmask)
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       break;
+       if (cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       return BAD_APICID;
+ }
+ 
   static unsigned int get_apic_id(unsigned long x)
   {
         unsigned int id;
@@@ -222,8 -243,10 +247,10 @@@ struct genapic apic_x2apic_uv_x = 
         .send_IPI_all = uv_send_IPI_all,
         .send_IPI_allbutself = uv_send_IPI_allbutself,
         .send_IPI_mask = uv_send_IPI_mask,
+       .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
         .send_IPI_self = uv_send_IPI_self,
         .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
         .phys_pkg_id = phys_pkg_id,
         .get_apic_id = get_apic_id,
         .set_apic_id = set_apic_id,
@@@ -359,103 -382,6 +386,103 @@@ static __init void uv_rtc_init(void
                 sn_rtc_cycles_per_second = ticks_per_sec;
   }
   
+ +/*
+ + * percpu heartbeat timer
+ + */
+ +static void uv_heartbeat(unsigned long ignored)
+ +{
+ +      struct timer_list *timer = &uv_hub_info->scir.timer;
+ +      unsigned char bits = uv_hub_info->scir.state;
+ +
+ +      /* flip heartbeat bit */
+ +      bits ^= SCIR_CPU_HEARTBEAT;
+ +
+ +      /* is this cpu idle? */
+ +      if (idle_cpu(raw_smp_processor_id()))
+ +              bits &= ~SCIR_CPU_ACTIVITY;
+ +      else
+ +              bits |= SCIR_CPU_ACTIVITY;
+ +
+ +      /* update system controller interface reg */
+ +      uv_set_scir_bits(bits);
+ +
+ +      /* enable next timer period */
+ +      mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+ +}
+ +
+ +static void __cpuinit uv_heartbeat_enable(int cpu)
+ +{
+ +      if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+ +              struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+ +
+ +              uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+ +              setup_timer(timer, uv_heartbeat, cpu);
+ +              timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+ +              add_timer_on(timer, cpu);
+ +              uv_cpu_hub_info(cpu)->scir.enabled = 1;
+ +      }
+ +
+ +      /* check boot cpu */
+ +      if (!uv_cpu_hub_info(0)->scir.enabled)
+ +              uv_heartbeat_enable(0);
+ +}
+ +
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +static void __cpuinit uv_heartbeat_disable(int cpu)
+ +{
+ +      if (uv_cpu_hub_info(cpu)->scir.enabled) {
+ +              uv_cpu_hub_info(cpu)->scir.enabled = 0;
+ +              del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+ +      }
+ +      uv_set_cpu_scir_bits(cpu, 0xff);
+ +}
+ +
+ +/*
+ + * cpu hotplug notifier
+ + */
+ +static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+ +                                     unsigned long action, void *hcpu)
+ +{
+ +      long cpu = (long)hcpu;
+ +
+ +      switch (action) {
+ +      case CPU_ONLINE:
+ +              uv_heartbeat_enable(cpu);
+ +              break;
+ +      case CPU_DOWN_PREPARE:
+ +              uv_heartbeat_disable(cpu);
+ +              break;
+ +      default:
+ +              break;
+ +      }
+ +      return NOTIFY_OK;
+ +}
+ +
+ +static __init void uv_scir_register_cpu_notifier(void)
+ +{
+ +      hotcpu_notifier(uv_scir_cpu_notify, 0);
+ +}
+ +
+ +#else /* !CONFIG_HOTPLUG_CPU */
+ +
+ +static __init void uv_scir_register_cpu_notifier(void)
+ +{
+ +}
+ +
+ +static __init int uv_init_heartbeat(void)
+ +{
+ +      int cpu;
+ +
+ +      if (is_uv_system())
+ +              for_each_online_cpu(cpu)
+ +                      uv_heartbeat_enable(cpu);
+ +      return 0;
+ +}
+ +
+ +late_initcall(uv_init_heartbeat);
+ +
+ +#endif /* !CONFIG_HOTPLUG_CPU */
+ +
   /*
    * Called on each cpu to initialize the per_cpu UV data area.
    *    ZZZ hotplug not supported yet
@@@ -529,7 -455,7 +556,7 @@@ void __init uv_system_init(void
   
         uv_bios_init();
         uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
- -                          &uv_coherency_id, &uv_region_size);
+ +                          &sn_coherency_id, &sn_region_size);
         uv_rtc_init();
   
         for_each_present_cpu(cpu) {
@@@ -540,7 -466,8 +567,7 @@@
                 uv_blade_info[blade].nr_possible_cpus++;
   
                 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
- -              uv_cpu_hub_info(cpu)->lowmem_remap_top =
- -                                      lowmem_redir_base + lowmem_redir_size;
+ +              uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
                 uv_cpu_hub_info(cpu)->m_val = m_val;
                 uv_cpu_hub_info(cpu)->n_val = m_val;
                 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
@@@ -550,8 -477,7 +577,8 @@@
                 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
                 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
                 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
- -              uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
+ +              uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+ +              uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
                 uv_node_to_blade[nid] = blade;
                 uv_cpu_to_blade[cpu] = blade;
                 max_pnode = max(pnode, max_pnode);
@@@ -568,6 -494,4 +595,6 @@@
         map_mmioh_high(max_pnode);
   
         uv_cpu_init();
+ +      uv_scir_register_cpu_notifier();
+ +      proc_mkdir("sgi_uv", NULL);
   }
diff --combined arch/x86/kernel/hpet.c

index 845ea097383ee4051a24b54bca8da4c29bd9f6d1,e76d7e272974ffebd37a5010b2d2f4b3635c1bb7..cd759ad90690e72d109aed4309adeb87755977e5
--- 1/arch/x86/kernel/hpet.c
--- 2/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@@ -33,9 -33,7 +33,9 @@@
    * HPET address is set in acpi/boot.c, when an ACPI entry exists
    */
   unsigned long                         hpet_address;
- -unsigned long                         hpet_num_timers;
+ +#ifdef CONFIG_PCI_MSI
+ +static unsigned long                  hpet_num_timers;
+ +#endif
   static void __iomem                   *hpet_virt_address;
   
   struct hpet_dev {
@@@ -248,7 -246,7 +248,7 @@@ static void hpet_legacy_clockevent_regi
          * Start hpet with the boot cpu mask and make it
          * global after the IO_APIC has been initialized.
          */
-       hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+       hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
         clockevents_register_device(&hpet_clockevent);
         global_clock_event = &hpet_clockevent;
         printk(KERN_DEBUG "hpet clockevent registered\n");
@@@ -303,7 -301,7 +303,7 @@@ static void hpet_set_mode(enum clock_ev
                         struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
                         hpet_setup_msi_irq(hdev->irq);
                         disable_irq(hdev->irq);
-                       irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+                       irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
                         enable_irq(hdev->irq);
                 }
                 break;
@@@ -451,7 -449,7 +451,7 @@@ static int hpet_setup_irq(struct hpet_d
                 return -1;
   
         disable_irq(dev->irq);
-       irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+       irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
         enable_irq(dev->irq);
   
         printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@@ -502,7 -500,7 +502,7 @@@ static void init_one_hpet_msi_clockeven
         /* 5 usec minimum reprogramming delta. */
         evt->min_delta_ns = 5000;
   
-       evt->cpumask = cpumask_of_cpu(hdev->cpu);
+       evt->cpumask = cpumask_of(hdev->cpu);
         clockevents_register_device(evt);
   }
   
@@@ -813,7 -811,7 +813,7 @@@ int __init hpet_enable(void
   
   out_nohpet:
         hpet_clear_mapping();
- -      boot_hpet_disable = 1;
+ +      hpet_address = 0;
         return 0;
   }
   
@@@ -836,11 -834,10 +836,11 @@@ static __init int hpet_late_init(void
   
                 hpet_address = force_hpet_address;
                 hpet_enable();
- -              if (!hpet_virt_address)
- -                      return -ENODEV;
         }
   
+ +      if (!hpet_virt_address)
+ +              return -ENODEV;
+ +
         hpet_reserve_platform_timers(hpet_readl(HPET_ID));
   
         for_each_online_cpu(cpu) {
diff --combined arch/x86/kernel/io_apic.c

index 74917658b004aea2eb08dca5c5c9ba8a775eb81e,1cbf7c8d46e0d681752f4e6ac9fe83d0464254f0..62ecfc991e1e61b747567cb2ada8f8d68d0774b2
--- 1/arch/x86/kernel/io_apic.c
--- 2/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@@ -136,8 -136,8 +136,8 @@@ static struct irq_pin_list *get_one_fre
   
   struct irq_cfg {
         struct irq_pin_list *irq_2_pin;
-       cpumask_t domain;
-       cpumask_t old_domain;
+       cpumask_var_t domain;
+       cpumask_var_t old_domain;
         unsigned move_cleanup_count;
         u8 vector;
         u8 move_in_progress : 1;
@@@ -152,25 -152,25 +152,25 @@@ static struct irq_cfg irq_cfgx[] = 
   #else
   static struct irq_cfg irq_cfgx[NR_IRQS] = {
   #endif
-       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+       [0]  = { .vector = IRQ0_VECTOR,  },
+       [1]  = { .vector = IRQ1_VECTOR,  },
+       [2]  = { .vector = IRQ2_VECTOR,  },
+       [3]  = { .vector = IRQ3_VECTOR,  },
+       [4]  = { .vector = IRQ4_VECTOR,  },
+       [5]  = { .vector = IRQ5_VECTOR,  },
+       [6]  = { .vector = IRQ6_VECTOR,  },
+       [7]  = { .vector = IRQ7_VECTOR,  },
+       [8]  = { .vector = IRQ8_VECTOR,  },
+       [9]  = { .vector = IRQ9_VECTOR,  },
+       [10] = { .vector = IRQ10_VECTOR, },
+       [11] = { .vector = IRQ11_VECTOR, },
+       [12] = { .vector = IRQ12_VECTOR, },
+       [13] = { .vector = IRQ13_VECTOR, },
+       [14] = { .vector = IRQ14_VECTOR, },
+       [15] = { .vector = IRQ15_VECTOR, },
   };
   
- -void __init arch_early_irq_init(void)
+ +int __init arch_early_irq_init(void)
   {
         struct irq_cfg *cfg;
         struct irq_desc *desc;
@@@ -183,9 -183,11 +183,13 @@@
         for (i = 0; i < count; i++) {
                 desc = irq_to_desc(i);
                 desc->chip_data = &cfg[i];
+               alloc_bootmem_cpumask_var(&cfg[i].domain);
+               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+               if (i < NR_IRQS_LEGACY)
+                       cpumask_setall(cfg[i].domain);
         }
+ +
+ +      return 0;
   }
   
   #ifdef CONFIG_SPARSE_IRQ
@@@ -209,12 -211,26 +213,26 @@@ static struct irq_cfg *get_one_free_irq
         node = cpu_to_node(cpu);
   
         cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+       if (cfg) {
+               /* FIXME: needs alloc_cpumask_var_node() */
+               if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+                       kfree(cfg);
+                       cfg = NULL;
+               } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+                       free_cpumask_var(cfg->domain);
+                       kfree(cfg);
+                       cfg = NULL;
+               } else {
+                       cpumask_clear(cfg->domain);
+                       cpumask_clear(cfg->old_domain);
+               }
+       }
         printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
   
         return cfg;
   }
   
- -void arch_init_chip_data(struct irq_desc *desc, int cpu)
+ +int arch_init_chip_data(struct irq_desc *desc, int cpu)
   {
         struct irq_cfg *cfg;
   
@@@ -226,8 -242,6 +244,8 @@@
                         BUG_ON(1);
                 }
         }
+ +
+ +      return 0;
   }
   
   #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
@@@ -333,13 -347,14 +351,14 @@@ void arch_free_chip_data(struct irq_des
         }
   }
   
- static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+ static void
+ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
   {
         struct irq_cfg *cfg = desc->chip_data;
   
         if (!cfg->move_in_progress) {
                 /* it means that domain is not changed */
-               if (!cpus_intersects(desc->affinity, mask))
+               if (!cpumask_intersects(&desc->affinity, mask))
                         cfg->move_desc_pending = 1;
         }
   }
@@@ -354,7 -369,8 +373,8 @@@ static struct irq_cfg *irq_cfg(unsigne
   #endif
   
   #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
- static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+ static inline void
+ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
   {
   }
   #endif
@@@ -485,6 -501,26 +505,26 @@@ static void ioapic_mask_entry(int apic
   }
   
   #ifdef CONFIG_SMP
+ static void send_cleanup_vector(struct irq_cfg *cfg)
+ {
+       cpumask_var_t cleanup_mask;
+ 
+       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+               unsigned int i;
+               cfg->move_cleanup_count = 0;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       cfg->move_cleanup_count++;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+       } else {
+               cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               free_cpumask_var(cleanup_mask);
+       }
+       cfg->move_in_progress = 0;
+ }
+ 
   static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
   {
         int apic, pin;
@@@ -520,41 -556,55 +560,55 @@@
         }
   }
   
- static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
+ static int
+ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
   
- static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
+ /*
+  * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+  * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+  */
+ static unsigned int
+ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
   {
         struct irq_cfg *cfg;
-       unsigned long flags;
-       unsigned int dest;
-       cpumask_t tmp;
         unsigned int irq;
   
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
-               return;
+       if (!cpumask_intersects(mask, cpu_online_mask))
+               return BAD_APICID;
   
         irq = desc->irq;
         cfg = desc->chip_data;
         if (assign_irq_vector(irq, cfg, mask))
-               return;
+               return BAD_APICID;
   
+       cpumask_and(&desc->affinity, cfg->domain, mask);
         set_extra_move_desc(desc, mask);
+       return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+ }
   
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
-       /*
-        * Only the high 8 bits are valid.
-        */
-       dest = SET_APIC_LOGICAL_ID(dest);
+ static void
+ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ {
+       struct irq_cfg *cfg;
+       unsigned long flags;
+       unsigned int dest;
+       unsigned int irq;
+ 
+       irq = desc->irq;
+       cfg = desc->chip_data;
   
         spin_lock_irqsave(&ioapic_lock, flags);
-       __target_IO_APIC_irq(irq, dest, cfg);
-       desc->affinity = mask;
+       dest = set_desc_affinity(desc, mask);
+       if (dest != BAD_APICID) {
+               /* Only the high 8 bits are valid. */
+               dest = SET_APIC_LOGICAL_ID(dest);
+               __target_IO_APIC_irq(irq, dest, cfg);
+       }
         spin_unlock_irqrestore(&ioapic_lock, flags);
   }
   
- static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+ static void
+ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
   {
         struct irq_desc *desc;
   
@@@ -1222,7 -1272,8 +1276,8 @@@ void unlock_vector_lock(void
         spin_unlock(&vector_lock);
   }
   
- static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+ static int
+ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
   {
         /*
          * NOTE! The local APIC isn't very good at handling
@@@ -1237,49 -1288,49 +1292,49 @@@
          */
         static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
         unsigned int old_vector;
-       int cpu;
+       int cpu, err;
+       cpumask_var_t tmp_mask;
   
         if ((cfg->move_in_progress) || cfg->move_cleanup_count)
                 return -EBUSY;
   
-       /* Only try and allocate irqs on cpus that are present */
-       cpus_and(mask, mask, cpu_online_map);
+       if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+               return -ENOMEM;
   
         old_vector = cfg->vector;
         if (old_vector) {
-               cpumask_t tmp;
-               cpus_and(tmp, cfg->domain, mask);
-               if (!cpus_empty(tmp))
+               cpumask_and(tmp_mask, mask, cpu_online_mask);
+               cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+               if (!cpumask_empty(tmp_mask)) {
+                       free_cpumask_var(tmp_mask);
                         return 0;
+               }
         }
   
-       for_each_cpu_mask_nr(cpu, mask) {
-               cpumask_t domain, new_mask;
+       /* Only try and allocate irqs on cpus that are present */
+       err = -ENOSPC;
+       for_each_cpu_and(cpu, mask, cpu_online_mask) {
                 int new_cpu;
                 int vector, offset;
   
-               domain = vector_allocation_domain(cpu);
-               cpus_and(new_mask, domain, cpu_online_map);
+               vector_allocation_domain(cpu, tmp_mask);
   
                 vector = current_vector;
                 offset = current_offset;
   next:
                 vector += 8;
                 if (vector >= first_system_vector) {
-                       /* If we run out of vectors on large boxen, must share them. */
+                       /* If out of vectors on large boxen, must share them. */
                         offset = (offset + 1) % 8;
                         vector = FIRST_DEVICE_VECTOR + offset;
                 }
                 if (unlikely(current_vector == vector))
                         continue;
- #ifdef CONFIG_X86_64
-               if (vector == IA32_SYSCALL_VECTOR)
-                       goto next;
- #else
-               if (vector == SYSCALL_VECTOR)
+ 
+               if (test_bit(vector, used_vectors))
                         goto next;
- #endif
-               for_each_cpu_mask_nr(new_cpu, new_mask)
+ 
+               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
                         if (per_cpu(vector_irq, new_cpu)[vector] != -1)
                                 goto next;
                 /* Found one! */
@@@ -1287,18 -1338,21 +1342,21 @@@
                 current_offset = offset;
                 if (old_vector) {
                         cfg->move_in_progress = 1;
-                       cfg->old_domain = cfg->domain;
+                       cpumask_copy(cfg->old_domain, cfg->domain);
                 }
-               for_each_cpu_mask_nr(new_cpu, new_mask)
+               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
                         per_cpu(vector_irq, new_cpu)[vector] = irq;
                 cfg->vector = vector;
-               cfg->domain = domain;
-               return 0;
+               cpumask_copy(cfg->domain, tmp_mask);
+               err = 0;
+               break;
         }
-       return -ENOSPC;
+       free_cpumask_var(tmp_mask);
+       return err;
   }
   
- static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+ static int
+ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
   {
         int err;
         unsigned long flags;
@@@ -1311,23 -1365,20 +1369,20 @@@
   
   static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
   {
-       cpumask_t mask;
         int cpu, vector;
   
         BUG_ON(!cfg->vector);
   
         vector = cfg->vector;
-       cpus_and(mask, cfg->domain, cpu_online_map);
-       for_each_cpu_mask_nr(cpu, mask)
+       for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
                 per_cpu(vector_irq, cpu)[vector] = -1;
   
         cfg->vector = 0;
-       cpus_clear(cfg->domain);
+       cpumask_clear(cfg->domain);
   
         if (likely(!cfg->move_in_progress))
                 return;
-       cpus_and(mask, cfg->old_domain, cpu_online_map);
-       for_each_cpu_mask_nr(cpu, mask) {
+       for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
                 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
                                                                 vector++) {
                         if (per_cpu(vector_irq, cpu)[vector] != irq)
@@@ -1349,8 -1400,10 +1404,8 @@@ void __setup_vector_irq(int cpu
   
         /* Mark the inuse vectors */
         for_each_irq_desc(irq, desc) {
- -              if (!desc)
- -                      continue;
                 cfg = desc->chip_data;
-               if (!cpu_isset(cpu, cfg->domain))
+               if (!cpumask_test_cpu(cpu, cfg->domain))
                         continue;
                 vector = cfg->vector;
                 per_cpu(vector_irq, cpu)[vector] = irq;
@@@ -1362,7 -1415,7 +1417,7 @@@
                         continue;
   
                 cfg = irq_cfg(irq);
-               if (!cpu_isset(cpu, cfg->domain))
+               if (!cpumask_test_cpu(cpu, cfg->domain))
                         per_cpu(vector_irq, cpu)[vector] = -1;
         }
   }
@@@ -1498,18 -1551,17 +1553,17 @@@ static void setup_IO_APIC_irq(int apic
   {
         struct irq_cfg *cfg;
         struct IO_APIC_route_entry entry;
-       cpumask_t mask;
+       unsigned int dest;
   
         if (!IO_APIC_IRQ(irq))
                 return;
   
         cfg = desc->chip_data;
   
-       mask = TARGET_CPUS;
-       if (assign_irq_vector(irq, cfg, mask))
+       if (assign_irq_vector(irq, cfg, TARGET_CPUS))
                 return;
   
-       cpus_and(mask, cfg->domain, mask);
+       dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
   
         apic_printk(APIC_VERBOSE,KERN_DEBUG
                     "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@@ -1519,8 -1571,7 +1573,7 @@@
   
   
         if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-                              cpu_mask_to_apicid(mask), trigger, polarity,
-                              cfg->vector)) {
+                              dest, trigger, polarity, cfg->vector)) {
                 printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
                        mp_ioapics[apic].mp_apicid, pin);
                 __clear_irq_vector(irq, cfg);
@@@ -1732,6 -1783,8 +1785,6 @@@ __apicdebuginit(void) print_IO_APIC(voi
         for_each_irq_desc(irq, desc) {
                 struct irq_pin_list *entry;
   
- -              if (!desc)
- -                      continue;
                 cfg = desc->chip_data;
                 entry = cfg->irq_2_pin;
                 if (!entry)
@@@ -2240,7 -2293,7 +2293,7 @@@ static int ioapic_retrigger_irq(unsigne
         unsigned long flags;
   
         spin_lock_irqsave(&vector_lock, flags);
-       send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+       send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
         spin_unlock_irqrestore(&vector_lock, flags);
   
         return 1;
@@@ -2289,18 -2342,17 +2342,17 @@@ static DECLARE_DELAYED_WORK(ir_migratio
    * as simple as edge triggered migration and we can do the irq migration
    * with a simple atomic update to IO-APIC RTE.
    */
- static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
+ static void
+ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
   {
         struct irq_cfg *cfg;
-       cpumask_t tmp, cleanup_mask;
         struct irte irte;
         int modify_ioapic_rte;
         unsigned int dest;
         unsigned long flags;
         unsigned int irq;
   
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpumask_intersects(mask, cpu_online_mask))
                 return;
   
         irq = desc->irq;
@@@ -2313,8 -2365,7 +2365,7 @@@
   
         set_extra_move_desc(desc, mask);
   
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
+       dest = cpu_mask_to_apicid_and(cfg->domain, mask);
   
         modify_ioapic_rte = desc->status & IRQ_LEVEL;
         if (modify_ioapic_rte) {
@@@ -2331,14 -2382,10 +2382,10 @@@
          */
         modify_irte(irq, &irte);
   
-       if (cfg->move_in_progress) {
-               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               cfg->move_in_progress = 0;
-       }
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
   
-       desc->affinity = mask;
+       cpumask_copy(&desc->affinity, mask);
   }
   
   static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@@ -2360,11 -2407,11 +2407,11 @@@
         }
   
         /* everthing is clear. we have right of way */
-       migrate_ioapic_irq_desc(desc, desc->pending_mask);
+       migrate_ioapic_irq_desc(desc, &desc->pending_mask);
   
         ret = 0;
         desc->status &= ~IRQ_MOVE_PENDING;
-       cpus_clear(desc->pending_mask);
+       cpumask_clear(&desc->pending_mask);
   
   unmask:
         unmask_IO_APIC_irq_desc(desc);
@@@ -2378,6 -2425,9 +2425,6 @@@ static void ir_irq_migration(struct wor
         struct irq_desc *desc;
   
         for_each_irq_desc(irq, desc) {
- -              if (!desc)
- -                      continue;
- -
                 if (desc->status & IRQ_MOVE_PENDING) {
                         unsigned long flags;
   
@@@ -2389,7 -2439,7 +2436,7 @@@
                                 continue;
                         }
   
-                       desc->chip->set_affinity(irq, desc->pending_mask);
+                       desc->chip->set_affinity(irq, &desc->pending_mask);
                         spin_unlock_irqrestore(&desc->lock, flags);
                 }
         }
@@@ -2398,18 -2448,20 +2445,20 @@@
   /*
    * Migrates the IRQ destination in the process context.
    */
- static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
+ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+                                           const struct cpumask *mask)
   {
         if (desc->status & IRQ_LEVEL) {
                 desc->status |= IRQ_MOVE_PENDING;
-               desc->pending_mask = mask;
+               cpumask_copy(&desc->pending_mask, mask);
                 migrate_irq_remapped_level_desc(desc);
                 return;
         }
   
         migrate_ioapic_irq_desc(desc, mask);
   }
- static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+ static void set_ir_ioapic_affinity_irq(unsigned int irq,
+                                      const struct cpumask *mask)
   {
         struct irq_desc *desc = irq_to_desc(irq);
   
@@@ -2420,9 -2472,10 +2469,9 @@@
   asmlinkage void smp_irq_move_cleanup_interrupt(void)
   {
         unsigned vector, me;
+ +
         ack_APIC_irq();
- -#ifdef CONFIG_X86_64
         exit_idle();
- -#endif
         irq_enter();
   
         me = smp_processor_id();
@@@ -2444,7 -2497,7 +2493,7 @@@
                 if (!cfg->move_cleanup_count)
                         goto unlock;
   
-               if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+               if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
                         goto unlock;
   
                 __get_cpu_var(vector_irq)[vector] = -1;
@@@ -2467,7 -2520,7 +2516,7 @@@ static void irq_complete_move(struct ir
                 if (likely(!cfg->move_desc_pending))
                         return;
   
- -              /* domain is not change, but affinity is changed */
+ +              /* domain has not changed, but affinity did */
                 me = smp_processor_id();
                 if (cpu_isset(me, desc->affinity)) {
                         *descp = desc = move_irq_desc(desc, me);
@@@ -2481,20 -2534,14 +2530,14 @@@
   
         vector = ~get_irq_regs()->orig_ax;
         me = smp_processor_id();
-       if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-               cpumask_t cleanup_mask;
- 
   #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
                 *descp = desc = move_irq_desc(desc, me);
                 /* get the new one */
                 cfg = desc->chip_data;
   #endif
   
-               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               cfg->move_in_progress = 0;
-       }
+       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+               send_cleanup_vector(cfg);
   }
   #else
   static inline void irq_complete_move(struct irq_desc **descp) {}
@@@ -2667,6 -2714,9 +2710,6 @@@ static inline void init_IO_APIC_traps(v
          * 0x80, because int 0x80 is hm, kind of importantish. ;)
          */
         for_each_irq_desc(irq, desc) {
- -              if (!desc)
- -                      continue;
- -
                 cfg = desc->chip_data;
                 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
                         /*
@@@ -3216,16 -3266,13 +3259,13 @@@ static int msi_compose_msg(struct pci_d
         struct irq_cfg *cfg;
         int err;
         unsigned dest;
-       cpumask_t tmp;
   
         cfg = irq_cfg(irq);
-       tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, cfg, tmp);
+       err = assign_irq_vector(irq, cfg, TARGET_CPUS);
         if (err)
                 return err;
   
-       cpus_and(tmp, cfg->domain, tmp);
-       dest = cpu_mask_to_apicid(tmp);
+       dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
   
   #ifdef CONFIG_INTR_REMAP
         if (irq_remapped(irq)) {
@@@ -3279,26 -3326,18 +3319,18 @@@
   }
   
   #ifdef CONFIG_SMP
- static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
   {
         struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         struct msi_msg msg;
         unsigned int dest;
-       cpumask_t tmp;
   
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                 return;
   
         cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return;
- 
-       set_extra_move_desc(desc, mask);
- 
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
   
         read_msi_msg_desc(desc, &msg);
   
@@@ -3308,37 -3347,27 +3340,27 @@@
         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
   
         write_msi_msg_desc(desc, &msg);
-       desc->affinity = mask;
   }
   #ifdef CONFIG_INTR_REMAP
   /*
    * Migrate the MSI irq to another cpumask. This migration is
    * done in the process context using interrupt-remapping hardware.
    */
- static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+ static void
+ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
   {
         struct irq_desc *desc = irq_to_desc(irq);
-       struct irq_cfg *cfg;
+       struct irq_cfg *cfg = desc->chip_data;
         unsigned int dest;
-       cpumask_t tmp, cleanup_mask;
         struct irte irte;
   
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
-               return;
- 
         if (get_irte(irq, &irte))
                 return;
   
-       cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                 return;
   
-       set_extra_move_desc(desc, mask);
- 
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
- 
         irte.vector = cfg->vector;
         irte.dest_id = IRTE_DEST(dest);
   
@@@ -3352,14 -3381,8 +3374,8 @@@
          * at the new destination. So, time to cleanup the previous
          * vector allocation.
          */
-       if (cfg->move_in_progress) {
-               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               cfg->move_in_progress = 0;
-       }
- 
-       desc->affinity = mask;
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
   }
   
   #endif
@@@ -3550,26 -3573,18 +3566,18 @@@ void arch_teardown_msi_irq(unsigned in
   
   #ifdef CONFIG_DMAR
   #ifdef CONFIG_SMP
- static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
   {
         struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         struct msi_msg msg;
         unsigned int dest;
-       cpumask_t tmp;
   
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                 return;
   
         cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return;
- 
-       set_extra_move_desc(desc, mask);
- 
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
   
         dmar_msi_read(irq, &msg);
   
@@@ -3579,7 -3594,6 +3587,6 @@@
         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
   
         dmar_msi_write(irq, &msg);
-       desc->affinity = mask;
   }
   
   #endif /* CONFIG_SMP */
@@@ -3613,26 -3627,18 +3620,18 @@@ int arch_setup_dmar_msi(unsigned int ir
   #ifdef CONFIG_HPET_TIMER
   
   #ifdef CONFIG_SMP
- static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
   {
         struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         struct msi_msg msg;
         unsigned int dest;
-       cpumask_t tmp;
   
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                 return;
   
         cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return;
- 
-       set_extra_move_desc(desc, mask);
- 
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
   
         hpet_msi_read(irq, &msg);
   
@@@ -3642,7 -3648,6 +3641,6 @@@
         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
   
         hpet_msi_write(irq, &msg);
-       desc->affinity = mask;
   }
   
   #endif /* CONFIG_SMP */
@@@ -3697,28 -3702,19 +3695,19 @@@ static void target_ht_irq(unsigned int 
         write_ht_irq_msg(irq, &msg);
   }
   
- static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
   {
         struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg;
         unsigned int dest;
-       cpumask_t tmp;
   
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                 return;
   
         cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return;
- 
-       set_extra_move_desc(desc, mask);
- 
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
   
         target_ht_irq(irq, dest, cfg->vector);
-       desc->affinity = mask;
   }
   
   #endif
@@@ -3738,17 -3734,14 +3727,14 @@@ int arch_setup_ht_irq(unsigned int irq
   {
         struct irq_cfg *cfg;
         int err;
-       cpumask_t tmp;
   
         cfg = irq_cfg(irq);
-       tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, cfg, tmp);
+       err = assign_irq_vector(irq, cfg, TARGET_CPUS);
         if (!err) {
                 struct ht_irq_msg msg;
                 unsigned dest;
   
-               cpus_and(tmp, cfg->domain, tmp);
-               dest = cpu_mask_to_apicid(tmp);
+               dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
   
                 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
   
@@@ -3784,7 -3777,7 +3770,7 @@@
   int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
                        unsigned long mmr_offset)
   {
-       const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+       const struct cpumask *eligible_cpu = cpumask_of(cpu);
         struct irq_cfg *cfg;
         int mmr_pnode;
         unsigned long mmr_value;
@@@ -3794,7 -3787,7 +3780,7 @@@
   
         cfg = irq_cfg(irq);
   
-       err = assign_irq_vector(irq, cfg, *eligible_cpu);
+       err = assign_irq_vector(irq, cfg, eligible_cpu);
         if (err != 0)
                 return err;
   
@@@ -3813,7 -3806,7 +3799,7 @@@
         entry->polarity = 0;
         entry->trigger = 0;
         entry->mask = 0;
-       entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+       entry->dest = cpu_mask_to_apicid(eligible_cpu);
   
         mmr_pnode = uv_blade_to_pnode(mmr_blade);
         uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@@ -4024,7 -4017,7 +4010,7 @@@ void __init setup_ioapic_dest(void
         int pin, ioapic, irq, irq_entry;
         struct irq_desc *desc;
         struct irq_cfg *cfg;
-       cpumask_t mask;
+       const struct cpumask *mask;
   
         if (skip_ioapic_setup == 1)
                 return;
@@@ -4055,7 -4048,7 +4041,7 @@@
                          */
                         if (desc->status &
                             (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = desc->affinity;
+                               mask = &desc->affinity;
                         else
                                 mask = TARGET_CPUS;
   
diff --combined arch/x86/kernel/irq_64.c

index a174a217eb1aefc25c971c2d6155e483d2d50192,fca2991443f5773c9c9113dda3e450c12201d042..6383d50f82ea11e93220a9fd45b9f0fc466860e6
--- 1/arch/x86/kernel/irq_64.c
--- 2/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@@ -13,12 -13,12 +13,12 @@@
   #include <linux/seq_file.h>
   #include <linux/module.h>
   #include <linux/delay.h>
+ +#include <linux/ftrace.h>
   #include <asm/uaccess.h>
   #include <asm/io_apic.h>
   #include <asm/idle.h>
   #include <asm/smp.h>
   
- -#ifdef CONFIG_DEBUG_STACKOVERFLOW
   /*
    * Probabilistic stack overflow check:
    *
@@@ -28,25 -28,26 +28,25 @@@
    */
   static inline void stack_overflow_check(struct pt_regs *regs)
   {
+ +#ifdef CONFIG_DEBUG_STACKOVERFLOW
         u64 curbase = (u64)task_stack_page(current);
- -      static unsigned long warned = -60*HZ;
- -
- -      if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
- -          regs->sp <  curbase + sizeof(struct thread_info) + 128 &&
- -          time_after(jiffies, warned + 60*HZ)) {
- -              printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
- -                     current->comm, curbase, regs->sp);
- -              show_stack(NULL,NULL);
- -              warned = jiffies;
- -      }
- -}
+ +
+ +      WARN_ONCE(regs->sp >= curbase &&
+ +                regs->sp <= curbase + THREAD_SIZE &&
+ +                regs->sp <  curbase + sizeof(struct thread_info) +
+ +                                      sizeof(struct pt_regs) + 128,
+ +
+ +                "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+ +                      current->comm, curbase, regs->sp);
   #endif
+ +}
   
   /*
    * do_IRQ handles all normal device IRQ's (the special
    * SMP cross-CPU interrupts have their own specific
    * handlers).
    */
- -asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+ +asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
   {
         struct pt_regs *old_regs = set_irq_regs(regs);
         struct irq_desc *desc;
@@@ -59,7 -60,9 +59,7 @@@
         irq_enter();
         irq = __get_cpu_var(vector_irq)[vector];
   
- -#ifdef CONFIG_DEBUG_STACKOVERFLOW
         stack_overflow_check(regs);
- -#endif
   
         desc = irq_to_desc(irq);
         if (likely(desc))
@@@ -80,16 -83,17 +80,17 @@@
   }
   
   #ifdef CONFIG_HOTPLUG_CPU
- void fixup_irqs(cpumask_t map)
+ /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+ void fixup_irqs(void)
   {
         unsigned int irq;
         static int warned;
         struct irq_desc *desc;
   
         for_each_irq_desc(irq, desc) {
-               cpumask_t mask;
                 int break_affinity = 0;
                 int set_affinity = 1;
+               const struct cpumask *affinity;
   
                 if (!desc)
                         continue;
@@@ -99,23 -103,23 +100,23 @@@
                 /* interrupt's are disabled at this point */
                 spin_lock(&desc->lock);
   
+               affinity = &desc->affinity;
                 if (!irq_has_action(irq) ||
-                   cpus_equal(desc->affinity, map)) {
+                   cpumask_equal(affinity, cpu_online_mask)) {
                         spin_unlock(&desc->lock);
                         continue;
                 }
   
-               cpus_and(mask, desc->affinity, map);
-               if (cpus_empty(mask)) {
+               if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
                         break_affinity = 1;
-                       mask = map;
+                       affinity = cpu_all_mask;
                 }
   
                 if (desc->chip->mask)
                         desc->chip->mask(irq);
   
                 if (desc->chip->set_affinity)
-                       desc->chip->set_affinity(irq, mask);
+                       desc->chip->set_affinity(irq, affinity);
                 else if (!(warned++))
                         set_affinity = 0;
   
diff --combined arch/x86/kernel/irqinit_32.c

index 203384ed2b5d85342f0ad9ea4f85adf45385acb5,61aa2a1004b59d5a859c04b53ed66e34eef72451..84723295f88a061a8078dbff08a66df484789dc9
--- 1/arch/x86/kernel/irqinit_32.c
--- 2/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@@ -110,6 -110,18 +110,18 @@@ DEFINE_PER_CPU(vector_irq_t, vector_irq
         [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
   };
   
+ int vector_used_by_percpu_irq(unsigned int vector)
+ {
+       int cpu;
+ 
+       for_each_online_cpu(cpu) {
+               if (per_cpu(vector_irq, cpu)[vector] != -1)
+                       return 1;
+       }
+ 
+       return 0;
+ }
+ 
   /* Overridden in paravirt.c */
   void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
   
@@@ -128,7 -140,7 +140,7 @@@ void __init native_init_IRQ(void
         for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
                 /* SYSCALL_VECTOR was reserved in trap_init. */
                 if (i != SYSCALL_VECTOR)
- -                      set_intr_gate(i, interrupt[i]);
+ +                      set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
         }
   
   
@@@ -146,10 -158,12 +158,12 @@@
         alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
   
         /* IPI for single call function */
-       set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+       alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+                                call_function_single_interrupt);
   
         /* Low priority IPI to cleanup after moving an irq */
         set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+       set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
   #endif
   
   #ifdef CONFIG_X86_LOCAL_APIC
diff --combined arch/x86/kernel/irqinit_64.c

index 6190e6ef546cfc197e63d0a716861d94905b9a59,1020919efe1cfe40aa224726184f6d678c9dadc2..31ebfe38e96ce98327ed64075e773f6bde4b0d66
--- 1/arch/x86/kernel/irqinit_64.c
--- 2/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@@ -23,6 -23,41 +23,6 @@@
   #include <asm/apic.h>
   #include <asm/i8259.h>
   
- -/*
- - * Common place to define all x86 IRQ vectors
- - *
- - * This builds up the IRQ handler stubs using some ugly macros in irq.h
- - *
- - * These macros create the low-level assembly IRQ routines that save
- - * register context and call do_IRQ(). do_IRQ() then does all the
- - * operations that are needed to keep the AT (or SMP IOAPIC)
- - * interrupt-controller happy.
- - */
- -
- -#define IRQ_NAME2(nr) nr##_interrupt(void)
- -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
- -
- -/*
- - *    SMP has a few special interrupts for IPI messages
- - */
- -
- -#define BUILD_IRQ(nr)                         \
- -      asmlinkage void IRQ_NAME(nr);           \
- -      asm("\n.text\n.p2align\n"               \
- -          "IRQ" #nr "_interrupt:\n\t"         \
- -          "push $~(" #nr ") ; "               \
- -          "jmp common_interrupt\n"            \
- -          ".previous");
- -
- -#define BI(x,y) \
- -      BUILD_IRQ(x##y)
- -
- -#define BUILD_16_IRQS(x) \
- -      BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- -      BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- -      BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- -      BI(x,c) BI(x,d) BI(x,e) BI(x,f)
- -
   /*
    * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
    * (these are usually mapped to vectors 0x30-0x3f)
@@@ -38,6 -73,37 +38,6 @@@
    *
    * (these are usually mapped into the 0x30-0xff vector range)
    */
- -                                    BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
- -BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
- -BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
- -BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
- -
- -#undef BUILD_16_IRQS
- -#undef BI
- -
- -
- -#define IRQ(x,y) \
- -      IRQ##x##y##_interrupt
- -
- -#define IRQLIST_16(x) \
- -      IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- -      IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- -      IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- -      IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
- -
- -/* for the irq vectors */
- -static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
- -                                        IRQLIST_16(0x2), IRQLIST_16(0x3),
- -      IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
- -      IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- -      IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
- -};
- -
- -#undef IRQ
- -#undef IRQLIST_16
- -
- -
- -
   
   /*
    * IRQ2 is cascade interrupt to second interrupt controller
@@@ -69,6 -135,18 +69,18 @@@ DEFINE_PER_CPU(vector_irq_t, vector_irq
         [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
   };
   
+ int vector_used_by_percpu_irq(unsigned int vector)
+ {
+       int cpu;
+ 
+       for_each_online_cpu(cpu) {
+               if (per_cpu(vector_irq, cpu)[vector] != -1)
+                       return 1;
+       }
+ 
+       return 0;
+ }
+ 
   void __init init_ISA_irqs(void)
   {
         int i;
@@@ -121,6 -199,7 +133,7 @@@ static void __init smp_intr_init(void
   
         /* Low priority IPI to cleanup after moving an irq */
         set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+       set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
   #endif
   }
   
diff --combined arch/x86/kernel/reboot.c

index 72e0e4e712d6e3ef77d4b8d36c5bbdde4181cb6a,ba7b9a0e606358a27789b13eb25fc0ca9778dd66..39643b1df061d5ae049e8814a0abf126a430f362
--- 1/arch/x86/kernel/reboot.c
--- 2/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@@ -12,7 -12,6 +12,7 @@@
   #include <asm/proto.h>
   #include <asm/reboot_fixups.h>
   #include <asm/reboot.h>
+ +#include <asm/virtext.h>
   
   #ifdef CONFIG_X86_32
   # include <linux/dmi.h>
@@@ -40,12 -39,6 +40,12 @@@ int reboot_force
   static int reboot_cpu = -1;
   #endif
   
+ +/* This is set if we need to go through the 'emergency' path.
+ + * When machine_emergency_restart() is called, we may be on
+ + * an inconsistent state and won't be able to do a clean cleanup
+ + */
+ +static int reboot_emergency;
+ +
   /* This is set by the PCI code if either type 1 or type 2 PCI is detected */
   bool port_cf9_safe = false;
   
@@@ -375,48 -368,6 +375,48 @@@ static inline void kb_wait(void
         }
   }
   
+ +static void vmxoff_nmi(int cpu, struct die_args *args)
+ +{
+ +      cpu_emergency_vmxoff();
+ +}
+ +
+ +/* Use NMIs as IPIs to tell all CPUs to disable virtualization
+ + */
+ +static void emergency_vmx_disable_all(void)
+ +{
+ +      /* Just make sure we won't change CPUs while doing this */
+ +      local_irq_disable();
+ +
+ +      /* We need to disable VMX on all CPUs before rebooting, otherwise
+ +       * we risk hanging up the machine, because the CPU ignore INIT
+ +       * signals when VMX is enabled.
+ +       *
+ +       * We can't take any locks and we may be on an inconsistent
+ +       * state, so we use NMIs as IPIs to tell the other CPUs to disable
+ +       * VMX and halt.
+ +       *
+ +       * For safety, we will avoid running the nmi_shootdown_cpus()
+ +       * stuff unnecessarily, but we don't have a way to check
+ +       * if other CPUs have VMX enabled. So we will call it only if the
+ +       * CPU we are running on has VMX enabled.
+ +       *
+ +       * We will miss cases where VMX is not enabled on all CPUs. This
+ +       * shouldn't do much harm because KVM always enable VMX on all
+ +       * CPUs anyway. But we can miss it on the small window where KVM
+ +       * is still enabling VMX.
+ +       */
+ +      if (cpu_has_vmx() && cpu_vmx_enabled()) {
+ +              /* Disable VMX on this CPU.
+ +               */
+ +              cpu_vmxoff();
+ +
+ +              /* Halt and disable VMX on the other CPUs */
+ +              nmi_shootdown_cpus(vmxoff_nmi);
+ +
+ +      }
+ +}
+ +
+ +
   void __attribute__((weak)) mach_reboot_fixups(void)
   {
   }
@@@ -425,9 -376,6 +425,9 @@@ static void native_machine_emergency_re
   {
         int i;
   
+ +      if (reboot_emergency)
+ +              emergency_vmx_disable_all();
+ +
         /* Tell the BIOS if we want cold or warm reboot */
         *((unsigned short *)__va(0x472)) = reboot_mode;
   
@@@ -534,19 -482,13 +534,19 @@@ void native_machine_shutdown(void
   #endif
   }
   
+ +static void __machine_emergency_restart(int emergency)
+ +{
+ +      reboot_emergency = emergency;
+ +      machine_ops.emergency_restart();
+ +}
+ +
   static void native_machine_restart(char *__unused)
   {
         printk("machine restart\n");
   
         if (!reboot_force)
                 machine_shutdown();
- -      machine_emergency_restart();
+ +      __machine_emergency_restart(0);
   }
   
   static void native_machine_halt(void)
@@@ -590,7 -532,7 +590,7 @@@ void machine_shutdown(void
   
   void machine_emergency_restart(void)
   {
- -      machine_ops.emergency_restart();
+ +      __machine_emergency_restart(1);
   }
   
   void machine_restart(char *cmd)
@@@ -650,10 -592,7 +650,7 @@@ static int crash_nmi_callback(struct no
   
   static void smp_send_nmi_allbutself(void)
   {
-       cpumask_t mask = cpu_online_map;
-       cpu_clear(safe_smp_processor_id(), mask);
-       if (!cpus_empty(mask))
-               send_IPI_mask(mask, NMI_VECTOR);
+       send_IPI_allbutself(NMI_VECTOR);
   }
   
   static struct notifier_block crash_nmi_nb = {
diff --combined arch/x86/kernel/smp.c

index 7e558db362c1870f19013da9879f8cea1542e472,49ed667b06f3684de2b9a72ff89c8e49c8bf1e3c..beea2649a2406240c3727da2a02e1f538feadba9
--- 1/arch/x86/kernel/smp.c
--- 2/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@@ -118,22 -118,22 +118,22 @@@ static void native_smp_send_reschedule(
                 WARN_ON(1);
                 return;
         }
-       send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+       send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
   }
   
   void native_send_call_func_single_ipi(int cpu)
   {
-       send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+       send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
   }
   
- void native_send_call_func_ipi(cpumask_t mask)
+ void native_send_call_func_ipi(const struct cpumask *mask)
   {
         cpumask_t allbutself;
   
         allbutself = cpu_online_map;
         cpu_clear(smp_processor_id(), allbutself);
   
-       if (cpus_equal(mask, allbutself) &&
+       if (cpus_equal(*mask, allbutself) &&
             cpus_equal(cpu_online_map, cpu_callout_map))
                 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
         else
@@@ -165,7 -165,11 +165,7 @@@ static void native_smp_send_stop(void
   void smp_reschedule_interrupt(struct pt_regs *regs)
   {
         ack_APIC_irq();
- -#ifdef CONFIG_X86_32
- -      __get_cpu_var(irq_stat).irq_resched_count++;
- -#else
- -      add_pda(irq_resched_count, 1);
- -#endif
+ +      inc_irq_stat(irq_resched_count);
   }
   
   void smp_call_function_interrupt(struct pt_regs *regs)
@@@ -173,7 -177,11 +173,7 @@@
         ack_APIC_irq();
         irq_enter();
         generic_smp_call_function_interrupt();
- -#ifdef CONFIG_X86_32
- -      __get_cpu_var(irq_stat).irq_call_count++;
- -#else
- -      add_pda(irq_call_count, 1);
- -#endif
+ +      inc_irq_stat(irq_call_count);
         irq_exit();
   }
   
@@@ -182,7 -190,11 +182,7 @@@ void smp_call_function_single_interrupt
         ack_APIC_irq();
         irq_enter();
         generic_smp_call_function_single_interrupt();
- -#ifdef CONFIG_X86_32
- -      __get_cpu_var(irq_stat).irq_call_count++;
- -#else
- -      add_pda(irq_call_count, 1);
- -#endif
+ +      inc_irq_stat(irq_call_count);
         irq_exit();
   }
   
diff --combined arch/x86/kernel/smpboot.c

index f8500c969442875e4db94cf9ca3d68e1c3777fbf,1a9941b111504dc6de7084d662ef56a938a46797..31869bf5fabd68a187fe36799a3cff7fa9b5f0a9
--- 1/arch/x86/kernel/smpboot.c
--- 2/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@@ -102,14 -102,8 +102,8 @@@ EXPORT_SYMBOL(smp_num_siblings)
   /* Last level cache ID of each logical CPU */
   DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
   
- /* bitmap of online cpus */
- cpumask_t cpu_online_map __read_mostly;
- EXPORT_SYMBOL(cpu_online_map);
- 
   cpumask_t cpu_callin_map;
   cpumask_t cpu_callout_map;
- cpumask_t cpu_possible_map;
- EXPORT_SYMBOL(cpu_possible_map);
   
   /* representing HT siblings of each logical CPU */
   DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@@ -288,7 -282,7 +282,7 @@@ static int __cpuinitdata unsafe_smp
   /*
    * Activate a secondary processor.
    */
- -static void __cpuinit start_secondary(void *unused)
+ +notrace static void __cpuinit start_secondary(void *unused)
   {
         /*
          * Don't put *anything* before cpu_init(), SMP booting is too
@@@ -1081,10 -1075,8 +1075,10 @@@ static int __init smp_sanity_check(unsi
   #endif
   
         if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
- -              printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
- -                                  "by the BIOS.\n", hard_smp_processor_id());
+ +              printk(KERN_WARNING
+ +                      "weird, boot CPU (#%d) not listed by the BIOS.\n",
+ +                      hard_smp_processor_id());
+ +
                 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
         }
   
@@@ -1260,6 -1252,15 +1254,15 @@@ void __init native_smp_cpus_done(unsign
         check_nmi_watchdog();
   }
   
+ static int __initdata setup_possible_cpus = -1;
+ static int __init _setup_possible_cpus(char *str)
+ {
+       get_option(&str, &setup_possible_cpus);
+       return 0;
+ }
+ early_param("possible_cpus", _setup_possible_cpus);
+ 
+ 
   /*
    * cpu_possible_map should be static, it cannot change as cpu's
    * are onlined, or offlined. The reason is per-cpu data-structures
@@@ -1272,7 -1273,7 +1275,7 @@@
    *
    * Three ways to find out the number of additional hotplug CPUs:
    * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
-  * - The user can overwrite it with additional_cpus=NUM
+  * - The user can overwrite it with possible_cpus=NUM
    * - Otherwise don't reserve additional CPUs.
    * We do this because additional CPUs waste a lot of memory.
    * -AK
@@@ -1285,9 -1286,17 +1288,17 @@@ __init void prefill_possible_map(void
         if (!num_processors)
                 num_processors = 1;
   
-       possible = num_processors + disabled_cpus;
-       if (possible > NR_CPUS)
-               possible = NR_CPUS;
+       if (setup_possible_cpus == -1)
+               possible = num_processors + disabled_cpus;
+       else
+               possible = setup_possible_cpus;
+ 
+       if (possible > CONFIG_NR_CPUS) {
+               printk(KERN_WARNING
+                       "%d Processors exceeds NR_CPUS limit of %d\n",
+                       possible, CONFIG_NR_CPUS);
+               possible = CONFIG_NR_CPUS;
+       }
   
         printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
                 possible, max_t(int, possible - num_processors, 0));
@@@ -1352,7 -1361,7 +1363,7 @@@ void cpu_disable_common(void
         lock_vector_lock();
         remove_cpu_from_maps(cpu);
         unlock_vector_lock();
-       fixup_irqs(cpu_online_map);
+       fixup_irqs();
   }
   
   int native_cpu_disable(void)
diff --combined arch/x86/kernel/tlb_32.c

index 8da059f949be9527e1ce7919bfff54550a65c714,174ea90d1cbd5cd281b0c0eac6bf2ebb3e82bcfe..ce505464224758c650a34f153a17fff8003e267c
--- 1/arch/x86/kernel/tlb_32.c
--- 2/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@@ -34,8 -34,9 +34,8 @@@ static DEFINE_SPINLOCK(tlbstate_lock)
    */
   void leave_mm(int cpu)
   {
- -      if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- -              BUG();
- -      cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+ +      BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
+ +      cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
         load_cr3(swapper_pg_dir);
   }
   EXPORT_SYMBOL_GPL(leave_mm);
@@@ -103,8 -104,8 +103,8 @@@ void smp_invalidate_interrupt(struct pt
                  * BUG();
                  */
   
- -      if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
- -              if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+ +      if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
+ +              if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
                         if (flush_va == TLB_FLUSH_ALL)
                                 local_flush_tlb();
                         else
@@@ -118,7 -119,7 +118,7 @@@
         smp_mb__after_clear_bit();
   out:
         put_cpu_no_resched();
- -      __get_cpu_var(irq_stat).irq_tlb_count++;
+ +      inc_irq_stat(irq_tlb_count);
   }
   
   void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@@ -163,7 -164,7 +163,7 @@@
          * We have to send the IPI only to
          * CPUs affected.
          */
-       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+       send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
   
         while (!cpus_empty(flush_cpumask))
                 /* nothing. lockup detection does not belong here */
@@@ -237,7 -238,7 +237,7 @@@ static void do_flush_tlb_all(void *info
         unsigned long cpu = smp_processor_id();
   
         __flush_tlb_all();
- -      if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+ +      if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
                 leave_mm(cpu);
   }
   
diff --combined arch/x86/kernel/tlb_64.c

index 29887d7081a9572557e5ae30f630f8c2a3ab3321,de6f1bda0c50e522fd05f8d40b4ce27d445f4ea0..f8be6f1d2e48645c35d883479d6072418d4d2d26
--- 1/arch/x86/kernel/tlb_64.c
--- 2/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@@ -154,7 -154,7 +154,7 @@@ asmlinkage void smp_invalidate_interrup
   out:
         ack_APIC_irq();
         cpu_clear(cpu, f->flush_cpumask);
- -      add_pda(irq_tlb_count, 1);
+ +      inc_irq_stat(irq_tlb_count);
   }
   
   void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@@ -191,7 -191,7 +191,7 @@@
          * We have to send the IPI only to
          * CPUs affected.
          */
-       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+       send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
   
         while (!cpus_empty(f->flush_cpumask))
                 cpu_relax();
diff --combined arch/x86/kernel/traps.c

index 141907ab6e2260adaa2f20cbcdc6747751184c64,4a6dff39a470a82d3f7ac16bde81623927b504f2..2d1f4c7e40524ba7c3d0847583f0b5041ccdba30
--- 1/arch/x86/kernel/traps.c
--- 2/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@@ -72,9 -72,6 +72,6 @@@
   
   #include "cpu/mcheck/mce.h"
   
- DECLARE_BITMAP(used_vectors, NR_VECTORS);
- EXPORT_SYMBOL_GPL(used_vectors);
- 
   asmlinkage int system_call(void);
   
   /* Do we ignore FPU interrupts ? */
@@@ -89,6 -86,9 +86,9 @@@ gate_desc idt_table[256
         __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
   #endif
   
+ DECLARE_BITMAP(used_vectors, NR_VECTORS);
+ EXPORT_SYMBOL_GPL(used_vectors);
+ 
   static int ignore_nmis;
   
   static inline void conditional_sti(struct pt_regs *regs)
@@@ -481,7 -481,11 +481,7 @@@ do_nmi(struct pt_regs *regs, long error
   {
         nmi_enter();
   
- -#ifdef CONFIG_X86_32
- -      { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
- -#else
- -      add_pda(__nmi_count, 1);
- -#endif
+ +      inc_irq_stat(__nmi_count);
   
         if (!ignore_nmis)
                 default_do_nmi(regs);
@@@ -660,7 -664,7 +660,7 @@@ void math_error(void __user *ip
   {
         struct task_struct *task;
         siginfo_t info;
- -      unsigned short cwd, swd;
+ +      unsigned short cwd, swd, err;
   
         /*
          * Save the info for the exception handler and clear the error.
@@@ -671,6 -675,7 +671,6 @@@
         task->thread.error_code = 0;
         info.si_signo = SIGFPE;
         info.si_errno = 0;
- -      info.si_code = __SI_FAULT;
         info.si_addr = ip;
         /*
          * (~cwd & swd) will mask out exceptions that are not set to unmasked
@@@ -684,31 -689,34 +684,31 @@@
          */
         cwd = get_fpu_cwd(task);
         swd = get_fpu_swd(task);
- -      switch (swd & ~cwd & 0x3f) {
- -      case 0x000: /* No unmasked exception */
+ +
+ +      err = swd & ~cwd & 0x3f;
+ +
   #ifdef CONFIG_X86_32
+ +      if (!err)
                 return;
   #endif
- -      default: /* Multiple exceptions */
- -              break;
- -      case 0x001: /* Invalid Op */
+ +
+ +      if (err & 0x001) {      /* Invalid op */
                 /*
                  * swd & 0x240 == 0x040: Stack Underflow
                  * swd & 0x240 == 0x240: Stack Overflow
                  * User must clear the SF bit (0x40) if set
                  */
                 info.si_code = FPE_FLTINV;
- -              break;
- -      case 0x002: /* Denormalize */
- -      case 0x010: /* Underflow */
- -              info.si_code = FPE_FLTUND;
- -              break;
- -      case 0x004: /* Zero Divide */
+ +      } else if (err & 0x004) { /* Divide by Zero */
                 info.si_code = FPE_FLTDIV;
- -              break;
- -      case 0x008: /* Overflow */
+ +      } else if (err & 0x008) { /* Overflow */
                 info.si_code = FPE_FLTOVF;
- -              break;
- -      case 0x020: /* Precision */
+ +      } else if (err & 0x012) { /* Denormal, Underflow */
+ +              info.si_code = FPE_FLTUND;
+ +      } else if (err & 0x020) { /* Precision */
                 info.si_code = FPE_FLTRES;
- -              break;
+ +      } else {
+ +              info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
         }
         force_sig_info(SIGFPE, &info, task);
   }
@@@ -941,9 -949,7 +941,7 @@@ dotraplinkage void do_iret_error(struc
   
   void __init trap_init(void)
   {
- #ifdef CONFIG_X86_32
         int i;
- #endif
   
   #ifdef CONFIG_EISA
         void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@@ -1000,11 -1006,15 +998,15 @@@
         }
   
         set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+ #endif
   
         /* Reserve all the builtin and the syscall vector: */
         for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
                 set_bit(i, used_vectors);
   
+ #ifdef CONFIG_X86_64
+       set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+ #else
         set_bit(SYSCALL_VECTOR, used_vectors);
   #endif
         /*
diff --combined arch/x86/lguest/boot.c

index 50a779264bb18fabcf21e3434f58a7963b1c691d,104c8220a383f0c736abf536ee56fae735ec3b2d..a7ed208f81e3e4bbb5ff4e3447d844dc77cd3371
--- 1/arch/x86/lguest/boot.c
--- 2/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@@ -590,8 -590,7 +590,8 @@@ static void __init lguest_init_IRQ(void
                  * a straightforward 1 to 1 mapping, so force that here. */
                 __get_cpu_var(vector_irq)[vector] = i;
                 if (vector != SYSCALL_VECTOR) {
- -                      set_intr_gate(vector, interrupt[vector]);
+ +                      set_intr_gate(vector,
+ +                                    interrupt[vector-FIRST_EXTERNAL_VECTOR]);
                         set_irq_chip_and_handler_name(i, &lguest_irq_controller,
                                                       handle_level_irq,
                                                       "level");
@@@ -738,7 -737,7 +738,7 @@@ static void lguest_time_init(void
   
         /* We can't set cpumask in the initializer: damn C limitations!  Set it
          * here and register our timer device. */
-       lguest_clockevent.cpumask = cpumask_of_cpu(0);
+       lguest_clockevent.cpumask = cpumask_of(0);
         clockevents_register_device(&lguest_clockevent);
   
         /* Finally, we unblock the timer interrupt. */
diff --combined arch/x86/xen/mmu.c

index 773d68d3e9128eba414a31ae3fa3dbc989033d72,e59e53b11e2b3d725a6920d2b606cfc05805f705..503c240e26c73539c2d4061f0d186b92a7c20c83
--- 1/arch/x86/xen/mmu.c
--- 2/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@@ -154,13 -154,13 +154,13 @@@ void xen_setup_mfn_list_list(void
   {
         unsigned pfn, idx;
   
- -      for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+ +      for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
                 unsigned topidx = p2m_top_index(pfn);
   
                 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
         }
   
- -      for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+ +      for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
                 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
                 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
         }
@@@ -179,7 -179,7 +179,7 @@@ void __init xen_build_dynamic_phys_to_m
         unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
         unsigned pfn;
   
- -      for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+ +      for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
                 unsigned topidx = p2m_top_index(pfn);
   
                 p2m_top[topidx] = &mfn_list[pfn];
@@@ -207,7 -207,7 +207,7 @@@ static void alloc_p2m(unsigned long **p
         p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
         BUG_ON(p == NULL);
   
- -      for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+ +      for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
                 p[i] = INVALID_P2M_ENTRY;
   
         if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
@@@ -407,8 -407,7 +407,8 @@@ out
                 preempt_enable();
   }
   
- -pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
+ +                               unsigned long addr, pte_t *ptep)
   {
         /* Just return the pte as-is.  We preserve the bits on commit */
         return *ptep;
@@@ -879,8 -878,7 +879,8 @@@ static void __xen_pgd_pin(struct mm_str
   
                 if (user_pgd) {
                         xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
- -                      xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+ +                      xen_do_pin(MMUEXT_PIN_L4_TABLE,
+ +                                 PFN_DOWN(__pa(user_pgd)));
                 }
         }
   #else /* CONFIG_X86_32 */
@@@ -995,8 -993,7 +995,8 @@@ static void __xen_pgd_unpin(struct mm_s
                 pgd_t *user_pgd = xen_get_user_pgd(pgd);
   
                 if (user_pgd) {
- -                      xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+ +                      xen_do_pin(MMUEXT_UNPIN_TABLE,
+ +                                 PFN_DOWN(__pa(user_pgd)));
                         xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
                 }
         }
@@@ -1082,7 -1079,7 +1082,7 @@@ static void drop_other_mm_ref(void *inf
   
   static void xen_drop_mm_ref(struct mm_struct *mm)
   {
-       cpumask_t mask;
+       cpumask_var_t mask;
         unsigned cpu;
   
         if (current->active_mm == mm) {
@@@ -1094,7 -1091,16 +1094,16 @@@
         }
   
         /* Get the "official" set of cpus referring to our pagetable. */
-       mask = mm->cpu_vm_mask;
+       if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+               for_each_online_cpu(cpu) {
+                       if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+                           && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+                               continue;
+                       smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+               }
+               return;
+       }
+       cpumask_copy(mask, &mm->cpu_vm_mask);
   
         /* It's possible that a vcpu may have a stale reference to our
            cr3, because its in lazy mode, and it hasn't yet flushed
@@@ -1103,11 -1109,12 +1112,12 @@@
            if needed. */
         for_each_online_cpu(cpu) {
                 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-                       cpu_set(cpu, mask);
+                       cpumask_set_cpu(cpu, mask);
         }
   
-       if (!cpus_empty(mask))
-               smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+       if (!cpumask_empty(mask))
+               smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+       free_cpumask_var(mask);
   }
   #else
   static void xen_drop_mm_ref(struct mm_struct *mm)
diff --combined drivers/xen/events.c

index e26733a9df21d5ca583ad09bea26dfea909873da,6c8193046e0de5f463b44fd1737a8c284cb4c895..eb0dfdeaa9494ac3043f13db5cbd1bf0c0dd5751
--- 1/drivers/xen/events.c
--- 2/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@@ -142,6 -142,9 +142,6 @@@ static void init_evtchn_cpu_bindings(vo
   
         /* By default all event channels notify CPU#0. */
         for_each_irq_desc(i, desc) {
- -              if (!desc)
- -                      continue;
- -
                 desc->affinity = cpumask_of_cpu(0);
         }
   #endif
@@@ -230,7 -233,6 +230,7 @@@ static void unmask_evtchn(int port
   static int find_unbound_irq(void)
   {
         int irq;
+ +      struct irq_desc *desc;
   
         /* Only allocate from dynirq range */
         for (irq = 0; irq < nr_irqs; irq++)
@@@ -240,10 -242,6 +240,10 @@@
         if (irq == nr_irqs)
                 panic("No available IRQ to bind to: increase nr_irqs!\n");
   
+ +      desc = irq_to_desc_alloc_cpu(irq, 0);
+ +      if (WARN_ON(desc == NULL))
+ +              return -1;
+ +
         return irq;
   }
   
@@@ -585,7 -583,7 +585,7 @@@ void rebind_evtchn_irq(int evtchn, int 
         spin_unlock(&irq_mapping_update_lock);
   
         /* new event channels are always bound to cpu 0 */
-       irq_set_affinity(irq, cpumask_of_cpu(0));
+       irq_set_affinity(irq, cpumask_of(0));
   
         /* Unmask the event channel. */
         enable_irq(irq);
@@@ -614,9 -612,9 +614,9 @@@ static void rebind_irq_to_cpu(unsigned 
   }
   
   
- static void set_affinity_irq(unsigned irq, cpumask_t dest)
+ static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
   {
-       unsigned tcpu = first_cpu(dest);
+       unsigned tcpu = cpumask_first(dest);
         rebind_irq_to_cpu(irq, tcpu);
   }
   
diff --combined include/linux/interrupt.h

index 8cc8ef47f5b63ac3f7460214bdebe346a9b0de7d,7e85a6e89e417f551d8e966fa59827dab19f3d28..990355fbc54ee64004f7a2150552b2db2d1e6682
--- 1/include/linux/interrupt.h
--- 2/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@@ -111,13 -111,13 +111,13 @@@ extern void enable_irq(unsigned int irq
   
   extern cpumask_t irq_default_affinity;
   
- extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
+ extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
   extern int irq_can_set_affinity(unsigned int irq);
   extern int irq_select_affinity(unsigned int irq);
   
   #else /* CONFIG_SMP */
   
- static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
   {
         return -EINVAL;
   }
@@@ -253,6 -253,9 +253,6 @@@ enu
         BLOCK_SOFTIRQ,
         TASKLET_SOFTIRQ,
         SCHED_SOFTIRQ,
- -#ifdef CONFIG_HIGH_RES_TIMERS
- -      HRTIMER_SOFTIRQ,
- -#endif
         RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */
   
         NR_SOFTIRQS
@@@ -464,10 -467,4 +464,10 @@@ static inline void init_irq_proc(void
   
   int show_interrupts(struct seq_file *p, void *v);
   
+ +struct irq_desc;
+ +
+ +extern int early_irq_init(void);
+ +extern int arch_early_irq_init(void);
+ +extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+ +
   #endif
diff --combined include/linux/irq.h

index d64a6d49bdef0c9d194db7bb555d6718de4a4157,fde5e613201829ace74f2ba384171143906e4e85..f899b502f18622dce2c509aac5c656ef8150216f
--- 1/include/linux/irq.h
--- 2/include/linux/irq.h
+++ b/include/linux/irq.h
@@@ -113,7 -113,8 +113,8 @@@ struct irq_chip 
         void            (*eoi)(unsigned int irq);
   
         void            (*end)(unsigned int irq);
-       void            (*set_affinity)(unsigned int irq, cpumask_t dest);
+       void            (*set_affinity)(unsigned int irq,
+                                       const struct cpumask *dest);
         int             (*retrigger)(unsigned int irq);
         int             (*set_type)(unsigned int irq, unsigned int flow_type);
         int             (*set_wake)(unsigned int irq, unsigned int on);
@@@ -134,9 -135,6 +135,9 @@@ struct irq_2_iommu
   /**
    * struct irq_desc - interrupt descriptor
    * @irq:              interrupt number for this descriptor
+ + * @timer_rand_state: pointer to timer rand state struct
+ + * @kstat_irqs:               irq stats per cpu
+ + * @irq_2_iommu:      iommu with this irq
    * @handle_irq:               highlevel irq-events handler [if NULL, __do_IRQ()]
    * @chip:             low level interrupt hardware access
    * @msi_desc:         MSI descriptor
@@@ -148,8 -146,8 +149,8 @@@
    * @depth:            disable-depth, for nested irq_disable() calls
    * @wake_depth:               enable depth, for multiple set_irq_wake() callers
    * @irq_count:                stats field to detect stalled irqs
- - * @irqs_unhandled:   stats field for spurious unhandled interrupts
    * @last_unhandled:   aging timer for unhandled count
+ + * @irqs_unhandled:   stats field for spurious unhandled interrupts
    * @lock:             locking for SMP
    * @affinity:         IRQ affinity on SMP
    * @cpu:              cpu index useful for balancing
@@@ -177,8 -175,8 +178,8 @@@ struct irq_desc 
         unsigned int            depth;          /* nested irq disables */
         unsigned int            wake_depth;     /* nested wake enables */
         unsigned int            irq_count;      /* For detecting broken IRQs */
- -      unsigned int            irqs_unhandled;
         unsigned long           last_unhandled; /* Aging timer for unhandled count */
+ +      unsigned int            irqs_unhandled;
         spinlock_t              lock;
   #ifdef CONFIG_SMP
         cpumask_t               affinity;
@@@ -193,23 -191,42 +194,23 @@@
         const char              *name;
   } ____cacheline_internodealigned_in_smp;
   
- -extern void early_irq_init(void);
- -extern void arch_early_irq_init(void);
- -extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
   extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
                                         struct irq_desc *desc, int cpu);
   extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
   
   #ifndef CONFIG_SPARSE_IRQ
   extern struct irq_desc irq_desc[NR_IRQS];
- -
- -static inline struct irq_desc *irq_to_desc(unsigned int irq)
- -{
- -      return (irq < NR_IRQS) ? irq_desc + irq : NULL;
- -}
- -static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
- -{
- -      return irq_to_desc(irq);
- -}
- -
- -#else
- -
- -extern struct irq_desc *irq_to_desc(unsigned int irq);
- -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+ +#else /* CONFIG_SPARSE_IRQ */
   extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
   
- -# define for_each_irq_desc(irq, desc)         \
- -      for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
- -# define for_each_irq_desc_reverse(irq, desc)                          \
- -      for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
- -
   #define kstat_irqs_this_cpu(DESC) \
         ((DESC)->kstat_irqs[smp_processor_id()])
   #define kstat_incr_irqs_this_cpu(irqno, DESC) \
         ((DESC)->kstat_irqs[smp_processor_id()]++)
   
- -#endif
+ +#endif /* CONFIG_SPARSE_IRQ */
+ +
+ +extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
   
   static inline struct irq_desc *
   irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
diff --combined include/linux/sched.h

index 8395e715809d382bb7f3a5ed7087173e99cd15d2,e5f928a079e835ae801e9d3b08d1aa40a0efce15..158d53d07765888af5c2e7bdc4515373983d930e
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -250,7 -250,7 +250,7 @@@ extern void init_idle_bootup_task(struc
   extern int runqueue_is_locked(void);
   extern void task_rq_unlock_wait(struct task_struct *p);
   
- extern cpumask_t nohz_cpu_mask;
+ extern cpumask_var_t nohz_cpu_mask;
   #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
   extern int select_nohz_load_balancer(int cpu);
   #else
@@@ -571,6 -571,12 +571,6 @@@ struct signal_struct 
          */
         struct rlimit rlim[RLIM_NLIMITS];
   
- -      /* keep the process-shared keyrings here so that they do the right
- -       * thing in threads created with CLONE_THREAD */
- -#ifdef CONFIG_KEYS
- -      struct key *session_keyring;    /* keyring inherited over fork */
- -      struct key *process_keyring;    /* keyring private to this process */
- -#endif
   #ifdef CONFIG_BSD_PROCESS_ACCT
         struct pacct_struct pacct;      /* per-process accounting information */
   #endif
@@@ -641,7 -647,6 +641,7 @@@ struct user_struct 
         /* Hash table maintenance information */
         struct hlist_node uidhash_node;
         uid_t uid;
+ +      struct user_namespace *user_ns;
   
   #ifdef CONFIG_USER_SCHED
         struct task_group *tg;
@@@ -659,7 -664,6 +659,7 @@@ extern struct user_struct *find_user(ui
   extern struct user_struct root_user;
   #define INIT_USER (&root_user)
   
+ +
   struct backing_dev_info;
   struct reclaim_state;
   
@@@ -667,7 -671,8 +667,7 @@@
   struct sched_info {
         /* cumulative counters */
         unsigned long pcount;         /* # of times run on this cpu */
- -      unsigned long long cpu_time,  /* time spent on the cpu */
- -                         run_delay; /* time spent waiting on a runqueue */
+ +      unsigned long long run_delay; /* time spent waiting on a runqueue */
   
         /* timestamps */
         unsigned long long last_arrival,/* when we last ran on a cpu */
@@@ -758,20 -763,51 +758,51 @@@ enum cpu_idle_type 
   #define SD_SERIALIZE          1024    /* Only a single load balancing instance */
   #define SD_WAKE_IDLE_FAR      2048    /* Gain latency sacrificing cache hit */
   
- #define BALANCE_FOR_MC_POWER  \
-       (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
+ enum powersavings_balance_level {
+       POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
+       POWERSAVINGS_BALANCE_BASIC,     /* Fill one thread/core/package
+                                        * first for long running threads
+                                        */
+       POWERSAVINGS_BALANCE_WAKEUP,    /* Also bias task wakeups to semi-idle
+                                        * cpu package for power savings
+                                        */
+       MAX_POWERSAVINGS_BALANCE_LEVELS
+ };
   
- #define BALANCE_FOR_PKG_POWER \
-       ((sched_mc_power_savings || sched_smt_power_savings) ?  \
-        SD_POWERSAVINGS_BALANCE : 0)
+ extern int sched_mc_power_savings, sched_smt_power_savings;
   
- #define test_sd_parent(sd, flag)      ((sd->parent &&         \
-                                        (sd->parent->flags & flag)) ? 1 : 0)
+ static inline int sd_balance_for_mc_power(void)
+ {
+       if (sched_smt_power_savings)
+               return SD_POWERSAVINGS_BALANCE;
   
+       return 0;
+ }
+ 
+ static inline int sd_balance_for_package_power(void)
+ {
+       if (sched_mc_power_savings | sched_smt_power_savings)
+               return SD_POWERSAVINGS_BALANCE;
+ 
+       return 0;
+ }
+ 
+ /*
+  * Optimise SD flags for power savings:
+  * SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
+  * Keep default SD flags if sched_{smt,mc}_power_savings=0
+  */
+ 
+ static inline int sd_power_saving_flags(void)
+ {
+       if (sched_mc_power_savings | sched_smt_power_savings)
+               return SD_BALANCE_NEWIDLE;
+ 
+       return 0;
+ }
   
   struct sched_group {
         struct sched_group *next;       /* Must be a circular list */
-       cpumask_t cpumask;
   
         /*
          * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@@ -784,8 -820,15 +815,15 @@@
          * (see include/linux/reciprocal_div.h)
          */
         u32 reciprocal_cpu_power;
+ 
+       unsigned long cpumask[];
   };
   
+ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+ {
+       return to_cpumask(sg->cpumask);
+ }
+ 
   enum sched_domain_level {
         SD_LV_NONE = 0,
         SD_LV_SIBLING,
@@@ -809,7 -852,6 +847,6 @@@ struct sched_domain 
         struct sched_domain *parent;    /* top domain must be null terminated */
         struct sched_domain *child;     /* bottom domain must be null terminated */
         struct sched_group *groups;     /* the balancing groups of the domain */
-       cpumask_t span;                 /* span of all CPUs in this domain */
         unsigned long min_interval;     /* Minimum balance interval ms */
         unsigned long max_interval;     /* Maximum balance interval ms */
         unsigned int busy_factor;       /* less balancing by factor if busy */
@@@ -864,25 -906,73 +901,42 @@@
   #ifdef CONFIG_SCHED_DEBUG
         char *name;
   #endif
+ 
+       /* span of all CPUs in this domain */
+       unsigned long span[];
   };
   
- extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+ {
+       return to_cpumask(sd->span);
+ }
+ 
+ extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
                                     struct sched_domain_attr *dattr_new);
   extern int arch_reinit_sched_domains(void);
   
+ /* Test a flag in parent sched domain */
+ static inline int test_sd_parent(struct sched_domain *sd, int flag)
+ {
+       if (sd->parent && (sd->parent->flags & flag))
+               return 1;
+ 
+       return 0;
+ }
+ 
   #else /* CONFIG_SMP */
   
   struct sched_domain_attr;
   
   static inline void
- partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+ partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
                         struct sched_domain_attr *dattr_new)
   {
   }
   #endif        /* !CONFIG_SMP */
   
   struct io_context;                    /* See blkdev.h */
- -#define NGROUPS_SMALL         32
- -#define NGROUPS_PER_BLOCK     ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
- -struct group_info {
- -      int ngroups;
- -      atomic_t usage;
- -      gid_t small_block[NGROUPS_SMALL];
- -      int nblocks;
- -      gid_t *blocks[0];
- -};
   
- -/*
- - * get_group_info() must be called with the owning task locked (via task_lock())
- - * when task != current.  The reason being that the vast majority of callers are
- - * looking at current->group_info, which can not be changed except by the
- - * current task.  Changing current->group_info requires the task lock, too.
- - */
- -#define get_group_info(group_info) do { \
- -      atomic_inc(&(group_info)->usage); \
- -} while (0)
- -
- -#define put_group_info(group_info) do { \
- -      if (atomic_dec_and_test(&(group_info)->usage)) \
- -              groups_free(group_info); \
- -} while (0)
- -
- -extern struct group_info *groups_alloc(int gidsetsize);
- -extern void groups_free(struct group_info *group_info);
- -extern int set_current_groups(struct group_info *group_info);
- -extern int groups_search(struct group_info *group_info, gid_t grp);
- -/* access the groups "array" with this macro */
- -#define GROUP_AT(gi, i) \
- -    ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
   
   #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
   extern void prefetch_stack(struct task_struct *t);
@@@ -926,7 -1016,7 +980,7 @@@ struct sched_class 
         void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
   
         void (*set_cpus_allowed)(struct task_struct *p,
-                                const cpumask_t *newmask);
+                                const struct cpumask *newmask);
   
         void (*rq_online)(struct rq *rq);
         void (*rq_offline)(struct rq *rq);
@@@ -1138,7 -1228,6 +1192,7 @@@ struct task_struct 
          * The buffer to hold the BTS data.
          */
         void *bts_buffer;
+ +      size_t bts_size;
   #endif /* CONFIG_X86_PTRACE_BTS */
   
         /* PID/PID hash table linkage. */
@@@ -1162,12 -1251,17 +1216,12 @@@
         struct list_head cpu_timers[3];
   
   /* process credentials */
- -      uid_t uid,euid,suid,fsuid;
- -      gid_t gid,egid,sgid,fsgid;
- -      struct group_info *group_info;
- -      kernel_cap_t   cap_effective, cap_inheritable, cap_permitted, cap_bset;
- -      struct user_struct *user;
- -      unsigned securebits;
- -#ifdef CONFIG_KEYS
- -      unsigned char jit_keyring;      /* default keyring to attach requested keys to */
- -      struct key *request_key_auth;   /* assumed request_key authority */
- -      struct key *thread_keyring;     /* keyring private to this thread */
- -#endif
+ +      const struct cred *real_cred;   /* objective and real subjective task
+ +                                       * credentials (COW) */
+ +      const struct cred *cred;        /* effective (overridable) subjective task
+ +                                       * credentials (COW) */
+ +      struct mutex cred_exec_mutex;   /* execve vs ptrace cred calculation mutex */
+ +
         char comm[TASK_COMM_LEN]; /* executable name excluding path
                                      - access with [gs]et_task_comm (which lock
                                        it with task_lock())
@@@ -1204,6 -1298,9 +1258,6 @@@
         int (*notifier)(void *priv);
         void *notifier_data;
         sigset_t *notifier_mask;
- -#ifdef CONFIG_SECURITY
- -      void *security;
- -#endif
         struct audit_context *audit_context;
   #ifdef CONFIG_AUDITSYSCALL
         uid_t loginuid;
@@@ -1579,12 -1676,12 +1633,12 @@@ extern cputime_t task_gtime(struct task
   
   #ifdef CONFIG_SMP
   extern int set_cpus_allowed_ptr(struct task_struct *p,
-                               const cpumask_t *new_mask);
+                               const struct cpumask *new_mask);
   #else
   static inline int set_cpus_allowed_ptr(struct task_struct *p,
-                                      const cpumask_t *new_mask)
+                                      const struct cpumask *new_mask)
   {
-       if (!cpu_isset(0, *new_mask))
+       if (!cpumask_test_cpu(0, new_mask))
                 return -EINVAL;
         return 0;
   }
@@@ -1760,6 -1857,7 +1814,6 @@@ static inline struct user_struct *get_u
         return u;
   }
   extern void free_uid(struct user_struct *);
- -extern void switch_uid(struct user_struct *);
   extern void release_uids(struct user_namespace *ns);
   
   #include <asm/current.h>
@@@ -1778,6 -1876,9 +1832,6 @@@ extern void wake_up_new_task(struct tas
   extern void sched_fork(struct task_struct *p, int clone_flags);
   extern void sched_dead(struct task_struct *p);
   
- -extern int in_group_p(gid_t);
- -extern int in_egroup_p(gid_t);
- -
   extern void proc_caches_init(void);
   extern void flush_signals(struct task_struct *);
   extern void ignore_signals(struct task_struct *);
@@@ -1909,8 -2010,6 +1963,8 @@@ static inline unsigned long wait_task_i
   #define for_each_process(p) \
         for (p = &init_task ; (p = next_task(p)) != &init_task ; )
   
+ +extern bool is_single_threaded(struct task_struct *);
+ +
   /*
    * Careful: do_each_thread/while_each_thread is a double loop so
    *          'break' will not work as expected - use goto instead.
@@@ -2195,10 -2294,8 +2249,8 @@@ __trace_special(void *__tr, void *__dat
   }
   #endif
   
- extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
- extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
- 
- extern int sched_mc_power_savings, sched_smt_power_savings;
+ extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
   
   extern void normalize_rt_tasks(void);
   
diff --combined init/Kconfig

index 13627191a60d194de08aaa4b410aa752cfd7cb21,b3782c6d5ede57f94446de710213a2b20deed283..f6281711166d5dbeba4f55121547396d00b6ff2e
--- 1/init/Kconfig
--- 2/init/Kconfig
+++ b/init/Kconfig
@@@ -588,13 -588,6 +588,13 @@@ config KALLSYMS_AL
   
            Say N.
   
+ +config KALLSYMS_STRIP_GENERATED
+ +      bool "Strip machine generated symbols from kallsyms"
+ +      depends on KALLSYMS_ALL
+ +      default y
+ +      help
+ +        Say N if you want kallsyms to retain even machine generated symbols.
+ +
   config KALLSYMS_EXTRA_PASS
         bool "Do an extra kallsyms pass"
         depends on KALLSYMS
@@@ -924,6 -917,15 +924,15 @@@ config KMO
   
   endif # MODULES
   
+ config INIT_ALL_POSSIBLE
+       bool
+       help
+         Back when each arch used to define their own cpu_online_map and
+         cpu_possible_map, some of them chose to initialize cpu_possible_map
+         with all 1s, and others with all 0s.  When they were centralised,
+         it was better to provide this option than to break all the archs
+         and have several arch maintainers pursuing me down dark alleys.
+ 
   config STOP_MACHINE
         bool
         default y
@@@ -936,90 -938,10 +945,90 @@@ source "block/Kconfig
   config PREEMPT_NOTIFIERS
         bool
   
+ +choice
+ +      prompt "RCU Implementation"
+ +      default CLASSIC_RCU
+ +
   config CLASSIC_RCU
- -      def_bool !PREEMPT_RCU
+ +      bool "Classic RCU"
         help
           This option selects the classic RCU implementation that is
           designed for best read-side performance on non-realtime
- -        systems.  Classic RCU is the default.  Note that the
- -        PREEMPT_RCU symbol is used to select/deselect this option.
+ +        systems.
+ +
+ +        Select this option if you are unsure.
+ +
+ +config TREE_RCU
+ +      bool "Tree-based hierarchical RCU"
+ +      help
+ +        This option selects the RCU implementation that is
+ +        designed for very large SMP systems with hundreds or
+ +        thousands of CPUs.
+ +
+ +config PREEMPT_RCU
+ +      bool "Preemptible RCU"
+ +      depends on PREEMPT
+ +      help
+ +        This option reduces the latency of the kernel by making certain
+ +        RCU sections preemptible. Normally RCU code is non-preemptible, if
+ +        this option is selected then read-only RCU sections become
+ +        preemptible. This helps latency, but may expose bugs due to
+ +        now-naive assumptions about each RCU read-side critical section
+ +        remaining on a given CPU through its execution.
+ +
+ +endchoice
+ +
+ +config RCU_TRACE
+ +      bool "Enable tracing for RCU"
+ +      depends on TREE_RCU || PREEMPT_RCU
+ +      help
+ +        This option provides tracing in RCU which presents stats
+ +        in debugfs for debugging RCU implementation.
+ +
+ +        Say Y here if you want to enable RCU tracing
+ +        Say N if you are unsure.
+ +
+ +config RCU_FANOUT
+ +      int "Tree-based hierarchical RCU fanout value"
+ +      range 2 64 if 64BIT
+ +      range 2 32 if !64BIT
+ +      depends on TREE_RCU
+ +      default 64 if 64BIT
+ +      default 32 if !64BIT
+ +      help
+ +        This option controls the fanout of hierarchical implementations
+ +        of RCU, allowing RCU to work efficiently on machines with
+ +        large numbers of CPUs.  This value must be at least the cube
+ +        root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+ +        systems and up to 262,144 for 64-bit systems.
+ +
+ +        Select a specific number if testing RCU itself.
+ +        Take the default if unsure.
+ +
+ +config RCU_FANOUT_EXACT
+ +      bool "Disable tree-based hierarchical RCU auto-balancing"
+ +      depends on TREE_RCU
+ +      default n
+ +      help
+ +        This option forces use of the exact RCU_FANOUT value specified,
+ +        regardless of imbalances in the hierarchy.  This is useful for
+ +        testing RCU itself, and might one day be useful on systems with
+ +        strong NUMA behavior.
+ +
+ +        Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+ +
+ +        Say N if unsure.
+ +
+ +config TREE_RCU_TRACE
+ +      def_bool RCU_TRACE && TREE_RCU
+ +      select DEBUG_FS
+ +      help
+ +        This option provides tracing for the TREE_RCU implementation,
+ +        permitting Makefile to trivially select kernel/rcutree_trace.c.
+ +
+ +config PREEMPT_RCU_TRACE
+ +      def_bool RCU_TRACE && PREEMPT_RCU
+ +      select DEBUG_FS
+ +      help
+ +        This option provides tracing for the PREEMPT_RCU implementation,
+ +        permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --combined kernel/irq/chip.c

index 6eb3c7952b6496fc9c5f8da49b982d79866e1504,b343deedae914b59399a504854b2a97a105742c8..f63c706d25e15f481f61548dd248d1eaf69702bb
--- 1/kernel/irq/chip.c
--- 2/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@@ -46,7 -46,7 +46,7 @@@ void dynamic_irq_init(unsigned int irq
         desc->irq_count = 0;
         desc->irqs_unhandled = 0;
   #ifdef CONFIG_SMP
-       cpus_setall(desc->affinity);
+       cpumask_setall(&desc->affinity);
   #endif
         spin_unlock_irqrestore(&desc->lock, flags);
   }
@@@ -125,7 -125,6 +125,7 @@@ int set_irq_type(unsigned int irq, unsi
                 return -ENODEV;
         }
   
+ +      type &= IRQ_TYPE_SENSE_MASK;
         if (type == IRQ_TYPE_NONE)
                 return 0;
   
diff --combined kernel/irq/manage.c

index 540f6c49f3fa156b2bd0d61ad2c5090ea0e46013,10ad2f87ed9a0cfe6d20844f05231b7f0fd0506a..61c4a9b6216546aac546b4a6ab1427ed4ac2a00a
--- 1/kernel/irq/manage.c
--- 2/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@@ -79,7 -79,7 +79,7 @@@ int irq_can_set_affinity(unsigned int i
    *    @cpumask:       cpumask
    *
    */
- int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
   {
         struct irq_desc *desc = irq_to_desc(irq);
         unsigned long flags;
@@@ -91,14 -91,14 +91,14 @@@
   
   #ifdef CONFIG_GENERIC_PENDING_IRQ
         if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
-               desc->affinity = cpumask;
+               cpumask_copy(&desc->affinity, cpumask);
                 desc->chip->set_affinity(irq, cpumask);
         } else {
                 desc->status |= IRQ_MOVE_PENDING;
-               desc->pending_mask = cpumask;
+               cpumask_copy(&desc->pending_mask, cpumask);
         }
   #else
-       desc->affinity = cpumask;
+       cpumask_copy(&desc->affinity, cpumask);
         desc->chip->set_affinity(irq, cpumask);
   #endif
         desc->status |= IRQ_AFFINITY_SET;
@@@ -112,26 -112,24 +112,24 @@@
    */
   int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
   {
-       cpumask_t mask;
- 
         if (!irq_can_set_affinity(irq))
                 return 0;
   
-       cpus_and(mask, cpu_online_map, irq_default_affinity);
- 
         /*
          * Preserve a userspace affinity setup, but make sure that
          * one of the targets is online.
          */
         if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
-               if (cpus_intersects(desc->affinity, cpu_online_map))
-                       mask = desc->affinity;
+               if (cpumask_any_and(&desc->affinity, cpu_online_mask)
+                   < nr_cpu_ids)
+                       goto set_affinity;
                 else
                         desc->status &= ~IRQ_AFFINITY_SET;
         }
   
-       desc->affinity = mask;
-       desc->chip->set_affinity(irq, mask);
+       cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
+ set_affinity:
+       desc->chip->set_affinity(irq, &desc->affinity);
   
         return 0;
   }
@@@ -370,18 -368,16 +368,18 @@@ int __irq_set_trigger(struct irq_desc *
                 return 0;
         }
   
- -      ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
+ +      /* caller masked out all except trigger mode flags */
+ +      ret = chip->set_type(irq, flags);
   
         if (ret)
                 pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
- -                              (int)(flags & IRQF_TRIGGER_MASK),
- -                              irq, chip->set_type);
+ +                              (int)flags, irq, chip->set_type);
         else {
+ +              if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
+ +                      flags |= IRQ_LEVEL;
                 /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
- -              desc->status &= ~IRQ_TYPE_SENSE_MASK;
- -              desc->status |= flags & IRQ_TYPE_SENSE_MASK;
+ +              desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
+ +              desc->status |= flags;
         }
   
         return ret;
@@@ -461,8 -457,7 +459,8 @@@ __setup_irq(unsigned int irq, struct ir
   
                 /* Setup the type (level, edge polarity) if configured: */
                 if (new->flags & IRQF_TRIGGER_MASK) {
- -                      ret = __irq_set_trigger(desc, irq, new->flags);
+ +                      ret = __irq_set_trigger(desc, irq,
+ +                                      new->flags & IRQF_TRIGGER_MASK);
   
                         if (ret) {
                                 spin_unlock_irqrestore(&desc->lock, flags);
@@@ -676,18 -671,6 +674,18 @@@ int request_irq(unsigned int irq, irq_h
         struct irq_desc *desc;
         int retval;
   
+ +      /*
+ +       * handle_IRQ_event() always ignores IRQF_DISABLED except for
+ +       * the _first_ irqaction (sigh).  That can cause oopsing, but
+ +       * the behavior is classified as "will not fix" so we need to
+ +       * start nudging drivers away from using that idiom.
+ +       */
+ +      if ((irqflags & (IRQF_SHARED|IRQF_DISABLED))
+ +                      == (IRQF_SHARED|IRQF_DISABLED))
+ +              pr_warning("IRQ %d/%s: IRQF_DISABLED is not "
+ +                              "guaranteed on shared IRQs\n",
+ +                              irq, devname);
+ +
   #ifdef CONFIG_LOCKDEP
         /*
          * Lockdep wants atomic interrupt handlers:
diff --combined kernel/sched.c

index fff1c4a20b6538966a0cf2b97a012c045d52b84d,756d981d91a40e0e80239ebca62355c62db06d78..27ba1d642f0f0c4c370e81a61b067874310532d0
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -209,6 -209,7 +209,6 @@@ void init_rt_bandwidth(struct rt_bandwi
         hrtimer_init(&rt_b->rt_period_timer,
                         CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         rt_b->rt_period_timer.function = sched_rt_period_timer;
- -      rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
   }
   
   static inline int rt_bandwidth_enabled(void)
@@@ -360,9 -361,7 +360,9 @@@ static inline struct task_group *task_g
         struct task_group *tg;
   
   #ifdef CONFIG_USER_SCHED
- -      tg = p->user->tg;
+ +      rcu_read_lock();
+ +      tg = __task_cred(p)->user->tg;
+ +      rcu_read_unlock();
   #elif defined(CONFIG_CGROUP_SCHED)
         tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                                 struct task_group, css);
@@@ -498,18 -497,26 +498,26 @@@ struct rt_rq 
    */
   struct root_domain {
         atomic_t refcount;
-       cpumask_t span;
-       cpumask_t online;
+       cpumask_var_t span;
+       cpumask_var_t online;
   
         /*
          * The "RT overload" flag: it gets set if a CPU has more than
          * one runnable RT task.
          */
-       cpumask_t rto_mask;
+       cpumask_var_t rto_mask;
         atomic_t rto_count;
   #ifdef CONFIG_SMP
         struct cpupri cpupri;
   #endif
+ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+       /*
+        * Preferred wake up cpu nominated by sched_mc balance that will be
+        * used when most cpus are idle in the system indicating overall very
+        * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
+        */
+       unsigned int sched_mc_preferred_wakeup_cpu;
+ #endif
   };
   
   /*
@@@ -603,8 -610,6 +611,8 @@@ struct rq 
   #ifdef CONFIG_SCHEDSTATS
         /* latency stats */
         struct sched_info rq_sched_info;
+ +      unsigned long long rq_cpu_time;
+ +      /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
   
         /* sys_sched_yield() stats */
         unsigned int yld_exp_empty;
@@@ -1138,6 -1143,7 +1146,6 @@@ static void init_rq_hrtick(struct rq *r
   
         hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         rq->hrtick_timer.function = hrtick;
- -      rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
   }
   #else /* CONFIG_SCHED_HRTICK */
   static inline void hrtick_clear(struct rq *rq)
@@@ -1514,7 -1520,7 +1522,7 @@@ static int tg_shares_up(struct task_gro
         struct sched_domain *sd = data;
         int i;
   
-       for_each_cpu_mask(i, sd->span) {
+       for_each_cpu(i, sched_domain_span(sd)) {
                 /*
                  * If there are currently no tasks on the cpu pretend there
                  * is one of average load so that when a new task gets to
@@@ -1535,7 -1541,7 +1543,7 @@@
         if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
                 shares = tg->shares;
   
-       for_each_cpu_mask(i, sd->span)
+       for_each_cpu(i, sched_domain_span(sd))
                 update_group_shares_cpu(tg, i, shares, rq_weight);
   
         return 0;
@@@ -1865,8 -1871,6 +1873,8 @@@ void set_task_cpu(struct task_struct *p
   
         clock_offset = old_rq->clock - new_rq->clock;
   
+ +      trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+ +
   #ifdef CONFIG_SCHEDSTATS
         if (p->se.wait_start)
                 p->se.wait_start -= clock_offset;
@@@ -2101,15 -2105,17 +2109,17 @@@ find_idlest_group(struct sched_domain *
                 int i;
   
                 /* Skip over this group if it has no CPUs allowed */
-               if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+               if (!cpumask_intersects(sched_group_cpus(group),
+                                       &p->cpus_allowed))
                         continue;
   
-               local_group = cpu_isset(this_cpu, group->cpumask);
+               local_group = cpumask_test_cpu(this_cpu,
+                                              sched_group_cpus(group));
   
                 /* Tally up the load of all CPUs in the group */
                 avg_load = 0;
   
-               for_each_cpu_mask_nr(i, group->cpumask) {
+               for_each_cpu(i, sched_group_cpus(group)) {
                         /* Bias balancing toward cpus of our domain */
                         if (local_group)
                                 load = source_load(i, load_idx);
@@@ -2141,17 -2147,14 +2151,14 @@@
    * find_idlest_cpu - find the idlest cpu among the cpus in group.
    */
   static int
- find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
-               cpumask_t *tmp)
+ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
   {
         unsigned long load, min_load = ULONG_MAX;
         int idlest = -1;
         int i;
   
         /* Traverse only the allowed CPUs */
-       cpus_and(*tmp, group->cpumask, p->cpus_allowed);
- 
-       for_each_cpu_mask_nr(i, *tmp) {
+       for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
                 load = weighted_cpuload(i);
   
                 if (load < min_load || (load == min_load && i == this_cpu)) {
@@@ -2193,7 -2196,6 +2200,6 @@@ static int sched_balance_self(int cpu, 
                 update_shares(sd);
   
         while (sd) {
-               cpumask_t span, tmpmask;
                 struct sched_group *group;
                 int new_cpu, weight;
   
@@@ -2202,14 -2204,13 +2208,13 @@@
                         continue;
                 }
   
-               span = sd->span;
                 group = find_idlest_group(sd, t, cpu);
                 if (!group) {
                         sd = sd->child;
                         continue;
                 }
   
-               new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
+               new_cpu = find_idlest_cpu(group, t, cpu);
                 if (new_cpu == -1 || new_cpu == cpu) {
                         /* Now try balancing at a lower domain level of cpu */
                         sd = sd->child;
@@@ -2218,10 -2219,10 +2223,10 @@@
   
                 /* Now try balancing at a lower domain level of new_cpu */
                 cpu = new_cpu;
+               weight = cpumask_weight(sched_domain_span(sd));
                 sd = NULL;
-               weight = cpus_weight(span);
                 for_each_domain(cpu, tmp) {
-                       if (weight <= cpus_weight(tmp->span))
+                       if (weight <= cpumask_weight(sched_domain_span(tmp)))
                                 break;
                         if (tmp->flags & flag)
                                 sd = tmp;
@@@ -2266,7 -2267,7 +2271,7 @@@ static int try_to_wake_up(struct task_s
                 cpu = task_cpu(p);
   
                 for_each_domain(this_cpu, sd) {
-                       if (cpu_isset(cpu, sd->span)) {
+                       if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                                 update_shares(sd);
                                 break;
                         }
@@@ -2276,7 -2277,6 +2281,7 @@@
   
         smp_wmb();
         rq = task_rq_lock(p, &flags);
+ +      update_rq_clock(rq);
         old_state = p->state;
         if (!(old_state & state))
                 goto out;
@@@ -2315,7 -2315,7 +2320,7 @@@
         else {
                 struct sched_domain *sd;
                 for_each_domain(this_cpu, sd) {
-                       if (cpu_isset(cpu, sd->span)) {
+                       if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                                 schedstat_inc(sd, ttwu_wake_remote);
                                 break;
                         }
@@@ -2334,11 -2334,12 +2339,11 @@@ out_activate
                 schedstat_inc(p, se.nr_wakeups_local);
         else
                 schedstat_inc(p, se.nr_wakeups_remote);
- -      update_rq_clock(rq);
         activate_task(rq, p, 1);
         success = 1;
   
   out_running:
- -      trace_sched_wakeup(rq, p);
+ +      trace_sched_wakeup(rq, p, success);
         check_preempt_curr(rq, p, sync);
   
         p->state = TASK_RUNNING;
@@@ -2471,7 -2472,7 +2476,7 @@@ void wake_up_new_task(struct task_struc
                 p->sched_class->task_new(rq, p);
                 inc_nr_running(rq);
         }
- -      trace_sched_wakeup_new(rq, p);
+ +      trace_sched_wakeup_new(rq, p, 1);
         check_preempt_curr(rq, p, 0);
   #ifdef CONFIG_SMP
         if (p->sched_class->task_wake_up)
@@@ -2846,10 -2847,11 +2851,10 @@@ static void sched_migrate_task(struct t
         struct rq *rq;
   
         rq = task_rq_lock(p, &flags);
-       if (!cpu_isset(dest_cpu, p->cpus_allowed)
+       if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
             || unlikely(!cpu_active(dest_cpu)))
                 goto out;
   
- -      trace_sched_migrate_task(rq, p, dest_cpu);
         /* force the process onto the specified CPU */
         if (migrate_task(p, dest_cpu, &req)) {
                 /* Need to wait for migration thread (might exit: take ref). */
@@@ -2911,7 -2913,7 +2916,7 @@@ int can_migrate_task(struct task_struc
          * 2) cannot be migrated to this CPU due to cpus_allowed, or
          * 3) are cache-hot on their current CPU.
          */
-       if (!cpu_isset(this_cpu, p->cpus_allowed)) {
+       if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
                 schedstat_inc(p, se.nr_failed_migrations_affine);
                 return 0;
         }
@@@ -3086,7 -3088,7 +3091,7 @@@ static int move_one_task(struct rq *thi
   static struct sched_group *
   find_busiest_group(struct sched_domain *sd, int this_cpu,
                    unsigned long *imbalance, enum cpu_idle_type idle,
-                  int *sd_idle, const cpumask_t *cpus, int *balance)
+                  int *sd_idle, const struct cpumask *cpus, int *balance)
   {
         struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
         unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@@ -3122,10 -3124,11 +3127,11 @@@
                 unsigned long sum_avg_load_per_task;
                 unsigned long avg_load_per_task;
   
-               local_group = cpu_isset(this_cpu, group->cpumask);
+               local_group = cpumask_test_cpu(this_cpu,
+                                              sched_group_cpus(group));
   
                 if (local_group)
-                       balance_cpu = first_cpu(group->cpumask);
+                       balance_cpu = cpumask_first(sched_group_cpus(group));
   
                 /* Tally up the load of all CPUs in the group */
                 sum_weighted_load = sum_nr_running = avg_load = 0;
@@@ -3134,13 -3137,8 +3140,8 @@@
                 max_cpu_load = 0;
                 min_cpu_load = ~0UL;
   
-               for_each_cpu_mask_nr(i, group->cpumask) {
-                       struct rq *rq;
- 
-                       if (!cpu_isset(i, *cpus))
-                               continue;
- 
-                       rq = cpu_rq(i);
+               for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+                       struct rq *rq = cpu_rq(i);
   
                         if (*sd_idle && rq->nr_running)
                                 *sd_idle = 0;
@@@ -3251,8 -3249,8 +3252,8 @@@
                  */
                 if ((sum_nr_running < min_nr_running) ||
                     (sum_nr_running == min_nr_running &&
-                    first_cpu(group->cpumask) <
-                    first_cpu(group_min->cpumask))) {
+                    cpumask_first(sched_group_cpus(group)) >
+                    cpumask_first(sched_group_cpus(group_min)))) {
                         group_min = group;
                         min_nr_running = sum_nr_running;
                         min_load_per_task = sum_weighted_load /
@@@ -3267,8 -3265,8 +3268,8 @@@
                 if (sum_nr_running <= group_capacity - 1) {
                         if (sum_nr_running > leader_nr_running ||
                             (sum_nr_running == leader_nr_running &&
-                            first_cpu(group->cpumask) >
-                             first_cpu(group_leader->cpumask))) {
+                            cpumask_first(sched_group_cpus(group)) <
+                            cpumask_first(sched_group_cpus(group_leader)))) {
                                 group_leader = group;
                                 leader_nr_running = sum_nr_running;
                         }
@@@ -3394,6 -3392,10 +3395,10 @@@ out_balanced
   
         if (this == group_leader && group_leader != group_min) {
                 *imbalance = min_load_per_task;
+               if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+                       cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+                               cpumask_first(sched_group_cpus(group_leader));
+               }
                 return group_min;
         }
   #endif
@@@ -3407,16 -3409,16 +3412,16 @@@ ret
    */
   static struct rq *
   find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-                  unsigned long imbalance, const cpumask_t *cpus)
+                  unsigned long imbalance, const struct cpumask *cpus)
   {
         struct rq *busiest = NULL, *rq;
         unsigned long max_load = 0;
         int i;
   
-       for_each_cpu_mask_nr(i, group->cpumask) {
+       for_each_cpu(i, sched_group_cpus(group)) {
                 unsigned long wl;
   
-               if (!cpu_isset(i, *cpus))
+               if (!cpumask_test_cpu(i, cpus))
                         continue;
   
                 rq = cpu_rq(i);
@@@ -3446,7 -3448,7 +3451,7 @@@
    */
   static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *balance, cpumask_t *cpus)
+                       int *balance, struct cpumask *cpus)
   {
         int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
         struct sched_group *group;
@@@ -3454,7 -3456,7 +3459,7 @@@
         struct rq *busiest;
         unsigned long flags;
   
-       cpus_setall(*cpus);
+       cpumask_setall(cpus);
   
         /*
          * When power savings policy is enabled for the parent domain, idle
@@@ -3514,8 -3516,8 +3519,8 @@@ redo
   
                 /* All tasks on this runqueue were pinned by CPU affinity */
                 if (unlikely(all_pinned)) {
-                       cpu_clear(cpu_of(busiest), *cpus);
-                       if (!cpus_empty(*cpus))
+                       cpumask_clear_cpu(cpu_of(busiest), cpus);
+                       if (!cpumask_empty(cpus))
                                 goto redo;
                         goto out_balanced;
                 }
@@@ -3532,7 -3534,8 +3537,8 @@@
                         /* don't kick the migration_thread, if the curr
                          * task on busiest cpu can't be moved to this_cpu
                          */
-                       if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+                       if (!cpumask_test_cpu(this_cpu,
+                                             &busiest->curr->cpus_allowed)) {
                                 spin_unlock_irqrestore(&busiest->lock, flags);
                                 all_pinned = 1;
                                 goto out_one_pinned;
@@@ -3607,7 -3610,7 +3613,7 @@@ out
    */
   static int
   load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-                       cpumask_t *cpus)
+                       struct cpumask *cpus)
   {
         struct sched_group *group;
         struct rq *busiest = NULL;
@@@ -3616,7 -3619,7 +3622,7 @@@
         int sd_idle = 0;
         int all_pinned = 0;
   
-       cpus_setall(*cpus);
+       cpumask_setall(cpus);
   
         /*
          * When power savings policy is enabled for the parent domain, idle
@@@ -3660,17 -3663,71 +3666,71 @@@ redo
                 double_unlock_balance(this_rq, busiest);
   
                 if (unlikely(all_pinned)) {
-                       cpu_clear(cpu_of(busiest), *cpus);
-                       if (!cpus_empty(*cpus))
+                       cpumask_clear_cpu(cpu_of(busiest), cpus);
+                       if (!cpumask_empty(cpus))
                                 goto redo;
                 }
         }
   
         if (!ld_moved) {
+               int active_balance = 0;
+ 
                 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
                 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
                     !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
                         return -1;
+ 
+               if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
+                       return -1;
+ 
+               if (sd->nr_balance_failed++ < 2)
+                       return -1;
+ 
+               /*
+                * The only task running in a non-idle cpu can be moved to this
+                * cpu in an attempt to completely freeup the other CPU
+                * package. The same method used to move task in load_balance()
+                * have been extended for load_balance_newidle() to speedup
+                * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2)
+                *
+                * The package power saving logic comes from
+                * find_busiest_group().  If there are no imbalance, then
+                * f_b_g() will return NULL.  However when sched_mc={1,2} then
+                * f_b_g() will select a group from which a running task may be
+                * pulled to this cpu in order to make the other package idle.
+                * If there is no opportunity to make a package idle and if
+                * there are no imbalance, then f_b_g() will return NULL and no
+                * action will be taken in load_balance_newidle().
+                *
+                * Under normal task pull operation due to imbalance, there
+                * will be more than one task in the source run queue and
+                * move_tasks() will succeed.  ld_moved will be true and this
+                * active balance code will not be triggered.
+                */
+ 
+               /* Lock busiest in correct order while this_rq is held */
+               double_lock_balance(this_rq, busiest);
+ 
+               /*
+                * don't kick the migration_thread, if the curr
+                * task on busiest cpu can't be moved to this_cpu
+                */
+               if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+                       double_unlock_balance(this_rq, busiest);
+                       all_pinned = 1;
+                       return ld_moved;
+               }
+ 
+               if (!busiest->active_balance) {
+                       busiest->active_balance = 1;
+                       busiest->push_cpu = this_cpu;
+                       active_balance = 1;
+               }
+ 
+               double_unlock_balance(this_rq, busiest);
+               if (active_balance)
+                       wake_up_process(busiest->migration_thread);
+ 
         } else
                 sd->nr_balance_failed = 0;
   
@@@ -3696,7 -3753,10 +3756,10 @@@ static void idle_balance(int this_cpu, 
         struct sched_domain *sd;
         int pulled_task = 0;
         unsigned long next_balance = jiffies + HZ;
-       cpumask_t tmpmask;
+       cpumask_var_t tmpmask;
+ 
+       if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
+               return;
   
         for_each_domain(this_cpu, sd) {
                 unsigned long interval;
@@@ -3707,7 -3767,7 +3770,7 @@@
                 if (sd->flags & SD_BALANCE_NEWIDLE)
                         /* If we've pulled tasks over stop searching: */
                         pulled_task = load_balance_newidle(this_cpu, this_rq,
-                                                          sd, &tmpmask);
+                                                          sd, tmpmask);
   
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
@@@ -3722,6 -3782,7 +3785,7 @@@
                  */
                 this_rq->next_balance = next_balance;
         }
+       free_cpumask_var(tmpmask);
   }
   
   /*
@@@ -3759,7 -3820,7 +3823,7 @@@ static void active_load_balance(struct 
         /* Search for an sd spanning us and the target CPU. */
         for_each_domain(target_cpu, sd) {
                 if ((sd->flags & SD_LOAD_BALANCE) &&
-                   cpu_isset(busiest_cpu, sd->span))
+                   cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
                                 break;
         }
   
@@@ -3778,10 -3839,9 +3842,9 @@@
   #ifdef CONFIG_NO_HZ
   static struct {
         atomic_t load_balancer;
-       cpumask_t cpu_mask;
+       cpumask_var_t cpu_mask;
   } nohz ____cacheline_aligned = {
         .load_balancer = ATOMIC_INIT(-1),
-       .cpu_mask = CPU_MASK_NONE,
   };
   
   /*
@@@ -3809,7 -3869,7 +3872,7 @@@ int select_nohz_load_balancer(int stop_
         int cpu = smp_processor_id();
   
         if (stop_tick) {
-               cpu_set(cpu, nohz.cpu_mask);
+               cpumask_set_cpu(cpu, nohz.cpu_mask);
                 cpu_rq(cpu)->in_nohz_recently = 1;
   
                 /*
@@@ -3823,7 -3883,7 +3886,7 @@@
                 }
   
                 /* time for ilb owner also to sleep */
-               if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+               if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
                         if (atomic_read(&nohz.load_balancer) == cpu)
                                 atomic_set(&nohz.load_balancer, -1);
                         return 0;
@@@ -3836,10 -3896,10 +3899,10 @@@
                 } else if (atomic_read(&nohz.load_balancer) == cpu)
                         return 1;
         } else {
-               if (!cpu_isset(cpu, nohz.cpu_mask))
+               if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
                         return 0;
   
-               cpu_clear(cpu, nohz.cpu_mask);
+               cpumask_clear_cpu(cpu, nohz.cpu_mask);
   
                 if (atomic_read(&nohz.load_balancer) == cpu)
                         if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
@@@ -3867,7 -3927,11 +3930,11 @@@ static void rebalance_domains(int cpu, 
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
         int need_serialize;
-       cpumask_t tmp;
+       cpumask_var_t tmp;
+ 
+       /* Fails alloc?  Rebalancing probably not a priority right now. */
+       if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
+               return;
   
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@@ -3892,7 -3956,7 +3959,7 @@@
                 }
   
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                       if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) {
+                       if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
                                 /*
                                  * We've pulled tasks over so either we're no
                                  * longer idle, or one of our SMT siblings is
@@@ -3926,6 -3990,8 +3993,8 @@@ out
          */
         if (likely(update_next_balance))
                 rq->next_balance = next_balance;
+ 
+       free_cpumask_var(tmp);
   }
   
   /*
@@@ -3950,12 -4016,13 +4019,13 @@@ static void run_rebalance_domains(struc
          */
         if (this_rq->idle_at_tick &&
             atomic_read(&nohz.load_balancer) == this_cpu) {
-               cpumask_t cpus = nohz.cpu_mask;
                 struct rq *rq;
                 int balance_cpu;
   
-               cpu_clear(this_cpu, cpus);
-               for_each_cpu_mask_nr(balance_cpu, cpus) {
+               for_each_cpu(balance_cpu, nohz.cpu_mask) {
+                       if (balance_cpu == this_cpu)
+                               continue;
+ 
                         /*
                          * If this cpu gets work to do, stop the load balancing
                          * work being done for other cpus. Next load
@@@ -3993,7 -4060,7 +4063,7 @@@ static inline void trigger_load_balance
                 rq->in_nohz_recently = 0;
   
                 if (atomic_read(&nohz.load_balancer) == cpu) {
-                       cpu_clear(cpu, nohz.cpu_mask);
+                       cpumask_clear_cpu(cpu, nohz.cpu_mask);
                         atomic_set(&nohz.load_balancer, -1);
                 }
   
@@@ -4006,7 -4073,7 +4076,7 @@@
                          * TBD: Traverse the sched domains and nominate
                          * the nearest cpu in the nohz.cpu_mask.
                          */
-                       int ilb = first_cpu(nohz.cpu_mask);
+                       int ilb = cpumask_first(nohz.cpu_mask);
   
                         if (ilb < nr_cpu_ids)
                                 resched_cpu(ilb);
@@@ -4018,7 -4085,7 +4088,7 @@@
          * cpus with ticks stopped, is it time for that to stop?
          */
         if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
-           cpus_weight(nohz.cpu_mask) == num_online_cpus()) {
+           cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
                 resched_cpu(cpu);
                 return;
         }
@@@ -4028,7 -4095,7 +4098,7 @@@
          * someone else, then no need raise the SCHED_SOFTIRQ
          */
         if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
-           cpu_isset(cpu, nohz.cpu_mask))
+           cpumask_test_cpu(cpu, nohz.cpu_mask))
                 return;
   #endif
         if (time_after_eq(jiffies, rq->next_balance))
@@@ -5120,22 -5187,6 +5190,22 @@@ __setscheduler(struct rq *rq, struct ta
         set_load_weight(p);
   }
   
+ +/*
+ + * check the target process has a UID that matches the current process's
+ + */
+ +static bool check_same_owner(struct task_struct *p)
+ +{
+ +      const struct cred *cred = current_cred(), *pcred;
+ +      bool match;
+ +
+ +      rcu_read_lock();
+ +      pcred = __task_cred(p);
+ +      match = (cred->euid == pcred->euid ||
+ +               cred->euid == pcred->uid);
+ +      rcu_read_unlock();
+ +      return match;
+ +}
+ +
   static int __sched_setscheduler(struct task_struct *p, int policy,
                                 struct sched_param *param, bool user)
   {
@@@ -5195,7 -5246,8 +5265,7 @@@ recheck
                         return -EPERM;
   
                 /* can't change other user's priorities */
- -              if ((current->euid != p->euid) &&
- -                  (current->euid != p->uid))
+ +              if (!check_same_owner(p))
                         return -EPERM;
         }
   
@@@ -5401,10 -5453,9 +5471,9 @@@ out_unlock
         return retval;
   }
   
- long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
+ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
   {
-       cpumask_t cpus_allowed;
-       cpumask_t new_mask = *in_mask;
+       cpumask_var_t cpus_allowed, new_mask;
         struct task_struct *p;
         int retval;
   
@@@ -5426,45 -5477,58 +5495,57 @@@
         get_task_struct(p);
         read_unlock(&tasklist_lock);
   
+       if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+               retval = -ENOMEM;
+               goto out_put_task;
+       }
+       if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+               retval = -ENOMEM;
+               goto out_free_cpus_allowed;
+       }
         retval = -EPERM;
- -      if ((current->euid != p->euid) && (current->euid != p->uid) &&
- -                      !capable(CAP_SYS_NICE))
+ +      if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
                 goto out_unlock;
   
         retval = security_task_setscheduler(p, 0, NULL);
         if (retval)
                 goto out_unlock;
   
-       cpuset_cpus_allowed(p, &cpus_allowed);
-       cpus_and(new_mask, new_mask, cpus_allowed);
+       cpuset_cpus_allowed(p, cpus_allowed);
+       cpumask_and(new_mask, in_mask, cpus_allowed);
    again:
-       retval = set_cpus_allowed_ptr(p, &new_mask);
+       retval = set_cpus_allowed_ptr(p, new_mask);
   
         if (!retval) {
-               cpuset_cpus_allowed(p, &cpus_allowed);
-               if (!cpus_subset(new_mask, cpus_allowed)) {
+               cpuset_cpus_allowed(p, cpus_allowed);
+               if (!cpumask_subset(new_mask, cpus_allowed)) {
                         /*
                          * We must have raced with a concurrent cpuset
                          * update. Just reset the cpus_allowed to the
                          * cpuset's cpus_allowed
                          */
-                       new_mask = cpus_allowed;
+                       cpumask_copy(new_mask, cpus_allowed);
                         goto again;
                 }
         }
   out_unlock:
+       free_cpumask_var(new_mask);
+ out_free_cpus_allowed:
+       free_cpumask_var(cpus_allowed);
+ out_put_task:
         put_task_struct(p);
         put_online_cpus();
         return retval;
   }
   
   static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
-                            cpumask_t *new_mask)
+                            struct cpumask *new_mask)
   {
-       if (len < sizeof(cpumask_t)) {
-               memset(new_mask, 0, sizeof(cpumask_t));
-       } else if (len > sizeof(cpumask_t)) {
-               len = sizeof(cpumask_t);
-       }
+       if (len < cpumask_size())
+               cpumask_clear(new_mask);
+       else if (len > cpumask_size())
+               len = cpumask_size();
+ 
         return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
   }
   
@@@ -5477,17 -5541,20 +5558,20 @@@
   asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
                                       unsigned long __user *user_mask_ptr)
   {
-       cpumask_t new_mask;
+       cpumask_var_t new_mask;
         int retval;
   
-       retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask);
-       if (retval)
-               return retval;
+       if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+               return -ENOMEM;
   
-       return sched_setaffinity(pid, &new_mask);
+       retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
+       if (retval == 0)
+               retval = sched_setaffinity(pid, new_mask);
+       free_cpumask_var(new_mask);
+       return retval;
   }
   
- long sched_getaffinity(pid_t pid, cpumask_t *mask)
+ long sched_getaffinity(pid_t pid, struct cpumask *mask)
   {
         struct task_struct *p;
         int retval;
@@@ -5504,7 -5571,7 +5588,7 @@@
         if (retval)
                 goto out_unlock;
   
-       cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+       cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
   
   out_unlock:
         read_unlock(&tasklist_lock);
@@@ -5523,19 -5590,24 +5607,24 @@@ asmlinkage long sys_sched_getaffinity(p
                                       unsigned long __user *user_mask_ptr)
   {
         int ret;
-       cpumask_t mask;
+       cpumask_var_t mask;
   
-       if (len < sizeof(cpumask_t))
+       if (len < cpumask_size())
                 return -EINVAL;
   
-       ret = sched_getaffinity(pid, &mask);
-       if (ret < 0)
-               return ret;
+       if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+               return -ENOMEM;
   
-       if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t)))
-               return -EFAULT;
+       ret = sched_getaffinity(pid, mask);
+       if (ret == 0) {
+               if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+                       ret = -EFAULT;
+               else
+                       ret = cpumask_size();
+       }
+       free_cpumask_var(mask);
   
-       return sizeof(cpumask_t);
+       return ret;
   }
   
   /**
@@@ -5877,7 -5949,7 +5966,7 @@@ void __cpuinit init_idle(struct task_st
         idle->se.exec_start = sched_clock();
   
         idle->prio = idle->normal_prio = MAX_PRIO;
-       idle->cpus_allowed = cpumask_of_cpu(cpu);
+       cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
         __set_task_cpu(idle, cpu);
   
         rq->curr = rq->idle = idle;
@@@ -5904,9 -5976,9 +5993,9 @@@
    * indicates which cpus entered this state. This is used
    * in the rcu update to wait only for active cpus. For system
    * which do not switch off the HZ timer nohz_cpu_mask should
-  * always be CPU_MASK_NONE.
+  * always be CPU_BITS_NONE.
    */
- cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+ cpumask_var_t nohz_cpu_mask;
   
   /*
    * Increase the granularity value when there are more CPUs,
@@@ -5961,7 -6033,7 +6050,7 @@@ static inline void sched_init_granulari
    * task must not exit() & deallocate itself prematurely. The
    * call is not atomic; no spinlocks may be held.
    */
- int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
+ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
   {
         struct migration_req req;
         unsigned long flags;
@@@ -5969,13 -6041,13 +6058,13 @@@
         int ret = 0;
   
         rq = task_rq_lock(p, &flags);
-       if (!cpus_intersects(*new_mask, cpu_online_map)) {
+       if (!cpumask_intersects(new_mask, cpu_online_mask)) {
                 ret = -EINVAL;
                 goto out;
         }
   
         if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
-                    !cpus_equal(p->cpus_allowed, *new_mask))) {
+                    !cpumask_equal(&p->cpus_allowed, new_mask))) {
                 ret = -EINVAL;
                 goto out;
         }
@@@ -5983,15 -6055,15 +6072,15 @@@
         if (p->sched_class->set_cpus_allowed)
                 p->sched_class->set_cpus_allowed(p, new_mask);
         else {
-               p->cpus_allowed = *new_mask;
-               p->rt.nr_cpus_allowed = cpus_weight(*new_mask);
+               cpumask_copy(&p->cpus_allowed, new_mask);
+               p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
         }
   
         /* Can the task run on the task's current CPU? If so, we're done */
-       if (cpu_isset(task_cpu(p), *new_mask))
+       if (cpumask_test_cpu(task_cpu(p), new_mask))
                 goto out;
   
-       if (migrate_task(p, any_online_cpu(*new_mask), &req)) {
+       if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
                 /* Need help from migration thread: drop lock and wait. */
                 task_rq_unlock(rq, &flags);
                 wake_up_process(rq->migration_thread);
@@@ -6033,7 -6105,7 +6122,7 @@@ static int __migrate_task(struct task_s
         if (task_cpu(p) != src_cpu)
                 goto done;
         /* Affinity changed (again). */
-       if (!cpu_isset(dest_cpu, p->cpus_allowed))
+       if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                 goto fail;
   
         on_rq = p->se.on_rq;
@@@ -6130,50 -6202,43 +6219,43 @@@ static int __migrate_task_irq(struct ta
    */
   static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
   {
-       unsigned long flags;
-       cpumask_t mask;
-       struct rq *rq;
         int dest_cpu;
+       /* FIXME: Use cpumask_of_node here. */
+       cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
+       const struct cpumask *nodemask = &_nodemask;
+ 
+ again:
+       /* Look for allowed, online CPU in same node. */
+       for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+               if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+                       goto move;
+ 
+       /* Any allowed, online CPU? */
+       dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+       if (dest_cpu < nr_cpu_ids)
+               goto move;
+ 
+       /* No more Mr. Nice Guy. */
+       if (dest_cpu >= nr_cpu_ids) {
+               cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+               dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
   
-       do {
-               /* On same node? */
-               mask = node_to_cpumask(cpu_to_node(dead_cpu));
-               cpus_and(mask, mask, p->cpus_allowed);
-               dest_cpu = any_online_cpu(mask);
- 
-               /* On any allowed CPU? */
-               if (dest_cpu >= nr_cpu_ids)
-                       dest_cpu = any_online_cpu(p->cpus_allowed);
- 
-               /* No more Mr. Nice Guy. */
-               if (dest_cpu >= nr_cpu_ids) {
-                       cpumask_t cpus_allowed;
- 
-                       cpuset_cpus_allowed_locked(p, &cpus_allowed);
-                       /*
-                        * Try to stay on the same cpuset, where the
-                        * current cpuset may be a subset of all cpus.
-                        * The cpuset_cpus_allowed_locked() variant of
-                        * cpuset_cpus_allowed() will not block. It must be
-                        * called within calls to cpuset_lock/cpuset_unlock.
-                        */
-                       rq = task_rq_lock(p, &flags);
-                       p->cpus_allowed = cpus_allowed;
-                       dest_cpu = any_online_cpu(p->cpus_allowed);
-                       task_rq_unlock(rq, &flags);
- 
-                       /*
-                        * Don't tell them about moving exiting tasks or
-                        * kernel threads (both mm NULL), since they never
-                        * leave kernel.
-                        */
-                       if (p->mm && printk_ratelimit()) {
-                               printk(KERN_INFO "process %d (%s) no "
-                                      "longer affine to cpu%d\n",
-                                       task_pid_nr(p), p->comm, dead_cpu);
-                       }
+               /*
+                * Don't tell them about moving exiting tasks or
+                * kernel threads (both mm NULL), since they never
+                * leave kernel.
+                */
+               if (p->mm && printk_ratelimit()) {
+                       printk(KERN_INFO "process %d (%s) no "
+                              "longer affine to cpu%d\n",
+                              task_pid_nr(p), p->comm, dead_cpu);
                 }
-       } while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
+       }
+ 
+ move:
+       /* It can have affinity changed while we were choosing. */
+       if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
+               goto again;
   }
   
   /*
@@@ -6185,7 -6250,7 +6267,7 @@@
    */
   static void migrate_nr_uninterruptible(struct rq *rq_src)
   {
-       struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR));
+       struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
         unsigned long flags;
   
         local_irq_save(flags);
@@@ -6475,7 -6540,7 +6557,7 @@@ static void set_rq_online(struct rq *rq
         if (!rq->online) {
                 const struct sched_class *class;
   
-               cpu_set(rq->cpu, rq->rd->online);
+               cpumask_set_cpu(rq->cpu, rq->rd->online);
                 rq->online = 1;
   
                 for_each_class(class) {
@@@ -6495,7 -6560,7 +6577,7 @@@ static void set_rq_offline(struct rq *r
                                 class->rq_offline(rq);
                 }
   
-               cpu_clear(rq->cpu, rq->rd->online);
+               cpumask_clear_cpu(rq->cpu, rq->rd->online);
                 rq->online = 0;
         }
   }
@@@ -6536,7 -6601,7 +6618,7 @@@ migration_call(struct notifier_block *n
                 rq = cpu_rq(cpu);
                 spin_lock_irqsave(&rq->lock, flags);
                 if (rq->rd) {
-                       BUG_ON(!cpu_isset(cpu, rq->rd->span));
+                       BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
   
                         set_rq_online(rq);
                 }
@@@ -6550,7 -6615,7 +6632,7 @@@
                         break;
                 /* Unbind it from offline cpu so it can run. Fall thru. */
                 kthread_bind(cpu_rq(cpu)->migration_thread,
-                            any_online_cpu(cpu_online_map));
+                            cpumask_any(cpu_online_mask));
                 kthread_stop(cpu_rq(cpu)->migration_thread);
                 cpu_rq(cpu)->migration_thread = NULL;
                 break;
@@@ -6600,7 -6665,7 +6682,7 @@@
                 rq = cpu_rq(cpu);
                 spin_lock_irqsave(&rq->lock, flags);
                 if (rq->rd) {
-                       BUG_ON(!cpu_isset(cpu, rq->rd->span));
+                       BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                         set_rq_offline(rq);
                 }
                 spin_unlock_irqrestore(&rq->lock, flags);
@@@ -6639,13 -6704,13 +6721,13 @@@ early_initcall(migration_init)
   #ifdef CONFIG_SCHED_DEBUG
   
   static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
-                                 cpumask_t *groupmask)
+                                 struct cpumask *groupmask)
   {
         struct sched_group *group = sd->groups;
         char str[256];
   
-       cpulist_scnprintf(str, sizeof(str), sd->span);
-       cpus_clear(*groupmask);
+       cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
+       cpumask_clear(groupmask);
   
         printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
   
@@@ -6659,11 -6724,11 +6741,11 @@@
   
         printk(KERN_CONT "span %s level %s\n", str, sd->name);
   
-       if (!cpu_isset(cpu, sd->span)) {
+       if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                 printk(KERN_ERR "ERROR: domain->span does not contain "
                                 "CPU%d\n", cpu);
         }
-       if (!cpu_isset(cpu, group->cpumask)) {
+       if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
                 printk(KERN_ERR "ERROR: domain->groups does not contain"
                                 " CPU%d\n", cpu);
         }
@@@ -6683,31 -6748,32 +6765,32 @@@
                         break;
                 }
   
-               if (!cpus_weight(group->cpumask)) {
+               if (!cpumask_weight(sched_group_cpus(group))) {
                         printk(KERN_CONT "\n");
                         printk(KERN_ERR "ERROR: empty group\n");
                         break;
                 }
   
-               if (cpus_intersects(*groupmask, group->cpumask)) {
+               if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
                         printk(KERN_CONT "\n");
                         printk(KERN_ERR "ERROR: repeated CPUs\n");
                         break;
                 }
   
-               cpus_or(*groupmask, *groupmask, group->cpumask);
+               cpumask_or(groupmask, groupmask, sched_group_cpus(group));
   
-               cpulist_scnprintf(str, sizeof(str), group->cpumask);
+               cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
                 printk(KERN_CONT " %s", str);
   
                 group = group->next;
         } while (group != sd->groups);
         printk(KERN_CONT "\n");
   
-       if (!cpus_equal(sd->span, *groupmask))
+       if (!cpumask_equal(sched_domain_span(sd), groupmask))
                 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
   
-       if (sd->parent && !cpus_subset(*groupmask, sd->parent->span))
+       if (sd->parent &&
+           !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
                 printk(KERN_ERR "ERROR: parent span is not a superset "
                         "of domain->span\n");
         return 0;
@@@ -6715,7 -6781,7 +6798,7 @@@
   
   static void sched_domain_debug(struct sched_domain *sd, int cpu)
   {
-       cpumask_t *groupmask;
+       cpumask_var_t groupmask;
         int level = 0;
   
         if (!sd) {
@@@ -6725,8 -6791,7 +6808,7 @@@
   
         printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
   
-       groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
-       if (!groupmask) {
+       if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
                 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
                 return;
         }
@@@ -6739,7 -6804,7 +6821,7 @@@
                 if (!sd)
                         break;
         }
-       kfree(groupmask);
+       free_cpumask_var(groupmask);
   }
   #else /* !CONFIG_SCHED_DEBUG */
   # define sched_domain_debug(sd, cpu) do { } while (0)
@@@ -6747,7 -6812,7 +6829,7 @@@
   
   static int sd_degenerate(struct sched_domain *sd)
   {
-       if (cpus_weight(sd->span) == 1)
+       if (cpumask_weight(sched_domain_span(sd)) == 1)
                 return 1;
   
         /* Following flags need at least 2 groups */
@@@ -6778,7 -6843,7 +6860,7 @@@ sd_parent_degenerate(struct sched_domai
         if (sd_degenerate(parent))
                 return 1;
   
-       if (!cpus_equal(sd->span, parent->span))
+       if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
                 return 0;
   
         /* Does parent contain flags not in child? */
@@@ -6802,6 -6867,16 +6884,16 @@@
         return 1;
   }
   
+ static void free_rootdomain(struct root_domain *rd)
+ {
+       cpupri_cleanup(&rd->cpupri);
+ 
+       free_cpumask_var(rd->rto_mask);
+       free_cpumask_var(rd->online);
+       free_cpumask_var(rd->span);
+       kfree(rd);
+ }
+ 
   static void rq_attach_root(struct rq *rq, struct root_domain *rd)
   {
         unsigned long flags;
@@@ -6811,38 -6886,63 +6903,63 @@@
         if (rq->rd) {
                 struct root_domain *old_rd = rq->rd;
   
-               if (cpu_isset(rq->cpu, old_rd->online))
+               if (cpumask_test_cpu(rq->cpu, old_rd->online))
                         set_rq_offline(rq);
   
-               cpu_clear(rq->cpu, old_rd->span);
+               cpumask_clear_cpu(rq->cpu, old_rd->span);
   
                 if (atomic_dec_and_test(&old_rd->refcount))
-                       kfree(old_rd);
+                       free_rootdomain(old_rd);
         }
   
         atomic_inc(&rd->refcount);
         rq->rd = rd;
   
-       cpu_set(rq->cpu, rd->span);
-       if (cpu_isset(rq->cpu, cpu_online_map))
+       cpumask_set_cpu(rq->cpu, rd->span);
+       if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
                 set_rq_online(rq);
   
         spin_unlock_irqrestore(&rq->lock, flags);
   }
   
- static void init_rootdomain(struct root_domain *rd)
+ static int init_rootdomain(struct root_domain *rd, bool bootmem)
   {
         memset(rd, 0, sizeof(*rd));
   
-       cpus_clear(rd->span);
-       cpus_clear(rd->online);
+       if (bootmem) {
+               alloc_bootmem_cpumask_var(&rd->span);
+               alloc_bootmem_cpumask_var(&rd->online);
+               alloc_bootmem_cpumask_var(&rd->rto_mask);
+               cpupri_init(&rd->cpupri, true);
+               return 0;
+       }
+ 
+       if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+               goto free_rd;
+       if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+               goto free_span;
+       if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+               goto free_online;
+ 
+       if (cpupri_init(&rd->cpupri, false) != 0)
+               goto free_rto_mask;
+       return 0;
   
-       cpupri_init(&rd->cpupri);
+ free_rto_mask:
+       free_cpumask_var(rd->rto_mask);
+ free_online:
+       free_cpumask_var(rd->online);
+ free_span:
+       free_cpumask_var(rd->span);
+ free_rd:
+       /* rd itself belongs to the caller, which frees it on failure. */
+       return -ENOMEM;
   }
   
   static void init_defrootdomain(void)
   {
-       init_rootdomain(&def_root_domain);
+       init_rootdomain(&def_root_domain, true);
+ 
         atomic_set(&def_root_domain.refcount, 1);
   }
   
@@@ -6854,7 -6954,10 +6971,10 @@@ static struct root_domain *alloc_rootdo
         if (!rd)
                 return NULL;
   
-       init_rootdomain(rd);
+       if (init_rootdomain(rd, false) != 0) {
+               kfree(rd);
+               return NULL;
+       }
   
         return rd;
   }
@@@ -6896,19 -6999,12 +7016,12 @@@ cpu_attach_domain(struct sched_domain *
   }
   
   /* cpus with isolated domains */
- static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
+ static cpumask_var_t cpu_isolated_map;
   
   /* Setup the mask of cpus configured for isolated domains */
   static int __init isolated_cpu_setup(char *str)
   {
-       static int __initdata ints[NR_CPUS];
-       int i;
- 
-       str = get_options(str, ARRAY_SIZE(ints), ints);
-       cpus_clear(cpu_isolated_map);
-       for (i = 1; i <= ints[0]; i++)
-               if (ints[i] < NR_CPUS)
-                       cpu_set(ints[i], cpu_isolated_map);
+       cpulist_parse(str, cpu_isolated_map);
         return 1;
   }
   
@@@ -6917,42 -7013,43 +7030,43 @@@ __setup("isolcpus=", isolated_cpu_setup
   /*
    * init_sched_build_groups takes the cpumask we wish to span, and a pointer
    * to a function which identifies what group(along with sched group) a CPU
-  * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS
-  * (due to the fact that we keep track of groups covered with a cpumask_t).
+  * belongs to. The return value of group_fn must be >= 0 and < nr_cpu_ids
+  * (due to the fact that we keep track of groups covered with a struct cpumask).
    *
    * init_sched_build_groups will build a circular linked list of the groups
    * covered by the given span, and will set each group's ->cpumask correctly,
    * and ->cpu_power to 0.
    */
   static void
- init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
-                       int (*group_fn)(int cpu, const cpumask_t *cpu_map,
+ init_sched_build_groups(const struct cpumask *span,
+                       const struct cpumask *cpu_map,
+                       int (*group_fn)(int cpu, const struct cpumask *cpu_map,
                                         struct sched_group **sg,
-                                       cpumask_t *tmpmask),
-                       cpumask_t *covered, cpumask_t *tmpmask)
+                                       struct cpumask *tmpmask),
+                       struct cpumask *covered, struct cpumask *tmpmask)
   {
         struct sched_group *first = NULL, *last = NULL;
         int i;
   
-       cpus_clear(*covered);
+       cpumask_clear(covered);
   
-       for_each_cpu_mask_nr(i, *span) {
+       for_each_cpu(i, span) {
                 struct sched_group *sg;
                 int group = group_fn(i, cpu_map, &sg, tmpmask);
                 int j;
   
-               if (cpu_isset(i, *covered))
+               if (cpumask_test_cpu(i, covered))
                         continue;
   
-               cpus_clear(sg->cpumask);
+               cpumask_clear(sched_group_cpus(sg));
                 sg->__cpu_power = 0;
   
-               for_each_cpu_mask_nr(j, *span) {
+               for_each_cpu(j, span) {
                         if (group_fn(j, cpu_map, NULL, tmpmask) != group)
                                 continue;
   
-                       cpu_set(j, *covered);
-                       cpu_set(j, sg->cpumask);
+                       cpumask_set_cpu(j, covered);
+                       cpumask_set_cpu(j, sched_group_cpus(sg));
                 }
                 if (!first)
                         first = sg;
@@@ -7016,9 -7113,10 +7130,10 @@@ static int find_next_best_node(int node
    * should be one that prevents unnecessary balancing, but also spreads tasks
    * out optimally.
    */
- static void sched_domain_node_span(int node, cpumask_t *span)
+ static void sched_domain_node_span(int node, struct cpumask *span)
   {
         nodemask_t used_nodes;
+       /* FIXME: use cpumask_of_node() */
         node_to_cpumask_ptr(nodemask, node);
         int i;
   
@@@ -7039,19 -7137,34 +7154,34 @@@
   
   int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
   
+ /*
+  * The cpus mask in sched_group and sched_domain hangs off the end.
+  * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
+  * for nr_cpu_ids < CONFIG_NR_CPUS.
+  */
+ struct static_sched_group {
+       struct sched_group sg;
+       DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
+ };
+ 
+ struct static_sched_domain {
+       struct sched_domain sd;
+       DECLARE_BITMAP(span, CONFIG_NR_CPUS);
+ };
+ 
   /*
    * SMT sched-domains:
    */
   #ifdef CONFIG_SCHED_SMT
- static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
+ static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
   
   static int
- cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-                cpumask_t *unused)
+ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
+                struct sched_group **sg, struct cpumask *unused)
   {
         if (sg)
-               *sg = &per_cpu(sched_group_cpus, cpu);
+               *sg = &per_cpu(sched_group_cpus, cpu).sg;
         return cpu;
   }
   #endif /* CONFIG_SCHED_SMT */
@@@ -7060,56 -7173,55 +7190,55 @@@
    * multi-core sched-domains:
    */
   #ifdef CONFIG_SCHED_MC
- static DEFINE_PER_CPU(struct sched_domain, core_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_core);
+ static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
   #endif /* CONFIG_SCHED_MC */
   
   #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
   static int
- cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-                 cpumask_t *mask)
+ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+                 struct sched_group **sg, struct cpumask *mask)
   {
         int group;
   
-       *mask = per_cpu(cpu_sibling_map, cpu);
-       cpus_and(*mask, *mask, *cpu_map);
-       group = first_cpu(*mask);
+       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       group = cpumask_first(mask);
         if (sg)
-               *sg = &per_cpu(sched_group_core, group);
+               *sg = &per_cpu(sched_group_core, group).sg;
         return group;
   }
   #elif defined(CONFIG_SCHED_MC)
   static int
- cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-                 cpumask_t *unused)
+ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+                 struct sched_group **sg, struct cpumask *unused)
   {
         if (sg)
-               *sg = &per_cpu(sched_group_core, cpu);
+               *sg = &per_cpu(sched_group_core, cpu).sg;
         return cpu;
   }
   #endif
   
- static DEFINE_PER_CPU(struct sched_domain, phys_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
+ static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
   
   static int
- cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-                 cpumask_t *mask)
+ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
+                 struct sched_group **sg, struct cpumask *mask)
   {
         int group;
   #ifdef CONFIG_SCHED_MC
+       /* FIXME: Use cpu_coregroup_mask. */
         *mask = cpu_coregroup_map(cpu);
         cpus_and(*mask, *mask, *cpu_map);
-       group = first_cpu(*mask);
+       group = cpumask_first(mask);
   #elif defined(CONFIG_SCHED_SMT)
-       *mask = per_cpu(cpu_sibling_map, cpu);
-       cpus_and(*mask, *mask, *cpu_map);
-       group = first_cpu(*mask);
+       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       group = cpumask_first(mask);
   #else
         group = cpu;
   #endif
         if (sg)
-               *sg = &per_cpu(sched_group_phys, group);
+               *sg = &per_cpu(sched_group_phys, group).sg;
         return group;
   }
   
@@@ -7123,19 -7235,21 +7252,21 @@@ static DEFINE_PER_CPU(struct sched_doma
   static struct sched_group ***sched_group_nodes_bycpu;
   
   static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
- static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
   
- static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
-                                struct sched_group **sg, cpumask_t *nodemask)
+ static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
+                                struct sched_group **sg,
+                                struct cpumask *nodemask)
   {
         int group;
+       /* FIXME: use cpumask_of_node */
+       node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
   
-       *nodemask = node_to_cpumask(cpu_to_node(cpu));
-       cpus_and(*nodemask, *nodemask, *cpu_map);
-       group = first_cpu(*nodemask);
+       cpumask_and(nodemask, pnodemask, cpu_map);
+       group = cpumask_first(nodemask);
   
         if (sg)
-               *sg = &per_cpu(sched_group_allnodes, group);
+               *sg = &per_cpu(sched_group_allnodes, group).sg;
         return group;
   }
   
@@@ -7147,11 -7261,11 +7278,11 @@@ static void init_numa_sched_groups_powe
         if (!sg)
                 return;
         do {
-               for_each_cpu_mask_nr(j, sg->cpumask) {
+               for_each_cpu(j, sched_group_cpus(sg)) {
                         struct sched_domain *sd;
   
-                       sd = &per_cpu(phys_domains, j);
-                       if (j != first_cpu(sd->groups->cpumask)) {
+                       sd = &per_cpu(phys_domains, j).sd;
+                       if (j != cpumask_first(sched_group_cpus(sd->groups))) {
                                 /*
                                  * Only add "power" once for each
                                  * physical package.
@@@ -7168,11 -7282,12 +7299,12 @@@
   
   #ifdef CONFIG_NUMA
   /* Free memory allocated for various sched_group structures */
- static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+ static void free_sched_groups(const struct cpumask *cpu_map,
+                             struct cpumask *nodemask)
   {
         int cpu, i;
   
-       for_each_cpu_mask_nr(cpu, *cpu_map) {
+       for_each_cpu(cpu, cpu_map) {
                 struct sched_group **sched_group_nodes
                         = sched_group_nodes_bycpu[cpu];
   
@@@ -7181,10 -7296,11 +7313,11 @@@
   
                 for (i = 0; i < nr_node_ids; i++) {
                         struct sched_group *oldsg, *sg = sched_group_nodes[i];
+                       /* FIXME: Use cpumask_of_node */
+                       node_to_cpumask_ptr(pnodemask, i);
   
-                       *nodemask = node_to_cpumask(i);
-                       cpus_and(*nodemask, *nodemask, *cpu_map);
-                       if (cpus_empty(*nodemask))
+                       cpus_and(*nodemask, *pnodemask, *cpu_map);
+                       if (cpumask_empty(nodemask))
                                 continue;
   
                         if (sg == NULL)
@@@ -7202,7 -7318,8 +7335,8 @@@ next_sg
         }
   }
   #else /* !CONFIG_NUMA */
- static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+ static void free_sched_groups(const struct cpumask *cpu_map,
+                             struct cpumask *nodemask)
   {
   }
   #endif /* CONFIG_NUMA */
@@@ -7228,7 -7345,7 +7362,7 @@@ static void init_sched_groups_power(in
   
         WARN_ON(!sd || !sd->groups);
   
-       if (cpu != first_cpu(sd->groups->cpumask))
+       if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
                 return;
   
         child = sd->child;
@@@ -7293,48 -7410,6 +7427,6 @@@ SD_INIT_FUNC(CPU
    SD_INIT_FUNC(MC)
   #endif
   
- /*
-  * To minimize stack usage kmalloc room for cpumasks and share the
-  * space as the usage in build_sched_domains() dictates.  Used only
-  * if the amount of space is significant.
-  */
- struct allmasks {
-       cpumask_t tmpmask;                      /* make this one first */
-       union {
-               cpumask_t nodemask;
-               cpumask_t this_sibling_map;
-               cpumask_t this_core_map;
-       };
-       cpumask_t send_covered;
- 
- #ifdef CONFIG_NUMA
-       cpumask_t domainspan;
-       cpumask_t covered;
-       cpumask_t notcovered;
- #endif
- };
- 
- #if   NR_CPUS > 128
- #define SCHED_CPUMASK_DECLARE(v)      struct allmasks *v
- static inline void sched_cpumask_alloc(struct allmasks **masks)
- {
-       *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
- }
- static inline void sched_cpumask_free(struct allmasks *masks)
- {
-       kfree(masks);
- }
- #else
- #define SCHED_CPUMASK_DECLARE(v)      struct allmasks _v, *v = &_v
- static inline void sched_cpumask_alloc(struct allmasks **masks)
- { }
- static inline void sched_cpumask_free(struct allmasks *masks)
- { }
- #endif
- 
- #define       SCHED_CPUMASK_VAR(v, a)         cpumask_t *v = (cpumask_t *) \
-                       ((unsigned long)(a) + offsetof(struct allmasks, v))
- 
   static int default_relax_domain_level = -1;
   
   static int __init setup_relax_domain_level(char *str)
@@@ -7374,17 -7449,38 +7466,38 @@@ static void set_domain_attribute(struc
    * Build sched domains for a given set of cpus and attach the sched domains
    * to the individual cpus
    */
- static int __build_sched_domains(const cpumask_t *cpu_map,
+ static int __build_sched_domains(const struct cpumask *cpu_map,
                                  struct sched_domain_attr *attr)
   {
-       int i;
+       int i, err = -ENOMEM;
         struct root_domain *rd;
-       SCHED_CPUMASK_DECLARE(allmasks);
-       cpumask_t *tmpmask;
+       cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
+               tmpmask;
   #ifdef CONFIG_NUMA
+       cpumask_var_t domainspan, covered, notcovered;
         struct sched_group **sched_group_nodes = NULL;
         int sd_allnodes = 0;
   
+       if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
+               goto out;
+       if (!alloc_cpumask_var(&covered, GFP_KERNEL))
+               goto free_domainspan;
+       if (!alloc_cpumask_var(&notcovered, GFP_KERNEL))
+               goto free_covered;
+ #endif
+ 
+       if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
+               goto free_notcovered;
+       if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
+               goto free_nodemask;
+       if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
+               goto free_this_sibling_map;
+       if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
+               goto free_this_core_map;
+       if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+               goto free_send_covered;
+ 
+ #ifdef CONFIG_NUMA
         /*
          * Allocate the per-node list of sched groups
          */
@@@ -7392,54 -7488,37 +7505,37 @@@
                                     GFP_KERNEL);
         if (!sched_group_nodes) {
                 printk(KERN_WARNING "Can not alloc sched group node list\n");
-               return -ENOMEM;
+               goto free_tmpmask;
         }
   #endif
   
         rd = alloc_rootdomain();
         if (!rd) {
                 printk(KERN_WARNING "Cannot alloc root domain\n");
- #ifdef CONFIG_NUMA
-               kfree(sched_group_nodes);
- #endif
-               return -ENOMEM;
+               goto free_sched_groups;
         }
   
-       /* get space for all scratch cpumask variables */
-       sched_cpumask_alloc(&allmasks);
-       if (!allmasks) {
-               printk(KERN_WARNING "Cannot alloc cpumask array\n");
-               kfree(rd);
   #ifdef CONFIG_NUMA
-               kfree(sched_group_nodes);
- #endif
-               return -ENOMEM;
-       }
- 
-       tmpmask = (cpumask_t *)allmasks;
- 
- 
- #ifdef CONFIG_NUMA
-       sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
+       sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
   #endif
   
         /*
          * Set up domains for cpus specified by the cpu_map.
          */
-       for_each_cpu_mask_nr(i, *cpu_map) {
+       for_each_cpu(i, cpu_map) {
                 struct sched_domain *sd = NULL, *p;
-               SCHED_CPUMASK_VAR(nodemask, allmasks);
   
+               /* FIXME: use cpumask_of_node */
                 *nodemask = node_to_cpumask(cpu_to_node(i));
                 cpus_and(*nodemask, *nodemask, *cpu_map);
   
   #ifdef CONFIG_NUMA
-               if (cpus_weight(*cpu_map) >
-                               SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
+               if (cpumask_weight(cpu_map) >
+                               SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
                         sd = &per_cpu(allnodes_domains, i);
                         SD_INIT(sd, ALLNODES);
                         set_domain_attribute(sd, attr);
-                       sd->span = *cpu_map;
+                       cpumask_copy(sched_domain_span(sd), cpu_map);
                         cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
                         p = sd;
                         sd_allnodes = 1;
@@@ -7449,18 -7528,19 +7545,19 @@@
                 sd = &per_cpu(node_domains, i);
                 SD_INIT(sd, NODE);
                 set_domain_attribute(sd, attr);
-               sched_domain_node_span(cpu_to_node(i), &sd->span);
+               sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
                 sd->parent = p;
                 if (p)
                         p->child = sd;
-               cpus_and(sd->span, sd->span, *cpu_map);
+               cpumask_and(sched_domain_span(sd),
+                           sched_domain_span(sd), cpu_map);
   #endif
   
                 p = sd;
-               sd = &per_cpu(phys_domains, i);
+               sd = &per_cpu(phys_domains, i).sd;
                 SD_INIT(sd, CPU);
                 set_domain_attribute(sd, attr);
-               sd->span = *nodemask;
+               cpumask_copy(sched_domain_span(sd), nodemask);
                 sd->parent = p;
                 if (p)
                         p->child = sd;
@@@ -7468,11 -7548,12 +7565,12 @@@
   
   #ifdef CONFIG_SCHED_MC
                 p = sd;
-               sd = &per_cpu(core_domains, i);
+               sd = &per_cpu(core_domains, i).sd;
                 SD_INIT(sd, MC);
                 set_domain_attribute(sd, attr);
-               sd->span = cpu_coregroup_map(i);
-               cpus_and(sd->span, sd->span, *cpu_map);
+               *sched_domain_span(sd) = cpu_coregroup_map(i);
+               cpumask_and(sched_domain_span(sd),
+                           sched_domain_span(sd), cpu_map);
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@@ -7480,11 -7561,11 +7578,11 @@@
   
   #ifdef CONFIG_SCHED_SMT
                 p = sd;
-               sd = &per_cpu(cpu_domains, i);
+               sd = &per_cpu(cpu_domains, i).sd;
                 SD_INIT(sd, SIBLING);
                 set_domain_attribute(sd, attr);
-               sd->span = per_cpu(cpu_sibling_map, i);
-               cpus_and(sd->span, sd->span, *cpu_map);
+               cpumask_and(sched_domain_span(sd),
+                           &per_cpu(cpu_sibling_map, i), cpu_map);
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@@ -7493,13 -7574,10 +7591,10 @@@
   
   #ifdef CONFIG_SCHED_SMT
         /* Set up CPU (sibling) groups */
-       for_each_cpu_mask_nr(i, *cpu_map) {
-               SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
-               SCHED_CPUMASK_VAR(send_covered, allmasks);
- 
-               *this_sibling_map = per_cpu(cpu_sibling_map, i);
-               cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
-               if (i != first_cpu(*this_sibling_map))
+       for_each_cpu(i, cpu_map) {
+               cpumask_and(this_sibling_map,
+                           &per_cpu(cpu_sibling_map, i), cpu_map);
+               if (i != cpumask_first(this_sibling_map))
                         continue;
   
                 init_sched_build_groups(this_sibling_map, cpu_map,
@@@ -7510,13 -7588,11 +7605,11 @@@
   
   #ifdef CONFIG_SCHED_MC
         /* Set up multi-core groups */
-       for_each_cpu_mask_nr(i, *cpu_map) {
-               SCHED_CPUMASK_VAR(this_core_map, allmasks);
-               SCHED_CPUMASK_VAR(send_covered, allmasks);
- 
+       for_each_cpu(i, cpu_map) {
+               /* FIXME: Use cpu_coregroup_mask */
                 *this_core_map = cpu_coregroup_map(i);
                 cpus_and(*this_core_map, *this_core_map, *cpu_map);
-               if (i != first_cpu(*this_core_map))
+               if (i != cpumask_first(this_core_map))
                         continue;
   
                 init_sched_build_groups(this_core_map, cpu_map,
@@@ -7527,12 -7603,10 +7620,10 @@@
   
         /* Set up physical groups */
         for (i = 0; i < nr_node_ids; i++) {
-               SCHED_CPUMASK_VAR(nodemask, allmasks);
-               SCHED_CPUMASK_VAR(send_covered, allmasks);
- 
+               /* FIXME: Use cpumask_of_node */
                 *nodemask = node_to_cpumask(i);
                 cpus_and(*nodemask, *nodemask, *cpu_map);
-               if (cpus_empty(*nodemask))
+               if (cpumask_empty(nodemask))
                         continue;
   
                 init_sched_build_groups(nodemask, cpu_map,
@@@ -7543,8 -7617,6 +7634,6 @@@
   #ifdef CONFIG_NUMA
         /* Set up node groups */
         if (sd_allnodes) {
-               SCHED_CPUMASK_VAR(send_covered, allmasks);
- 
                 init_sched_build_groups(cpu_map, cpu_map,
                                         &cpu_to_allnodes_group,
                                         send_covered, tmpmask);
@@@ -7553,58 -7625,58 +7642,58 @@@
         for (i = 0; i < nr_node_ids; i++) {
                 /* Set up node groups */
                 struct sched_group *sg, *prev;
-               SCHED_CPUMASK_VAR(nodemask, allmasks);
-               SCHED_CPUMASK_VAR(domainspan, allmasks);
-               SCHED_CPUMASK_VAR(covered, allmasks);
                 int j;
   
+               /* FIXME: Use cpumask_of_node */
                 *nodemask = node_to_cpumask(i);
-               cpus_clear(*covered);
+               cpumask_clear(covered);
   
                 cpus_and(*nodemask, *nodemask, *cpu_map);
-               if (cpus_empty(*nodemask)) {
+               if (cpumask_empty(nodemask)) {
                         sched_group_nodes[i] = NULL;
                         continue;
                 }
   
                 sched_domain_node_span(i, domainspan);
-               cpus_and(*domainspan, *domainspan, *cpu_map);
+               cpumask_and(domainspan, domainspan, cpu_map);
   
-               sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i);
+               sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
+                                 GFP_KERNEL, i);
                 if (!sg) {
                         printk(KERN_WARNING "Can not alloc domain group for "
                                 "node %d\n", i);
                         goto error;
                 }
                 sched_group_nodes[i] = sg;
-               for_each_cpu_mask_nr(j, *nodemask) {
+               for_each_cpu(j, nodemask) {
                         struct sched_domain *sd;
   
                         sd = &per_cpu(node_domains, j);
                         sd->groups = sg;
                 }
                 sg->__cpu_power = 0;
-               sg->cpumask = *nodemask;
+               cpumask_copy(sched_group_cpus(sg), nodemask);
                 sg->next = sg;
-               cpus_or(*covered, *covered, *nodemask);
+               cpumask_or(covered, covered, nodemask);
                 prev = sg;
   
                 for (j = 0; j < nr_node_ids; j++) {
-                       SCHED_CPUMASK_VAR(notcovered, allmasks);
                         int n = (i + j) % nr_node_ids;
+                       /* FIXME: Use cpumask_of_node */
                         node_to_cpumask_ptr(pnodemask, n);
   
-                       cpus_complement(*notcovered, *covered);
-                       cpus_and(*tmpmask, *notcovered, *cpu_map);
-                       cpus_and(*tmpmask, *tmpmask, *domainspan);
-                       if (cpus_empty(*tmpmask))
+                       cpumask_complement(notcovered, covered);
+                       cpumask_and(tmpmask, notcovered, cpu_map);
+                       cpumask_and(tmpmask, tmpmask, domainspan);
+                       if (cpumask_empty(tmpmask))
                                 break;
   
-                       cpus_and(*tmpmask, *tmpmask, *pnodemask);
-                       if (cpus_empty(*tmpmask))
+                       cpumask_and(tmpmask, tmpmask, pnodemask);
+                       if (cpumask_empty(tmpmask))
                                 continue;
   
-                       sg = kmalloc_node(sizeof(struct sched_group),
+                       sg = kmalloc_node(sizeof(struct sched_group) +
+                                         cpumask_size(),
                                           GFP_KERNEL, i);
                         if (!sg) {
                                 printk(KERN_WARNING
@@@ -7612,9 -7684,9 +7701,9 @@@
                                 goto error;
                         }
                         sg->__cpu_power = 0;
-                       sg->cpumask = *tmpmask;
+                       cpumask_copy(sched_group_cpus(sg), tmpmask);
                         sg->next = prev->next;
-                       cpus_or(*covered, *covered, *tmpmask);
+                       cpumask_or(covered, covered, tmpmask);
                         prev->next = sg;
                         prev = sg;
                 }
@@@ -7623,22 -7695,22 +7712,22 @@@
   
         /* Calculate CPU power for physical packages and nodes */
   #ifdef CONFIG_SCHED_SMT
-       for_each_cpu_mask_nr(i, *cpu_map) {
-               struct sched_domain *sd = &per_cpu(cpu_domains, i);
+       for_each_cpu(i, cpu_map) {
+               struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
   
                 init_sched_groups_power(i, sd);
         }
   #endif
   #ifdef CONFIG_SCHED_MC
-       for_each_cpu_mask_nr(i, *cpu_map) {
-               struct sched_domain *sd = &per_cpu(core_domains, i);
+       for_each_cpu(i, cpu_map) {
+               struct sched_domain *sd = &per_cpu(core_domains, i).sd;
   
                 init_sched_groups_power(i, sd);
         }
   #endif
   
-       for_each_cpu_mask_nr(i, *cpu_map) {
-               struct sched_domain *sd = &per_cpu(phys_domains, i);
+       for_each_cpu(i, cpu_map) {
+               struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
   
                 init_sched_groups_power(i, sd);
         }
@@@ -7650,53 -7722,78 +7739,78 @@@
         if (sd_allnodes) {
                 struct sched_group *sg;
   
-               cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg,
+               cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
                                                                 tmpmask);
                 init_numa_sched_groups_power(sg);
         }
   #endif
   
         /* Attach the domains */
-       for_each_cpu_mask_nr(i, *cpu_map) {
+       for_each_cpu(i, cpu_map) {
                 struct sched_domain *sd;
   #ifdef CONFIG_SCHED_SMT
-               sd = &per_cpu(cpu_domains, i);
+               sd = &per_cpu(cpu_domains, i).sd;
   #elif defined(CONFIG_SCHED_MC)
-               sd = &per_cpu(core_domains, i);
+               sd = &per_cpu(core_domains, i).sd;
   #else
-               sd = &per_cpu(phys_domains, i);
+               sd = &per_cpu(phys_domains, i).sd;
   #endif
                 cpu_attach_domain(sd, rd, i);
         }
   
-       sched_cpumask_free(allmasks);
-       return 0;
+       err = 0;
+ 
+ free_tmpmask:
+       free_cpumask_var(tmpmask);
+ free_send_covered:
+       free_cpumask_var(send_covered);
+ free_this_core_map:
+       free_cpumask_var(this_core_map);
+ free_this_sibling_map:
+       free_cpumask_var(this_sibling_map);
+ free_nodemask:
+       free_cpumask_var(nodemask);
+ free_notcovered:
+ #ifdef CONFIG_NUMA
+       free_cpumask_var(notcovered);
+ free_covered:
+       free_cpumask_var(covered);
+ free_domainspan:
+       free_cpumask_var(domainspan);
+ out:
+ #endif
+       return err;
+ 
+ free_sched_groups:
+ #ifdef CONFIG_NUMA
+       kfree(sched_group_nodes);
+ #endif
+       goto free_tmpmask;
   
   #ifdef CONFIG_NUMA
   error:
         free_sched_groups(cpu_map, tmpmask);
-       sched_cpumask_free(allmasks);
-       kfree(rd);
-       return -ENOMEM;
+       free_rootdomain(rd);
+       goto free_tmpmask;
   #endif
   }
   
- static int build_sched_domains(const cpumask_t *cpu_map)
+ static int build_sched_domains(const struct cpumask *cpu_map)
   {
         return __build_sched_domains(cpu_map, NULL);
   }
   
- static cpumask_t *doms_cur;   /* current sched domains */
+ static struct cpumask *doms_cur;      /* current sched domains */
   static int ndoms_cur;         /* number of sched domains in 'doms_cur' */
   static struct sched_domain_attr *dattr_cur;
                                 /* attribues of custom domains in 'doms_cur' */
   
   /*
    * Special case: If a kmalloc of a doms_cur partition (array of
-  * cpumask_t) fails, then fallback to a single sched domain,
-  * as determined by the single cpumask_t fallback_doms.
+  * cpumask) fails, then fallback to a single sched domain,
+  * as determined by the single cpumask fallback_doms.
    */
- static cpumask_t fallback_doms;
+ static cpumask_var_t fallback_doms;
   
   /*
    * arch_update_cpu_topology lets virtualized architectures update the
@@@ -7713,16 -7810,16 +7827,16 @@@ int __attribute__((weak)) arch_update_c
    * For now this just excludes isolated cpus, but could be used to
    * exclude other special cases in the future.
    */
- static int arch_init_sched_domains(const cpumask_t *cpu_map)
+ static int arch_init_sched_domains(const struct cpumask *cpu_map)
   {
         int err;
   
         arch_update_cpu_topology();
         ndoms_cur = 1;
-       doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+       doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
         if (!doms_cur)
-               doms_cur = &fallback_doms;
-       cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
+               doms_cur = fallback_doms;
+       cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
         dattr_cur = NULL;
         err = build_sched_domains(doms_cur);
         register_sched_domain_sysctl();
@@@ -7730,8 -7827,8 +7844,8 @@@
         return err;
   }
   
- static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
-                                      cpumask_t *tmpmask)
+ static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
+                                      struct cpumask *tmpmask)
   {
         free_sched_groups(cpu_map, tmpmask);
   }
@@@ -7740,15 -7837,16 +7854,16 @@@
    * Detach sched domains from a group of cpus specified in cpu_map
    * These cpus will now be attached to the NULL domain
    */
- static void detach_destroy_domains(const cpumask_t *cpu_map)
+ static void detach_destroy_domains(const struct cpumask *cpu_map)
   {
-       cpumask_t tmpmask;
+       /* Save because hotplug lock held. */
+       static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
         int i;
   
-       for_each_cpu_mask_nr(i, *cpu_map)
+       for_each_cpu(i, cpu_map)
                 cpu_attach_domain(NULL, &def_root_domain, i);
         synchronize_sched();
-       arch_destroy_sched_domains(cpu_map, &tmpmask);
+       arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
   }
   
   /* handle null as "default" */
@@@ -7773,7 -7871,7 +7888,7 @@@ static int dattrs_equal(struct sched_do
    * doms_new[] to the current sched domain partitioning, doms_cur[].
    * It destroys each deleted domain and builds each new domain.
    *
-  * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+  * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
    * The masks don't intersect (don't overlap.) We should setup one
    * sched domain for each mask. CPUs not in any of the cpumasks will
    * not be load balanced. If the same cpumask appears both in the
@@@ -7787,13 -7885,14 +7902,14 @@@
    * the single partition 'fallback_doms', it also forces the domains
    * to be rebuilt.
    *
-  * If doms_new == NULL it will be replaced with cpu_online_map.
+  * If doms_new == NULL it will be replaced with cpu_online_mask.
    * ndoms_new == 0 is a special case for destroying existing domains,
    * and it will not create the default domain.
    *
    * Call with hotplug lock held
    */
- void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+ /* FIXME: Change to struct cpumask *doms_new[] */
+ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
                              struct sched_domain_attr *dattr_new)
   {
         int i, j, n;
@@@ -7812,7 -7911,7 +7928,7 @@@
         /* Destroy deleted domains */
         for (i = 0; i < ndoms_cur; i++) {
                 for (j = 0; j < n && !new_topology; j++) {
-                       if (cpus_equal(doms_cur[i], doms_new[j])
+                       if (cpumask_equal(&doms_cur[i], &doms_new[j])
                             && dattrs_equal(dattr_cur, i, dattr_new, j))
                                 goto match1;
                 }
@@@ -7824,15 -7923,15 +7940,15 @@@ match1
   
         if (doms_new == NULL) {
                 ndoms_cur = 0;
-               doms_new = &fallback_doms;
-               cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+               doms_new = fallback_doms;
+               cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
                 WARN_ON_ONCE(dattr_new);
         }
   
         /* Build new domains */
         for (i = 0; i < ndoms_new; i++) {
                 for (j = 0; j < ndoms_cur && !new_topology; j++) {
-                       if (cpus_equal(doms_new[i], doms_cur[j])
+                       if (cpumask_equal(&doms_new[i], &doms_cur[j])
                             && dattrs_equal(dattr_new, i, dattr_cur, j))
                                 goto match2;
                 }
@@@ -7844,7 -7943,7 +7960,7 @@@ match2
         }
   
         /* Remember the new sched domains */
-       if (doms_cur != &fallback_doms)
+       if (doms_cur != fallback_doms)
                 kfree(doms_cur);
         kfree(dattr_cur);       /* kfree(NULL) is safe */
         doms_cur = doms_new;
@@@ -7873,14 -7972,25 +7989,25 @@@ int arch_reinit_sched_domains(void
   static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
   {
         int ret;
+       unsigned int level = 0;
   
-       if (buf[0] != '0' && buf[0] != '1')
+       if (sscanf(buf, "%u", &level) != 1)
+               return -EINVAL;
+ 
+       /*
+        * level is always be positive so don't check for
+        * level < POWERSAVINGS_BALANCE_NONE which is 0
+        * What happens on 0 or 1 byte write,
+        * need to check for count as well?
+        */
+ 
+       if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
                 return -EINVAL;
   
         if (smt)
-               sched_smt_power_savings = (buf[0] == '1');
+               sched_smt_power_savings = level;
         else
-               sched_mc_power_savings = (buf[0] == '1');
+               sched_mc_power_savings = level;
   
         ret = arch_reinit_sched_domains();
   
@@@ -7984,7 -8094,9 +8111,9 @@@ static int update_runtime(struct notifi
   
   void __init sched_init_smp(void)
   {
-       cpumask_t non_isolated_cpus;
+       cpumask_var_t non_isolated_cpus;
+ 
+       alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
   
   #if defined(CONFIG_NUMA)
         sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
@@@ -7993,10 -8105,10 +8122,10 @@@
   #endif
         get_online_cpus();
         mutex_lock(&sched_domains_mutex);
-       arch_init_sched_domains(&cpu_online_map);
-       cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
-       if (cpus_empty(non_isolated_cpus))
-               cpu_set(smp_processor_id(), non_isolated_cpus);
+       arch_init_sched_domains(cpu_online_mask);
+       cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+       if (cpumask_empty(non_isolated_cpus))
+               cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
         mutex_unlock(&sched_domains_mutex);
         put_online_cpus();
   
@@@ -8011,9 -8123,13 +8140,13 @@@
         init_hrtick();
   
         /* Move init over to a non-isolated CPU */
-       if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
+       if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
                 BUG();
         sched_init_granularity();
+       free_cpumask_var(non_isolated_cpus);
+ 
+       alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
+       init_sched_rt_class();
   }
   #else
   void __init sched_init_smp(void)
@@@ -8328,6 -8444,15 +8461,15 @@@ void __init sched_init(void
          */
         current->sched_class = &fair_sched_class;
   
+       /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
+       alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+ #ifdef CONFIG_SMP
+ #ifdef CONFIG_NO_HZ
+       alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+ #endif
+       alloc_bootmem_cpumask_var(&cpu_isolated_map);
+ #endif /* SMP */
+ 
         scheduler_running = 1;
   }
   
@@@ -9298,41 -9423,6 +9440,41 @@@ cpuacct_destroy(struct cgroup_subsys *s
         kfree(ca);
   }
   
+ +static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+ +{
+ +      u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+ +      u64 data;
+ +
+ +#ifndef CONFIG_64BIT
+ +      /*
+ +       * Take rq->lock to make 64-bit read safe on 32-bit platforms.
+ +       */
+ +      spin_lock_irq(&cpu_rq(cpu)->lock);
+ +      data = *cpuusage;
+ +      spin_unlock_irq(&cpu_rq(cpu)->lock);
+ +#else
+ +      data = *cpuusage;
+ +#endif
+ +
+ +      return data;
+ +}
+ +
+ +static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+ +{
+ +      u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+ +
+ +#ifndef CONFIG_64BIT
+ +      /*
+ +       * Take rq->lock to make 64-bit write safe on 32-bit platforms.
+ +       */
+ +      spin_lock_irq(&cpu_rq(cpu)->lock);
+ +      *cpuusage = val;
+ +      spin_unlock_irq(&cpu_rq(cpu)->lock);
+ +#else
+ +      *cpuusage = val;
+ +#endif
+ +}
+ +
   /* return total cpu usage (in nanoseconds) of a group */
   static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
   {
@@@ -9340,8 -9430,17 +9482,8 @@@
         u64 totalcpuusage = 0;
         int i;
   
- -      for_each_possible_cpu(i) {
- -              u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
- -
- -              /*
- -               * Take rq->lock to make 64-bit addition safe on 32-bit
- -               * platforms.
- -               */
- -              spin_lock_irq(&cpu_rq(i)->lock);
- -              totalcpuusage += *cpuusage;
- -              spin_unlock_irq(&cpu_rq(i)->lock);
- -      }
+ +      for_each_present_cpu(i)
+ +              totalcpuusage += cpuacct_cpuusage_read(ca, i);
   
         return totalcpuusage;
   }
@@@ -9358,39 -9457,23 +9500,39 @@@ static int cpuusage_write(struct cgrou
                 goto out;
         }
   
- -      for_each_possible_cpu(i) {
- -              u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
+ +      for_each_present_cpu(i)
+ +              cpuacct_cpuusage_write(ca, i, 0);
   
- -              spin_lock_irq(&cpu_rq(i)->lock);
- -              *cpuusage = 0;
- -              spin_unlock_irq(&cpu_rq(i)->lock);
- -      }
   out:
         return err;
   }
   
+ +static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+ +                                 struct seq_file *m)
+ +{
+ +      struct cpuacct *ca = cgroup_ca(cgroup);
+ +      u64 percpu;
+ +      int i;
+ +
+ +      for_each_present_cpu(i) {
+ +              percpu = cpuacct_cpuusage_read(ca, i);
+ +              seq_printf(m, "%llu ", (unsigned long long) percpu);
+ +      }
+ +      seq_printf(m, "\n");
+ +      return 0;
+ +}
+ +
   static struct cftype files[] = {
         {
                 .name = "usage",
                 .read_u64 = cpuusage_read,
                 .write_u64 = cpuusage_write,
         },
+ +      {
+ +              .name = "usage_percpu",
+ +              .read_seq_string = cpuacct_percpu_seq_read,
+ +      },
+ +
   };
   
   static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
diff --combined kernel/sched_fair.c

index 5ad4440f0fc44798b84417fa56463dd19ad81267,36b5e34fa99ecf7aa4dcb9b7bdaabbf938beebb9..56c0efe902a79bca1c578aa7fcd5e7d5f0df3144
--- 1/kernel/sched_fair.c
--- 2/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@@ -492,8 -492,6 +492,8 @@@ static void update_curr(struct cfs_rq *
          * overflow on 32 bits):
          */
         delta_exec = (unsigned long)(now - curr->exec_start);
+ +      if (!delta_exec)
+ +              return;
   
         __update_curr(cfs_rq, curr, delta_exec);
         curr->exec_start = now;
@@@ -1019,16 -1017,33 +1019,33 @@@ static void yield_task_fair(struct rq *
    * search starts with cpus closest then further out as needed,
    * so we always favor a closer, idle cpu.
    * Domains may include CPUs that are not usable for migration,
-  * hence we need to mask them out (cpu_active_map)
+  * hence we need to mask them out (cpu_active_mask)
    *
    * Returns the CPU we should wake onto.
    */
   #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
   static int wake_idle(int cpu, struct task_struct *p)
   {
-       cpumask_t tmp;
         struct sched_domain *sd;
         int i;
+       unsigned int chosen_wakeup_cpu;
+       int this_cpu;
+ 
+       /*
+        * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
+        * are idle and this is not a kernel thread and this task's affinity
+        * allows it to be moved to preferred cpu, then just move!
+        */
+ 
+       this_cpu = smp_processor_id();
+       chosen_wakeup_cpu =
+               cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
+ 
+       if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
+               idle_cpu(cpu) && idle_cpu(this_cpu) &&
+               p->mm && !(p->flags & PF_KTHREAD) &&
+               cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
+               return chosen_wakeup_cpu;
   
         /*
          * If it is idle, then it is the best cpu to run this task.
@@@ -1046,10 -1061,9 +1063,9 @@@
                 if ((sd->flags & SD_WAKE_IDLE)
                     || ((sd->flags & SD_WAKE_IDLE_FAR)
                         && !task_hot(p, task_rq(p)->clock, sd))) {
-                       cpus_and(tmp, sd->span, p->cpus_allowed);
-                       cpus_and(tmp, tmp, cpu_active_map);
-                       for_each_cpu_mask_nr(i, tmp) {
-                               if (idle_cpu(i)) {
+                       for_each_cpu_and(i, sched_domain_span(sd),
+                                        &p->cpus_allowed) {
+                               if (cpu_active(i) && idle_cpu(i)) {
                                         if (i != task_cpu(p)) {
                                                 schedstat_inc(p,
                                                        se.nr_wakeups_idle);
@@@ -1242,13 -1256,13 +1258,13 @@@ static int select_task_rq_fair(struct t
          * this_cpu and prev_cpu are present in:
          */
         for_each_domain(this_cpu, sd) {
-               if (cpu_isset(prev_cpu, sd->span)) {
+               if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
                         this_sd = sd;
                         break;
                 }
         }
   
-       if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+       if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
                 goto out;
   
         /*
@@@ -1347,11 -1361,12 +1363,11 @@@ static void check_preempt_wakeup(struc
   {
         struct task_struct *curr = rq->curr;
         struct sched_entity *se = &curr->se, *pse = &p->se;
+ +      struct cfs_rq *cfs_rq = task_cfs_rq(curr);
   
- -      if (unlikely(rt_prio(p->prio))) {
- -              struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+ +      update_curr(cfs_rq);
   
- -              update_rq_clock(rq);
- -              update_curr(cfs_rq);
+ +      if (unlikely(rt_prio(p->prio))) {
                 resched_task(curr);
                 return;
         }
diff --combined kernel/sched_rt.c

index 51d2af3e6191e0680e7acb894dd48a1b26381e51,1bbd99014011894766334eee96da13d6fbc4a8de..833b6d44483c57bb4473b9eddf448a2366302b6a
--- 1/kernel/sched_rt.c
--- 2/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@@ -15,7 -15,7 +15,7 @@@ static inline void rt_set_overload(stru
         if (!rq->online)
                 return;
   
-       cpu_set(rq->cpu, rq->rd->rto_mask);
+       cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
         /*
          * Make sure the mask is visible before we set
          * the overload count. That is checked to determine
@@@ -34,7 -34,7 +34,7 @@@ static inline void rt_clear_overload(st
   
         /* the order here really doesn't matter */
         atomic_dec(&rq->rd->rto_count);
-       cpu_clear(rq->cpu, rq->rd->rto_mask);
+       cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
   }
   
   static void update_rt_migration(struct rq *rq)
@@@ -77,7 -77,7 +77,7 @@@ static inline u64 sched_rt_period(struc
   }
   
   #define for_each_leaf_rt_rq(rt_rq, rq) \
- -      list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
+ +      list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
   
   static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
   {
@@@ -139,14 -139,14 +139,14 @@@ static int rt_se_boosted(struct sched_r
   }
   
   #ifdef CONFIG_SMP
- static inline cpumask_t sched_rt_period_mask(void)
+ static inline const struct cpumask *sched_rt_period_mask(void)
   {
         return cpu_rq(smp_processor_id())->rd->span;
   }
   #else
- static inline cpumask_t sched_rt_period_mask(void)
+ static inline const struct cpumask *sched_rt_period_mask(void)
   {
-       return cpu_online_map;
+       return cpu_online_mask;
   }
   #endif
   
@@@ -212,9 -212,9 +212,9 @@@ static inline int rt_rq_throttled(struc
         return rt_rq->rt_throttled;
   }
   
- static inline cpumask_t sched_rt_period_mask(void)
+ static inline const struct cpumask *sched_rt_period_mask(void)
   {
-       return cpu_online_map;
+       return cpu_online_mask;
   }
   
   static inline
@@@ -241,11 -241,11 +241,11 @@@ static int do_balance_runtime(struct rt
         int i, weight, more = 0;
         u64 rt_period;
   
-       weight = cpus_weight(rd->span);
+       weight = cpumask_weight(rd->span);
   
         spin_lock(&rt_b->rt_runtime_lock);
         rt_period = ktime_to_ns(rt_b->rt_period);
-       for_each_cpu_mask_nr(i, rd->span) {
+       for_each_cpu(i, rd->span) {
                 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                 s64 diff;
   
@@@ -324,7 -324,7 +324,7 @@@ static void __disable_runtime(struct r
                 /*
                  * Greedy reclaim, take back as much as we can.
                  */
-               for_each_cpu_mask(i, rd->span) {
+               for_each_cpu(i, rd->span) {
                         struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                         s64 diff;
   
@@@ -429,13 -429,13 +429,13 @@@ static inline int balance_runtime(struc
   static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
   {
         int i, idle = 1;
-       cpumask_t span;
+       const struct cpumask *span;
   
         if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                 return 1;
   
         span = sched_rt_period_mask();
-       for_each_cpu_mask(i, span) {
+       for_each_cpu(i, span) {
                 int enqueue = 0;
                 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
                 struct rq *rq = rq_of_rt_rq(rt_rq);
@@@ -805,17 -805,20 +805,20 @@@ static int select_task_rq_rt(struct tas
   
   static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
   {
-       cpumask_t mask;
+       cpumask_var_t mask;
   
         if (rq->curr->rt.nr_cpus_allowed == 1)
                 return;
   
-       if (p->rt.nr_cpus_allowed != 1
-           && cpupri_find(&rq->rd->cpupri, p, &mask))
+       if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
                 return;
   
-       if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-               return;
+       if (p->rt.nr_cpus_allowed != 1
+           && cpupri_find(&rq->rd->cpupri, p, mask))
+               goto free;
+ 
+       if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
+               goto free;
   
         /*
          * There appears to be other cpus that can accept
@@@ -824,6 -827,8 +827,8 @@@
          */
         requeue_task_rt(rq, p, 1);
         resched_task(rq->curr);
+ free:
+       free_cpumask_var(mask);
   }
   
   #endif /* CONFIG_SMP */
@@@ -914,7 -919,7 +919,7 @@@ static void deactivate_task(struct rq *
   static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
   {
         if (!task_running(rq, p) &&
-           (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
+           (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
             (p->rt.nr_cpus_allowed > 1))
                 return 1;
         return 0;
@@@ -953,7 -958,7 +958,7 @@@ static struct task_struct *pick_next_hi
         return next;
   }
   
- static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
+ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
   
   static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
   {
@@@ -973,7 -978,7 +978,7 @@@
   static int find_lowest_rq(struct task_struct *task)
   {
         struct sched_domain *sd;
-       cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
+       struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
         int this_cpu = smp_processor_id();
         int cpu      = task_cpu(task);
   
@@@ -988,7 -993,7 +993,7 @@@
          * I guess we might want to change cpupri_find() to ignore those
          * in the first place.
          */
-       cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+       cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
   
         /*
          * At this point we have built a mask of cpus representing the
@@@ -998,7 -1003,7 +1003,7 @@@
          * We prioritize the last cpu that the task executed on since
          * it is most likely cache-hot in that location.
          */
-       if (cpu_isset(cpu, *lowest_mask))
+       if (cpumask_test_cpu(cpu, lowest_mask))
                 return cpu;
   
         /*
@@@ -1013,7 -1018,8 +1018,8 @@@
                         cpumask_t domain_mask;
                         int       best_cpu;
   
-                       cpus_and(domain_mask, sd->span, *lowest_mask);
+                       cpumask_and(&domain_mask, sched_domain_span(sd),
+                                   lowest_mask);
   
                         best_cpu = pick_optimal_cpu(this_cpu,
                                                     &domain_mask);
@@@ -1054,8 -1060,8 +1060,8 @@@ static struct rq *find_lock_lowest_rq(s
                          * Also make sure that it wasn't scheduled on its rq.
                          */
                         if (unlikely(task_rq(task) != rq ||
-                                    !cpu_isset(lowest_rq->cpu,
-                                               task->cpus_allowed) ||
+                                    !cpumask_test_cpu(lowest_rq->cpu,
+                                                      &task->cpus_allowed) ||
                                      task_running(rq, task) ||
                                      !task->se.on_rq)) {
   
@@@ -1176,7 -1182,7 +1182,7 @@@ static int pull_rt_task(struct rq *this
   
         next = pick_next_task_rt(this_rq);
   
-       for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
+       for_each_cpu(cpu, this_rq->rd->rto_mask) {
                 if (this_cpu == cpu)
                         continue;
   
@@@ -1305,9 -1311,9 +1311,9 @@@ move_one_task_rt(struct rq *this_rq, in
   }
   
   static void set_cpus_allowed_rt(struct task_struct *p,
-                               const cpumask_t *new_mask)
+                               const struct cpumask *new_mask)
   {
-       int weight = cpus_weight(*new_mask);
+       int weight = cpumask_weight(new_mask);
   
         BUG_ON(!rt_task(p));
   
@@@ -1328,7 -1334,7 +1334,7 @@@
                 update_rt_migration(rq);
         }
   
-       p->cpus_allowed    = *new_mask;
+       cpumask_copy(&p->cpus_allowed, new_mask);
         p->rt.nr_cpus_allowed = weight;
   }
   
@@@ -1371,6 -1377,14 +1377,14 @@@ static void switched_from_rt(struct rq 
         if (!rq->rt.rt_nr_running)
                 pull_rt_task(rq);
   }
+ 
+ static inline void init_sched_rt_class(void)
+ {
+       unsigned int i;
+ 
+       for_each_possible_cpu(i)
+               alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
+ }
   #endif /* CONFIG_SMP */
   
   /*
@@@ -1541,3 -1555,4 +1555,4 @@@ static void print_rt_stats(struct seq_f
         rcu_read_unlock();
   }
   #endif /* CONFIG_SCHED_DEBUG */
+ 
diff --combined kernel/sched_stats.h

index 3b01098164c8963b47ad04d27f68eb7b711a37dc,5fcf0e18458670e8e56a259cefb096ac38a7c95c..f2773b5d12260bbb10c3b293bd4fd1811b483848
--- 1/kernel/sched_stats.h
--- 2/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@@ -31,7 -31,7 +31,7 @@@ static int show_schedstat(struct seq_fi
                     rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
                     rq->sched_switch, rq->sched_count, rq->sched_goidle,
                     rq->ttwu_count, rq->ttwu_local,
- -                  rq->rq_sched_info.cpu_time,
+ +                  rq->rq_cpu_time,
                     rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
   
                 seq_printf(seq, "\n");
@@@ -42,7 -42,8 +42,8 @@@
                 for_each_domain(cpu, sd) {
                         enum cpu_idle_type itype;
   
-                       cpumask_scnprintf(mask_str, mask_len, sd->span);
+                       cpumask_scnprintf(mask_str, mask_len,
+                                         sched_domain_span(sd));
                         seq_printf(seq, "domain%d %s", dcount++, mask_str);
                         for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
                                         itype++) {
@@@ -123,7 -124,7 +124,7 @@@ static inline voi
   rq_sched_info_depart(struct rq *rq, unsigned long long delta)
   {
         if (rq)
- -              rq->rq_sched_info.cpu_time += delta;
+ +              rq->rq_cpu_time += delta;
   }
   
   static inline void
@@@ -236,6 -237,7 +237,6 @@@ static inline void sched_info_depart(st
         unsigned long long delta = task_rq(t)->clock -
                                         t->sched_info.last_arrival;
   
- -      t->sched_info.cpu_time += delta;
         rq_sched_info_depart(task_rq(t), delta);
   
         if (t->state == TASK_RUNNING)
diff --combined kernel/time/tick-sched.c

index 8f3fc2582d38b7073927e9dc004ef38e16909ab0,70f872c71f4e47dbaef945cc99990fc9ce01d1db..76a574bbef97270672dee229b81b77872ba319e6
--- 1/kernel/time/tick-sched.c
--- 2/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@@ -144,7 -144,7 +144,7 @@@ void tick_nohz_update_jiffies(void
         if (!ts->tick_stopped)
                 return;
   
-       cpu_clear(cpu, nohz_cpu_mask);
+       cpumask_clear_cpu(cpu, nohz_cpu_mask);
         now = ktime_get();
         ts->idle_waketime = now;
   
@@@ -247,7 -247,7 +247,7 @@@ void tick_nohz_stop_sched_tick(int inid
         if (need_resched())
                 goto end;
   
- -      if (unlikely(local_softirq_pending())) {
+ +      if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
                 static int ratelimit;
   
                 if (ratelimit < 10) {
@@@ -282,31 -282,8 +282,31 @@@
         /* Schedule the tick, if we are at least one jiffie off */
         if ((long)delta_jiffies >= 1) {
   
+ +              /*
+ +              * calculate the expiry time for the next timer wheel
+ +              * timer
+ +              */
+ +              expires = ktime_add_ns(last_update, tick_period.tv64 *
+ +                                 delta_jiffies);
+ +
+ +              /*
+ +               * If this cpu is the one which updates jiffies, then
+ +               * give up the assignment and let it be taken by the
+ +               * cpu which runs the tick timer next, which might be
+ +               * this cpu as well. If we don't drop this here the
+ +               * jiffies might be stale and do_timer() never
+ +               * invoked.
+ +               */
+ +              if (cpu == tick_do_timer_cpu)
+ +                      tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ +
                 if (delta_jiffies > 1)
-                       cpu_set(cpu, nohz_cpu_mask);
+                       cpumask_set_cpu(cpu, nohz_cpu_mask);
+ +
+ +              /* Skip reprogramming the event if its expiry has not changed */
+ +              if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
+ +                      goto out;
+ +
                 /*
                  * nohz_stop_sched_tick can be called several times before
                  * the nohz_restart_sched_tick is called. This happens when
@@@ -319,7 -296,7 +319,7 @@@
                                 /*
                                  * sched tick not stopped!
                                  */
-                               cpu_clear(cpu, nohz_cpu_mask);
+                               cpumask_clear_cpu(cpu, nohz_cpu_mask);
                                 goto out;
                         }
   
@@@ -329,6 -306,17 +329,6 @@@
                         rcu_enter_nohz();
                 }
   
- -              /*
- -               * If this cpu is the one which updates jiffies, then
- -               * give up the assignment and let it be taken by the
- -               * cpu which runs the tick timer next, which might be
- -               * this cpu as well. If we don't drop this here the
- -               * jiffies might be stale and do_timer() never
- -               * invoked.
- -               */
- -              if (cpu == tick_do_timer_cpu)
- -                      tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- -
                 ts->idle_sleeps++;
   
                 /*
@@@ -344,7 -332,12 +344,7 @@@
                         goto out;
                 }
   
- -              /*
- -               * calculate the expiry time for the next timer wheel
- -               * timer
- -               */
- -              expires = ktime_add_ns(last_update, tick_period.tv64 *
- -                                     delta_jiffies);
+ +              /* Mark expiries */
                 ts->idle_expires = expires;
   
                 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
@@@ -361,7 -354,7 +361,7 @@@
                  * softirq.
                  */
                 tick_do_update_jiffies64(ktime_get());
-               cpu_clear(cpu, nohz_cpu_mask);
+               cpumask_clear_cpu(cpu, nohz_cpu_mask);
         }
         raise_softirq_irqoff(TIMER_SOFTIRQ);
   out:
@@@ -439,7 -432,7 +439,7 @@@ void tick_nohz_restart_sched_tick(void
         select_nohz_load_balancer(0);
         now = ktime_get();
         tick_do_update_jiffies64(now);
-       cpu_clear(cpu, nohz_cpu_mask);
+       cpumask_clear_cpu(cpu, nohz_cpu_mask);
   
         /*
          * We stopped the tick in idle. Update process times would miss the
@@@ -688,6 -681,7 +688,6 @@@ void tick_setup_sched_timer(void
          */
         hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
         ts->sched_timer.function = tick_sched_timer;
- -      ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
   
         /* Get the next period (per cpu) */
         hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
diff --combined kernel/trace/trace.c

index 4185d5221633f19755efb818a1fb969a69d0d29c,6adf660fc8163c83cafb2fde2bea12fcf80a7bd5..0e91f43b6baf53cac177af4ca02f0939997c1a0c
--- 1/kernel/trace/trace.c
--- 2/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@@ -30,6 -30,7 +30,6 @@@
   #include <linux/gfp.h>
   #include <linux/fs.h>
   #include <linux/kprobes.h>
- -#include <linux/seq_file.h>
   #include <linux/writeback.h>
   
   #include <linux/stacktrace.h>
@@@ -286,7 -287,6 +286,7 @@@ static const char *trace_options[] = 
         "annotate",
         "userstacktrace",
         "sym-userobj",
+ +      "printk-msg-only",
         NULL
   };
   
@@@ -320,7 -320,7 +320,7 @@@ __update_max_tr(struct trace_array *tr
   
         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
         data->pid = tsk->pid;
- -      data->uid = tsk->uid;
+ +      data->uid = task_uid(tsk);
         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
         data->policy = tsk->policy;
         data->rt_priority = tsk->rt_priority;
@@@ -678,16 -678,6 +678,16 @@@ void tracing_reset(struct trace_array *
         ftrace_enable_cpu();
   }
   
+ +/*
+ + * Stamp a new start time on the trace array, taken from ftrace_now() on
+ + * tr->cpu, then call tracing_reset() for each online CPU.
+ + */
+ +void tracing_reset_online_cpus(struct trace_array *tr)
+ +{
+ +      int i;
+ +
+ +      /* Record the new start time before any per-CPU reset runs. */
+ +      tr->time_start = ftrace_now(tr->cpu);
+ +
+ +      for_each_online_cpu(i) {
+ +              tracing_reset(tr, i);
+ +      }
+ +}
+ +
   #define SAVED_CMDLINES 128
   static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
   static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
@@@ -1309,7 -1299,7 +1309,7 @@@ enum trace_file_type 
         TRACE_FILE_ANNOTATE     = 2,
   };
   
- -static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+ +static void trace_iterator_increment(struct trace_iterator *iter)
   {
         /* Don't allow ftrace to trace into the ring buffers */
         ftrace_disable_cpu();
@@@ -1388,7 -1378,7 +1388,7 @@@ static void *find_next_entry_inc(struc
         iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
   
         if (iter->ent)
- -              trace_iterator_increment(iter, iter->cpu);
+ +              trace_iterator_increment(iter);
   
         return iter->ent ? iter : NULL;
   }
@@@ -1757,13 -1747,6 +1757,13 @@@ lat_print_timestamp(struct trace_seq *s
   
   static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
   
+ +/*
+ + * Map a task state value to a single display character.
+ + *
+ + * A zero state selects index 0 of state_to_char; any other value selects
+ + * index __ffs(state) + 1.  An index that falls outside the table yields
+ + * '?'.
+ + */
+ +static int task_state_char(unsigned long state)
+ +{
+ +      int idx = 0;
+ +
+ +      if (state)
+ +              idx = __ffs(state) + 1;
+ +
+ +      /* The last array element is excluded (presumably the string's NUL). */
+ +      if (idx < sizeof(state_to_char) - 1)
+ +              return state_to_char[idx];
+ +
+ +      return '?';
+ +}
+ +
   /*
    * The message is supposed to contain an ending newline.
    * If the printing stops prematurely, try to add a newline of our own.
@@@ -1832,6 -1815,7 +1832,6 @@@ print_lat_fmt(struct trace_iterator *it
         char *comm;
         int S, T;
         int i;
- -      unsigned state;
   
         if (entry->type == TRACE_CONT)
                 return TRACE_TYPE_HANDLED;
@@@ -1877,8 -1861,12 +1877,8 @@@
   
                 trace_assign_type(field, entry);
   
- -              T = field->next_state < sizeof(state_to_char) ?
- -                      state_to_char[field->next_state] : 'X';
- -
- -              state = field->prev_state ?
- -                      __ffs(field->prev_state) + 1 : 0;
- -              S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
+ +              T = task_state_char(field->next_state);
+ +              S = task_state_char(field->prev_state);
                 comm = trace_find_cmdline(field->next_pid);
                 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
                                  field->prev_pid,
@@@ -2019,8 -2007,10 +2019,8 @@@ static enum print_line_t print_trace_fm
   
                 trace_assign_type(field, entry);
   
- -              S = field->prev_state < sizeof(state_to_char) ?
- -                      state_to_char[field->prev_state] : 'X';
- -              T = field->next_state < sizeof(state_to_char) ?
- -                      state_to_char[field->next_state] : 'X';
+ +              T = task_state_char(field->next_state);
+ +              S = task_state_char(field->prev_state);
                 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
                                        field->prev_pid,
                                        field->prev_prio,
@@@ -2150,9 -2140,12 +2150,9 @@@ static enum print_line_t print_raw_fmt(
   
                 trace_assign_type(field, entry);
   
- -              S = field->prev_state < sizeof(state_to_char) ?
- -                      state_to_char[field->prev_state] : 'X';
- -              T = field->next_state < sizeof(state_to_char) ?
- -                      state_to_char[field->next_state] : 'X';
- -              if (entry->type == TRACE_WAKE)
- -                      S = '+';
+ +              T = task_state_char(field->next_state);
+ +              S = entry->type == TRACE_WAKE ? '+' :
+ +                      task_state_char(field->prev_state);
                 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
                                        field->prev_pid,
                                        field->prev_prio,
@@@ -2239,9 -2232,12 +2239,9 @@@ static enum print_line_t print_hex_fmt(
   
                 trace_assign_type(field, entry);
   
- -              S = field->prev_state < sizeof(state_to_char) ?
- -                      state_to_char[field->prev_state] : 'X';
- -              T = field->next_state < sizeof(state_to_char) ?
- -                      state_to_char[field->next_state] : 'X';
- -              if (entry->type == TRACE_WAKE)
- -                      S = '+';
+ +              T = task_state_char(field->next_state);
+ +              S = entry->type == TRACE_WAKE ? '+' :
+ +                      task_state_char(field->prev_state);
                 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
                 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
                 SEQ_PUT_HEX_FIELD_RET(s, S);
@@@ -2269,25 -2265,6 +2269,25 @@@
         return TRACE_TYPE_HANDLED;
   }
   
+ +/*
+ + * Emit only the message text of a print entry, with no other decoration.
+ + *
+ + * Returns TRACE_TYPE_PARTIAL_LINE when trace_seq_printf() returns 0,
+ + * TRACE_TYPE_HANDLED otherwise.
+ + */
+ +static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
+ +{
+ +      struct trace_seq *s = &iter->seq;
+ +      struct trace_entry *entry = iter->ent;
+ +      struct print_entry *field;
+ +      int ret;
+ +
+ +      trace_assign_type(field, entry);
+ +
+ +      /*
+ +       * field->buf is recorded message text, not a format string: print
+ +       * it through "%s" so any '%' in the message is emitted literally
+ +       * instead of being parsed as a conversion specifier.
+ +       */
+ +      ret = trace_seq_printf(s, "%s", field->buf);
+ +      if (!ret)
+ +              return TRACE_TYPE_PARTIAL_LINE;
+ +
+ +      /* Entry is flagged TRACE_FLAG_CONT: hand off to trace_seq_print_cont(). */
+ +      if (entry->flags & TRACE_FLAG_CONT)
+ +              trace_seq_print_cont(s, iter);
+ +
+ +      return TRACE_TYPE_HANDLED;
+ +}
+ +
   static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
   {
         struct trace_seq *s = &iter->seq;
@@@ -2368,11 -2345,6 +2368,11 @@@ static enum print_line_t print_trace_li
                         return ret;
         }
   
+ +      if (iter->ent->type == TRACE_PRINT &&
+ +                      trace_flags & TRACE_ITER_PRINTK &&
+ +                      trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+ +              return print_printk_msg_only(iter);
+ +
         if (trace_flags & TRACE_ITER_BIN)
                 return print_bin_fmt(iter);
   
@@@ -2453,7 -2425,7 +2453,7 @@@ __tracing_open(struct inode *inode, str
   
         /* Notify the tracer early; before we stop tracing. */
         if (iter->trace && iter->trace->open)
- -                      iter->trace->open(iter);
+ +              iter->trace->open(iter);
   
         /* Annotate start of buffers if we had overruns */
         if (ring_buffer_overruns(iter->tr->buffer))
@@@ -2674,7 -2646,7 +2674,7 @@@ tracing_cpumask_read(struct file *filp
   
         mutex_lock(&tracing_cpumask_update_lock);
   
-       len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+       len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
         if (count - len < 2) {
                 count = -EINVAL;
                 goto out_err;
@@@ -2695,7 -2667,7 +2695,7 @@@ tracing_cpumask_write(struct file *filp
         int err, cpu;
   
         mutex_lock(&tracing_cpumask_update_lock);
-       err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+       err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
         if (err)
                 goto err_unlock;
   
diff --combined lib/Kconfig

index fd4118e097f0711c9c08cf6e08575678fcc5ec16,7823f8342abf1c8f403a251b56d428dc9bf703a5..2ba43c4a5b075a28cb3dd05ae3f5db46fee23661
--- 1/lib/Kconfig
--- 2/lib/Kconfig
+++ b/lib/Kconfig
@@@ -64,8 -64,6 +64,8 @@@ config CRC
   
   config LIBCRC32C
         tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check"
+ +      select CRYPTO
+ +      select CRYPTO_CRC32C
         help
           This option is provided for the case where no in-kernel-tree
           modules require CRC32c functions, but a module built outside the
@@@ -159,4 -157,11 +159,11 @@@ config CHECK_SIGNATUR
   config HAVE_LMB
         boolean
   
+ config CPUMASK_OFFSTACK
+       bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+       help
+         Use dynamic allocation for cpumask_var_t, instead of putting
+         them on the stack.  This is a bit more expensive, but avoids
+         stack overflow.
+ 
   endmenu
diff --combined mm/slub.c

index 6cb7ad10785227f2b889bcff96d6610ddeabe37a,8e516e29f98920f65e8da441e5d2cc51e4cf4f97..0d861c3154b6eeed56981c33b3505ecca87d5712
--- 1/mm/slub.c
--- 2/mm/slub.c
+++ b/mm/slub.c
@@@ -24,7 -24,6 +24,7 @@@
   #include <linux/kallsyms.h>
   #include <linux/memory.h>
   #include <linux/math64.h>
+ +#include <linux/fault-inject.h>
   
   /*
    * Lock order:
@@@ -154,10 -153,6 +154,10 @@@
   #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
   #endif
   
+ +#define OO_SHIFT      16
+ +#define OO_MASK               ((1 << OO_SHIFT) - 1)
+ +#define MAX_OBJS_PER_PAGE     65535 /* since page.objects is u16 */
+ +
   /* Internal SLUB flags */
   #define __OBJECT_POISON               0x80000000 /* Poison object */
   #define __SYSFS_ADD_DEFERRED  0x40000000 /* Not yet visible via sysfs */
@@@ -183,7 -178,7 +183,7 @@@ static LIST_HEAD(slab_caches)
    * Tracking user of a slab.
    */
   struct track {
- -      void *addr;             /* Called from address */
+ +      unsigned long addr;     /* Called from address */
         int cpu;                /* Was running on cpu */
         int pid;                /* Pid context */
         unsigned long when;     /* When did the operation occur */
@@@ -295,7 -290,7 +295,7 @@@ static inline struct kmem_cache_order_o
                                                 unsigned long size)
   {
         struct kmem_cache_order_objects x = {
- -              (order << 16) + (PAGE_SIZE << order) / size
+ +              (order << OO_SHIFT) + (PAGE_SIZE << order) / size
         };
   
         return x;
@@@ -303,12 -298,12 +303,12 @@@
   
   static inline int oo_order(struct kmem_cache_order_objects x)
   {
- -      return x.x >> 16;
+ +      return x.x >> OO_SHIFT;
   }
   
   static inline int oo_objects(struct kmem_cache_order_objects x)
   {
- -      return x.x & ((1 << 16) - 1);
+ +      return x.x & OO_MASK;
   }
   
   #ifdef CONFIG_SLUB_DEBUG
@@@ -372,7 -367,7 +372,7 @@@ static struct track *get_track(struct k
   }
   
   static void set_track(struct kmem_cache *s, void *object,
- -                              enum track_item alloc, void *addr)
+ +                      enum track_item alloc, unsigned long addr)
   {
         struct track *p;
   
@@@ -396,8 -391,8 +396,8 @@@ static void init_tracking(struct kmem_c
         if (!(s->flags & SLAB_STORE_USER))
                 return;
   
- -      set_track(s, object, TRACK_FREE, NULL);
- -      set_track(s, object, TRACK_ALLOC, NULL);
+ +      set_track(s, object, TRACK_FREE, 0UL);
+ +      set_track(s, object, TRACK_ALLOC, 0UL);
   }
   
   static void print_track(const char *s, struct track *t)
@@@ -406,7 -401,7 +406,7 @@@
                 return;
   
         printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
- -              s, t->addr, jiffies - t->when, t->cpu, t->pid);
+ +              s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
   }
   
   static void print_tracking(struct kmem_cache *s, void *object)
@@@ -697,7 -692,7 +697,7 @@@ static int check_object(struct kmem_cac
         if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
                 object_err(s, page, p, "Freepointer corrupt");
                 /*
- -               * No choice but to zap it and thus loose the remainder
+ +               * No choice but to zap it and thus lose the remainder
                  * of the free objects in this slab. May cause
                  * another error because the object count is now wrong.
                  */
@@@ -769,8 -764,8 +769,8 @@@ static int on_freelist(struct kmem_cach
         }
   
         max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
- -      if (max_objects > 65535)
- -              max_objects = 65535;
+ +      if (max_objects > MAX_OBJS_PER_PAGE)
+ +              max_objects = MAX_OBJS_PER_PAGE;
   
         if (page->objects != max_objects) {
                 slab_err(s, page, "Wrong number of objects. Found %d but "
@@@ -871,7 -866,7 +871,7 @@@ static void setup_object_debug(struct k
   }
   
   static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
- -                                              void *object, void *addr)
+ +                                      void *object, unsigned long addr)
   {
         if (!check_slab(s, page))
                 goto bad;
@@@ -911,7 -906,7 +911,7 @@@ bad
   }
   
   static int free_debug_processing(struct kmem_cache *s, struct page *page,
- -                                              void *object, void *addr)
+ +                                      void *object, unsigned long addr)
   {
         if (!check_slab(s, page))
                 goto fail;
@@@ -1034,10 -1029,10 +1034,10 @@@ static inline void setup_object_debug(s
                         struct page *page, void *object) {}
   
   static inline int alloc_debug_processing(struct kmem_cache *s,
- -      struct page *page, void *object, void *addr) { return 0; }
+ +      struct page *page, void *object, unsigned long addr) { return 0; }
   
   static inline int free_debug_processing(struct kmem_cache *s,
- -      struct page *page, void *object, void *addr) { return 0; }
+ +      struct page *page, void *object, unsigned long addr) { return 0; }
   
   static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
                         { return 1; }
@@@ -1504,8 -1499,8 +1504,8 @@@ static inline int node_match(struct kme
    * we need to allocate a new slab. This is the slowest path since it involves
    * a call to the page allocator and the setup of a new slab.
    */
- -static void *__slab_alloc(struct kmem_cache *s,
- -              gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
+ +static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ +                        unsigned long addr, struct kmem_cache_cpu *c)
   {
         void **object;
         struct page *new;
@@@ -1589,18 -1584,13 +1589,18 @@@ debug
    * Otherwise we can simply pick the next object from the lockless free list.
    */
   static __always_inline void *slab_alloc(struct kmem_cache *s,
- -              gfp_t gfpflags, int node, void *addr)
+ +              gfp_t gfpflags, int node, unsigned long addr)
   {
         void **object;
         struct kmem_cache_cpu *c;
         unsigned long flags;
         unsigned int objsize;
   
+ +      might_sleep_if(gfpflags & __GFP_WAIT);
+ +
+ +      if (should_failslab(s->objsize, gfpflags))
+ +              return NULL;
+ +
         local_irq_save(flags);
         c = get_cpu_slab(s, smp_processor_id());
         objsize = c->objsize;
@@@ -1623,14 -1613,14 +1623,14 @@@
   
   void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
   {
- -      return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+ +      return slab_alloc(s, gfpflags, -1, _RET_IP_);
   }
   EXPORT_SYMBOL(kmem_cache_alloc);
   
   #ifdef CONFIG_NUMA
   void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
   {
- -      return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+ +      return slab_alloc(s, gfpflags, node, _RET_IP_);
   }
   EXPORT_SYMBOL(kmem_cache_alloc_node);
   #endif
@@@ -1644,7 -1634,7 +1644,7 @@@
    * handling required then we can return immediately.
    */
   static void __slab_free(struct kmem_cache *s, struct page *page,
- -                              void *x, void *addr, unsigned int offset)
+ +                      void *x, unsigned long addr, unsigned int offset)
   {
         void *prior;
         void **object = (void *)x;
@@@ -1714,7 -1704,7 +1714,7 @@@ debug
    * with all sorts of special processing.
    */
   static __always_inline void slab_free(struct kmem_cache *s,
- -                      struct page *page, void *x, void *addr)
+ +                      struct page *page, void *x, unsigned long addr)
   {
         void **object = (void *)x;
         struct kmem_cache_cpu *c;
@@@ -1741,11 -1731,11 +1741,11 @@@ void kmem_cache_free(struct kmem_cache 
   
         page = virt_to_head_page(x);
   
- -      slab_free(s, page, x, __builtin_return_address(0));
+ +      slab_free(s, page, x, _RET_IP_);
   }
   EXPORT_SYMBOL(kmem_cache_free);
   
- -/* Figure out on which slab object the object resides */
+ +/* Figure out on which slab page the object resides */
   static struct page *get_object_page(const void *x)
   {
         struct page *page = virt_to_head_page(x);
@@@ -1817,8 -1807,8 +1817,8 @@@ static inline int slab_order(int size, 
         int rem;
         int min_order = slub_min_order;
   
- -      if ((PAGE_SIZE << min_order) / size > 65535)
- -              return get_order(size * 65535) - 1;
+ +      if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+ +              return get_order(size * MAX_OBJS_PER_PAGE) - 1;
   
         for (order = max(min_order,
                                 fls(min_objects * size - 1) - PAGE_SHIFT);
@@@ -2083,7 -2073,8 +2083,7 @@@ static inline int alloc_kmem_cache_cpus
    * when allocating for the kmalloc_node_cache. This is used for bootstrapping
    * memory on a fresh node that has no slab structures yet.
    */
- -static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
- -                                                         int node)
+ +static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
   {
         struct page *page;
         struct kmem_cache_node *n;
@@@ -2121,6 -2112,7 +2121,6 @@@
         local_irq_save(flags);
         add_partial(n, page, 0);
         local_irq_restore(flags);
- -      return n;
   }
   
   static void free_kmem_cache_nodes(struct kmem_cache *s)
@@@ -2152,7 -2144,8 +2152,7 @@@ static int init_kmem_cache_nodes(struc
                         n = &s->local_node;
                 else {
                         if (slab_state == DOWN) {
- -                              n = early_kmem_cache_node_alloc(gfpflags,
- -                                                              node);
+ +                              early_kmem_cache_node_alloc(gfpflags, node);
                                 continue;
                         }
                         n = kmem_cache_alloc_node(kmalloc_caches,
@@@ -2666,7 -2659,7 +2666,7 @@@ void *__kmalloc(size_t size, gfp_t flag
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;
   
- -      return slab_alloc(s, flags, -1, __builtin_return_address(0));
+ +      return slab_alloc(s, flags, -1, _RET_IP_);
   }
   EXPORT_SYMBOL(__kmalloc);
   
@@@ -2694,7 -2687,7 +2694,7 @@@ void *__kmalloc_node(size_t size, gfp_
         if (unlikely(ZERO_OR_NULL_PTR(s)))
                 return s;
   
- -      return slab_alloc(s, flags, node, __builtin_return_address(0));
+ +      return slab_alloc(s, flags, node, _RET_IP_);
   }
   EXPORT_SYMBOL(__kmalloc_node);
   #endif
@@@ -2751,7 -2744,7 +2751,7 @@@ void kfree(const void *x
                 put_page(page);
                 return;
         }
- -      slab_free(page->slab, page, object, __builtin_return_address(0));
+ +      slab_free(page->slab, page, object, _RET_IP_);
   }
   EXPORT_SYMBOL(kfree);
   
@@@ -3130,12 -3123,8 +3130,12 @@@ struct kmem_cache *kmem_cache_create(co
                 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
                 up_write(&slub_lock);
   
- -              if (sysfs_slab_alias(s, name))
+ +              if (sysfs_slab_alias(s, name)) {
+ +                      down_write(&slub_lock);
+ +                      s->refcount--;
+ +                      up_write(&slub_lock);
                         goto err;
+ +              }
                 return s;
         }
   
@@@ -3145,13 -3134,8 +3145,13 @@@
                                 size, align, flags, ctor)) {
                         list_add(&s->list, &slab_caches);
                         up_write(&slub_lock);
- -                      if (sysfs_slab_add(s))
+ +                      if (sysfs_slab_add(s)) {
+ +                              down_write(&slub_lock);
+ +                              list_del(&s->list);
+ +                              up_write(&slub_lock);
+ +                              kfree(s);
                                 goto err;
+ +                      }
                         return s;
                 }
                 kfree(s);
@@@ -3218,7 -3202,7 +3218,7 @@@ static struct notifier_block __cpuinitd
   
   #endif
   
- -void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
+ +void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
   {
         struct kmem_cache *s;
   
@@@ -3234,7 -3218,7 +3234,7 @@@
   }
   
   void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
- -                                      int node, void *caller)
+ +                                      int node, unsigned long caller)
   {
         struct kmem_cache *s;
   
@@@ -3445,7 -3429,7 +3445,7 @@@ static void resiliency_test(void) {}
   
   struct location {
         unsigned long count;
- -      void *addr;
+ +      unsigned long addr;
         long long sum_time;
         long min_time;
         long max_time;
@@@ -3493,7 -3477,7 +3493,7 @@@ static int add_location(struct loc_trac
   {
         long start, end, pos;
         struct location *l;
- -      void *caddr;
+ +      unsigned long caddr;
         unsigned long age = jiffies - track->when;
   
         start = -1;
@@@ -3642,7 -3626,7 +3642,7 @@@ static int list_locations(struct kmem_c
                                 len < PAGE_SIZE - 60) {
                         len += sprintf(buf + len, " cpus=");
                         len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-                                       l->cpus);
+                                       &l->cpus);
                 }
   
                 if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
@@@ -4361,7 -4345,7 +4361,7 @@@ static void sysfs_slab_remove(struct km
   
   /*
    * Need to buffer aliases during bootup until sysfs becomes
- - * available lest we loose that information.
+ + * available lest we lose that information.
    */
   struct saved_alias {
         struct kmem_cache *s;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 2 Jan 2009 19:44:09 +0000 (11:44 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 2 Jan 2009 19:44:09 +0000 (11:44 -0800)
		1	2
arch/arm/kernel/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm/mach-at91/at91rm9200_time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm/mach-pxa/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm/mach-realview/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm/mach-realview/localtimer.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm/mach-sa1100/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm/mach-versatile/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/platforms/pseries/xics.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/sysdev/mpic.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kernel/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kernel/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/irq_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/of_device_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/pci_msi.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/smp_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/smp_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/sparc_ksyms_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/time_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/irq.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apic.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/intel_cacheinfo.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/mcheck/mce_amd_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/genx2apic_uv_x.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/hpet.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/io_apic.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/irq_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/irqinit_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/irqinit_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/reboot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/smpboot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/tlb_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/tlb_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/traps.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/lguest/boot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/xen/events.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/interrupt.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/irq.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq/chip.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq/manage.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_fair.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_rt.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_stats.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/tick-sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
mm/slub.c	patch \|	diff1 \|	diff2 \|	blob \| history