Merge branch 'x86/uv' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 16 Apr 2009 23:43:20 +0000 (16:43 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 16 Apr 2009 23:43:20 +0000 (16:43 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 16 Apr 2009 23:43:20 +0000 (16:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 16 Apr 2009 23:43:20 +0000 (16:43 -0700)
diff --combined arch/x86/Kconfig

index bc25b9f5e4cd27583ba2a079850b55858546ef43,2817ab5a1204b8b85df10f0b03c43aa21db3dd7c..c9086e6307a5e18096ed19d78c85da7931211bdd
--- 1/arch/x86/Kconfig
--- 2/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -34,8 -34,6 +34,8 @@@ config X8
         select HAVE_FUNCTION_TRACER
         select HAVE_FUNCTION_GRAPH_TRACER
         select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ +      select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
+ +      select HAVE_FTRACE_SYSCALLS
         select HAVE_KVM
         select HAVE_ARCH_KGDB
         select HAVE_ARCH_TRACEHOOK
@@@ -252,13 -250,16 +252,13 @@@ config SM
   
   config X86_X2APIC
         bool "Support x2apic"
- -      depends on X86_LOCAL_APIC && X86_64
+ +      depends on X86_LOCAL_APIC && X86_64 && INTR_REMAP
         ---help---
           This enables x2apic support on CPUs that have this feature.
   
           This allows 32-bit apic IDs (so it can support very large systems),
           and accesses the local apic via MSRs not via mmio.
   
- -        ( On certain CPU models you may need to enable INTR_REMAP too,
- -          to get functional x2apic mode. )
- -
           If you don't know what to do here, say N.
   
   config SPARSE_IRQ
@@@ -353,6 -354,7 +353,7 @@@ config X86_U
         bool "SGI Ultraviolet"
         depends on X86_64
         depends on X86_EXTENDED_PLATFORM
+       depends on NUMA
         select X86_X2APIC
         ---help---
           This option is needed in order to support SGI Ultraviolet systems.
@@@ -1143,7 -1145,7 +1144,7 @@@ config NODES_SHIF
         depends on NEED_MULTIPLE_NODES
         ---help---
           Specify the maximum number of NUMA Nodes available on the target
- -        system.  Increases memory reserved to accomodate various tables.
+ +        system.  Increases memory reserved to accommodate various tables.
   
   config HAVE_ARCH_BOOTMEM
         def_bool y
@@@ -1321,7 -1323,7 +1322,7 @@@ config MTRR_SANITIZE
           add writeback entries.
   
           Can be disabled with disable_mtrr_cleanup on the kernel command line.
- -        The largest mtrr entry size for a continous block can be set with
+ +        The largest mtrr entry size for a continuous block can be set with
           mtrr_chunk_size.
   
           If unsure, say Y.
@@@ -1836,8 -1838,8 +1837,8 @@@ config PCI_MMCONFI
   
   config DMAR
         bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
- -      depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL
- -      ---help---
+ +      depends on PCI_MSI && ACPI && EXPERIMENTAL
+ +      help
           DMA remapping (DMAR) devices support enables independent address
           translations for Direct Memory Access (DMA) from devices.
           These DMA remapping devices are reported via ACPI tables
@@@ -1878,6 -1880,7 +1879,6 @@@ config DMAR_FLOPPY_W
   config INTR_REMAP
         bool "Support for Interrupt Remapping (EXPERIMENTAL)"
         depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
- -      select X86_X2APIC
         ---help---
           Supports Interrupt remapping for IO-APIC and MSI devices.
           To use x2apic mode in the CPU's which support x2APIC enhancements or
diff --combined arch/x86/kernel/tlb_uv.c

index deb5ebb32c3b71a8a698df7b68ae5447e12f0cf8,78422336ddea55fd3719d9f9e11d6d174cd48278..ed0c33761e6d1d75bf0b435c490384e2cd8b31dd
--- 1/arch/x86/kernel/tlb_uv.c
--- 2/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@@ -25,12 -25,42 +25,42 @@@ static int                 uv_bau_retry_limit __read_
   
   /* position of pnode (which is nasid>>1): */
   static int                    uv_nshift __read_mostly;
+ /* base pnode in this partition */
+ static int                    uv_partition_base_pnode __read_mostly;
   
   static unsigned long          uv_mmask __read_mostly;
   
   static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
   static DEFINE_PER_CPU(struct bau_control, bau_control);
   
+ /*
+  * Determine the first node on a blade.
+  */
+ static int __init blade_to_first_node(int blade)
+ {
+       int node, b;
+ 
+       for_each_online_node(node) {
+               b = uv_node_to_blade_id(node);
+               if (blade == b)
+                       return node;
+       }
+       return -1; /* shouldn't happen */
+ }
+ 
+ /*
+  * Determine the apicid of the first cpu on a blade.
+  */
+ static int __init blade_to_first_apicid(int blade)
+ {
+       int cpu;
+ 
+       for_each_present_cpu(cpu)
+               if (blade == uv_cpu_to_blade_id(cpu))
+                       return per_cpu(x86_cpu_to_apicid, cpu);
+       return -1;
+ }
+ 
   /*
    * Free a software acknowledge hardware resource by clearing its Pending
    * bit. This will return a reply to the sender.
@@@ -67,7 -97,7 +97,7 @@@ static void uv_bau_process_message(stru
         msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
         cpu = uv_blade_processor_id();
         msg->number_of_cpus =
-           uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+               uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
         this_cpu_mask = 1UL << cpu;
         if (msp->seen_by.bits & this_cpu_mask)
                 return;
@@@ -215,14 -245,14 +245,14 @@@ static int uv_wait_completion(struct ba
    * Returns @flush_mask if some remote flushing remains to be done. The
    * mask will have some bits still set.
    */
- const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
                                              struct bau_desc *bau_desc,
                                              struct cpumask *flush_mask)
   {
         int completion_status = 0;
         int right_shift;
         int tries = 0;
-       int blade;
+       int pnode;
         int bit;
         unsigned long mmr_offset;
         unsigned long index;
@@@ -265,8 -295,8 +295,8 @@@
          * use the IPI method of shootdown on them.
          */
         for_each_cpu(bit, flush_mask) {
-               blade = uv_cpu_to_blade_id(bit);
-               if (blade == this_blade)
+               pnode = uv_cpu_to_pnode(bit);
+               if (pnode == this_pnode)
                         continue;
                 cpumask_clear_cpu(bit, flush_mask);
         }
@@@ -275,8 -305,6 +305,8 @@@
         return NULL;
   }
   
+ +static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
+ +
   /**
    * uv_flush_tlb_others - globally purge translation cache of a virtual
    * address or all TLB's
@@@ -306,19 -334,20 +336,19 @@@ const struct cpumask *uv_flush_tlb_othe
                                           struct mm_struct *mm,
                                           unsigned long va, unsigned int cpu)
   {
- -      static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
- -      struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
+ +      struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
         int i;
         int bit;
-       int blade;
+       int pnode;
         int uv_cpu;
-       int this_blade;
+       int this_pnode;
         int locals = 0;
         struct bau_desc *bau_desc;
   
         cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
   
         uv_cpu = uv_blade_processor_id();
-       this_blade = uv_numa_blade_id();
+       this_pnode = uv_hub_info->pnode;
         bau_desc = __get_cpu_var(bau_control).descriptor_base;
         bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
   
@@@ -326,13 -355,14 +356,14 @@@
   
         i = 0;
         for_each_cpu(bit, flush_mask) {
-               blade = uv_cpu_to_blade_id(bit);
-               BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
-               if (blade == this_blade) {
+               pnode = uv_cpu_to_pnode(bit);
+               BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
+               if (pnode == this_pnode) {
                         locals++;
                         continue;
                 }
-               bau_node_set(blade, &bau_desc->distribution);
+               bau_node_set(pnode - uv_partition_base_pnode,
+                               &bau_desc->distribution);
                 i++;
         }
         if (i == 0) {
@@@ -350,7 -380,7 +381,7 @@@
         bau_desc->payload.address = va;
         bau_desc->payload.sending_cpu = cpu;
   
-       return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
+       return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
   }
   
   /*
@@@ -418,24 -448,58 +449,58 @@@ void uv_bau_message_interrupt(struct pt
         set_irq_regs(old_regs);
   }
   
+ /*
+  * uv_enable_timeouts
+  *
+  * Each target blade (i.e. blades that have cpu's) needs to have
+  * shootdown message timeouts enabled.  The timeout does not cause
+  * an interrupt, but causes an error message to be returned to
+  * the sender.
+  */
   static void uv_enable_timeouts(void)
   {
-       int i;
         int blade;
-       int last_blade;
+       int nblades;
         int pnode;
-       int cur_cpu = 0;
-       unsigned long apicid;
+       unsigned long mmr_image;
   
-       last_blade = -1;
-       for_each_online_node(i) {
-               blade = uv_node_to_blade_id(i);
-               if (blade == last_blade)
+       nblades = uv_num_possible_blades();
+ 
+       for (blade = 0; blade < nblades; blade++) {
+               if (!uv_blade_nr_possible_cpus(blade))
                         continue;
-               last_blade = blade;
-               apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+ 
                 pnode = uv_blade_to_pnode(blade);
-               cur_cpu += uv_blade_nr_possible_cpus(i);
+               mmr_image =
+                   uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+               /*
+                * Set the timeout period and then lock it in, in three
+                * steps; captures and locks in the period.
+                *
+                * To program the period, the SOFT_ACK_MODE must be off.
+                */
+               mmr_image &= ~((unsigned long)1 <<
+                              UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+               uv_write_global_mmr64
+                   (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+               /*
+                * Set the 4-bit period.
+                */
+               mmr_image &= ~((unsigned long)0xf <<
+                       UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+               mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
+                            UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+               uv_write_global_mmr64
+                   (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+               /*
+                * Subsequent reversals of the timebase bit (3) cause an
+                * immediate timeout of one or all INTD resources as
+                * indicated in bits 2:0 (7 causes all of them to timeout).
+                */
+               mmr_image |= ((unsigned long)1 <<
+                             UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+               uv_write_global_mmr64
+                   (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
         }
   }
   
@@@ -482,8 -546,7 +547,7 @@@ static int uv_ptc_seq_show(struct seq_f
                            stat->requestee, stat->onetlb, stat->alltlb,
                            stat->s_retry, stat->d_retry, stat->ptc_i);
                 seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
-                          uv_read_global_mmr64(uv_blade_to_pnode
-                                       (uv_cpu_to_blade_id(cpu)),
+                          uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
                                         UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
                            stat->sflush, stat->dflush,
                            stat->retriesok, stat->nomsg,
@@@ -617,16 -680,18 +681,18 @@@ static struct bau_control * __init uv_t
    * finish the initialization of the per-blade control structures
    */
   static void __init
- uv_table_bases_finish(int blade, int node, int cur_cpu,
+ uv_table_bases_finish(int blade,
                       struct bau_control *bau_tablesp,
                       struct bau_desc *adp)
   {
         struct bau_control *bcp;
-       int i;
+       int cpu;
   
-       for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
-               bcp = (struct bau_control *)&per_cpu(bau_control, i);
+       for_each_present_cpu(cpu) {
+               if (blade != uv_cpu_to_blade_id(cpu))
+                       continue;
   
+               bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
                 bcp->bau_msg_head       = bau_tablesp->va_queue_first;
                 bcp->va_queue_first     = bau_tablesp->va_queue_first;
                 bcp->va_queue_last      = bau_tablesp->va_queue_last;
@@@ -649,11 -714,10 +715,10 @@@ uv_activation_descriptor_init(int node
         struct bau_desc *adp;
         struct bau_desc *ad2;
   
-       adp = (struct bau_desc *)
-           kmalloc_node(16384, GFP_KERNEL, node);
+       adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
         BUG_ON(!adp);
   
-       pa = __pa((unsigned long)adp);
+       pa = uv_gpa(adp); /* need the real nasid*/
         n = pa >> uv_nshift;
         m = pa & uv_mmask;
   
@@@ -667,8 -731,12 +732,12 @@@
         for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
                 memset(ad2, 0, sizeof(struct bau_desc));
                 ad2->header.sw_ack_flag = 1;
-               ad2->header.base_dest_nodeid =
-                   uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+               /*
+                * base_dest_nodeid is the first node in the partition, so
+                * the bit map will indicate partition-relative node numbers.
+                * note that base_dest_nodeid is actually a nasid.
+                */
+               ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
                 ad2->header.command = UV_NET_ENDPOINT_INTD;
                 ad2->header.int_both = 1;
                 /*
@@@ -686,6 -754,8 +755,8 @@@ static struct bau_payload_queue_entry 
   uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
   {
         struct bau_payload_queue_entry *pqp;
+       unsigned long pa;
+       int pn;
         char *cp;
   
         pqp = (struct bau_payload_queue_entry *) kmalloc_node(
@@@ -696,10 -766,14 +767,14 @@@
         cp = (char *)pqp + 31;
         pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
         bau_tablesp->va_queue_first = pqp;
+       /*
+        * need the pnode of where the memory was really allocated
+        */
+       pa = uv_gpa(pqp);
+       pn = pa >> uv_nshift;
         uv_write_global_mmr64(pnode,
                               UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
-                             ((unsigned long)pnode <<
-                              UV_PAYLOADQ_PNODE_SHIFT) |
+                             ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
                               uv_physnodeaddr(pqp));
         uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
                               uv_physnodeaddr(pqp));
@@@ -715,8 -789,9 +790,9 @@@
   /*
    * Initialization of each UV blade's structures
    */
- static int __init uv_init_blade(int blade, int node, int cur_cpu)
+ static int __init uv_init_blade(int blade)
   {
+       int node;
         int pnode;
         unsigned long pa;
         unsigned long apicid;
@@@ -724,16 -799,17 +800,17 @@@
         struct bau_payload_queue_entry *pqp;
         struct bau_control *bau_tablesp;
   
+       node = blade_to_first_node(blade);
         bau_tablesp = uv_table_bases_init(blade, node);
         pnode = uv_blade_to_pnode(blade);
         adp = uv_activation_descriptor_init(node, pnode);
         pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
-       uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
+       uv_table_bases_finish(blade, bau_tablesp, adp);
         /*
          * the below initialization can't be in firmware because the
          * messaging IRQ will be determined by the OS
          */
-       apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+       apicid = blade_to_first_apicid(blade);
         pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
         if ((pa & 0xff) != UV_BAU_MESSAGE) {
                 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@@ -748,44 -824,30 +825,34 @@@
   static int __init uv_bau_init(void)
   {
         int blade;
-       int node;
         int nblades;
-       int last_blade;
         int cur_cpu;
   
         if (!is_uv_system())
                 return 0;
   
+ +      for_each_possible_cpu(cur_cpu)
+ +              alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+ +                                     GFP_KERNEL, cpu_to_node(cur_cpu));
+ +
         uv_bau_retry_limit = 1;
         uv_nshift = uv_hub_info->n_val;
         uv_mmask = (1UL << uv_hub_info->n_val) - 1;
-       nblades = 0;
-       last_blade = -1;
-       cur_cpu = 0;
-       for_each_online_node(node) {
-               blade = uv_node_to_blade_id(node);
-               if (blade == last_blade)
-                       continue;
-               last_blade = blade;
-               nblades++;
-       }
+       nblades = uv_num_possible_blades();
+ 
         uv_bau_table_bases = (struct bau_control **)
             kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
         BUG_ON(!uv_bau_table_bases);
   
-       last_blade = -1;
-       for_each_online_node(node) {
-               blade = uv_node_to_blade_id(node);
-               if (blade == last_blade)
-                       continue;
-               last_blade = blade;
-               uv_init_blade(blade, node, cur_cpu);
-               cur_cpu += uv_blade_nr_possible_cpus(blade);
-       }
+       uv_partition_base_pnode = 0x7fffffff;
+       for (blade = 0; blade < nblades; blade++)
+               if (uv_blade_nr_possible_cpus(blade) &&
+                       (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
+                       uv_partition_base_pnode = uv_blade_to_pnode(blade);
+       for (blade = 0; blade < nblades; blade++)
+               if (uv_blade_nr_possible_cpus(blade))
+                       uv_init_blade(blade);
+ 
         alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
         uv_enable_timeouts();
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 16 Apr 2009 23:43:20 +0000 (16:43 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 16 Apr 2009 23:43:20 +0000 (16:43 -0700)
		1	2
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/tlb_uv.c	patch \|	diff1 \|	diff2 \|	blob \| history