]> Pileus Git - ~andy/linux/commitdiff
percpu: align percpu readmostly subsection to cacheline
author Tejun Heo <tj@kernel.org>
Tue, 25 Jan 2011 13:26:50 +0000 (14:26 +0100)
committer Tejun Heo <tj@kernel.org>
Tue, 25 Jan 2011 13:26:50 +0000 (14:26 +0100)
Currently the percpu readmostly subsection may share cachelines with
other percpu subsections, which may result in unnecessary cacheline
bouncing and performance degradation.

This patch adds a @cacheline parameter to the PERCPU() and PERCPU_VADDR()
linker macros, makes each arch linker script specify its cacheline
size, and uses that size to align percpu subsections.

This is based on Shaohua's x86 only patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shaohua.li@intel.com>
19 files changed:
arch/alpha/kernel/vmlinux.lds.S
arch/arm/kernel/vmlinux.lds.S
arch/blackfin/kernel/vmlinux.lds.S
arch/cris/kernel/vmlinux.lds.S
arch/frv/kernel/vmlinux.lds.S
arch/ia64/kernel/vmlinux.lds.S
arch/m32r/kernel/vmlinux.lds.S
arch/mips/kernel/vmlinux.lds.S
arch/mn10300/kernel/vmlinux.lds.S
arch/parisc/kernel/vmlinux.lds.S
arch/powerpc/kernel/vmlinux.lds.S
arch/s390/kernel/vmlinux.lds.S
arch/sh/kernel/vmlinux.lds.S
arch/sparc/kernel/vmlinux.lds.S
arch/tile/kernel/vmlinux.lds.S
arch/um/include/asm/common.lds.S
arch/x86/kernel/vmlinux.lds.S
arch/xtensa/kernel/vmlinux.lds.S
include/asm-generic/vmlinux.lds.h

index 003ef4c02585a3eaba223f765a5623056c1ab9f1..173518f8c8bb7303e76bd9ae2bb0f9f087f85f5f 100644 (file)
@@ -38,7 +38,7 @@ SECTIONS
        __init_begin = ALIGN(PAGE_SIZE);
        INIT_TEXT_SECTION(PAGE_SIZE)
        INIT_DATA_SECTION(16)
-       PERCPU(PAGE_SIZE)
+       PERCPU(64, PAGE_SIZE)
        /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
           needed for the THREAD_SIZE aligned init_task gets freed after init */
        . = ALIGN(THREAD_SIZE);
index 86b66f3f203187c2b8f0bed5ec7ff096ca656967..cf78a03bf810286826b686c3b141e5d9de6ef0f6 100644 (file)
@@ -70,7 +70,7 @@ SECTIONS
 #endif
        }
 
-       PERCPU(PAGE_SIZE)
+       PERCPU(32, PAGE_SIZE)
 
 #ifndef CONFIG_XIP_KERNEL
        . = ALIGN(PAGE_SIZE);
index 4122678529c0ace39455107651e100f919d5b7cf..c40d07f708e8daabebcecd09fffc960f646dfaab 100644 (file)
@@ -136,7 +136,7 @@ SECTIONS
 
        . = ALIGN(16);
        INIT_DATA_SECTION(16)
-       PERCPU(4)
+       PERCPU(32, 4)
 
        .exit.data :
        {
index 442218980db02e716b3be48e1071661598ab7c10..c62e1346f47cad58b57f3a1d50d5a044f7d2a17d 100644 (file)
@@ -107,7 +107,7 @@ SECTIONS
 #endif
        __vmlinux_end = .;              /* Last address of the physical file. */
 #ifdef CONFIG_ETRAX_ARCH_V32
-       PERCPU(PAGE_SIZE)
+       PERCPU(32, PAGE_SIZE)
 
        .init.ramfs : {
                INIT_RAM_FS
index 8b973f3cc90e432ba2ac4a72dc602f5e4e5e3db3..0daae8af5787bfd44fe467689b2fdbd2e7b73289 100644 (file)
@@ -37,7 +37,7 @@ SECTIONS
   _einittext = .;
 
   INIT_DATA_SECTION(8)
-  PERCPU(4096)
+  PERCPU(L1_CACHE_BYTES, 4096)
 
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
index 5a4d044dcb1c885ef9b2c61d379df78d25a88baa..787de4a77d82a9ba80891ac215b2c099ac644524 100644 (file)
@@ -198,7 +198,7 @@ SECTIONS {
 
        /* Per-cpu data: */
        . = ALIGN(PERCPU_PAGE_SIZE);
-       PERCPU_VADDR(PERCPU_ADDR, :percpu)
+       PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
        __phys_per_cpu_start = __per_cpu_load;
        /*
         * ensure percpu data fits
index 7da94eaa082b82b6891c7c785a6cc22b8186363e..c194d64cdbb9173912ba2962c4a185561a1e2ad4 100644 (file)
@@ -53,7 +53,7 @@ SECTIONS
   __init_begin = .;
   INIT_TEXT_SECTION(PAGE_SIZE)
   INIT_DATA_SECTION(16)
-  PERCPU(PAGE_SIZE)
+  PERCPU(32, PAGE_SIZE)
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
   /* freed after init ends here */
index 570607b376b57d16696089773343f8a9f9e5f6e9..832afbb87588bedcf2b8265495fd44b6874ccabb 100644 (file)
@@ -115,7 +115,7 @@ SECTIONS
                EXIT_DATA
        }
 
-       PERCPU(PAGE_SIZE)
+       PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE)
        . = ALIGN(PAGE_SIZE);
        __init_end = .;
        /* freed after init ends here */
index febbeee7f2f54ba99bf429b72d9ea74aab80be62..968bcd2cb0226c325cc37837518e25dafe4ed6b9 100644 (file)
@@ -70,7 +70,7 @@ SECTIONS
        .exit.text : { EXIT_TEXT; }
        .exit.data : { EXIT_DATA; }
 
-  PERCPU(PAGE_SIZE)
+  PERCPU(32, PAGE_SIZE)
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
   /* freed after init ends here */
index d64a6bbec2aa0585b380e485c600db818219b51e..8f1e4efd143e0f32e40ec48a6afb09247b75f0bc 100644 (file)
@@ -145,7 +145,7 @@ SECTIONS
                EXIT_DATA
        }
 
-       PERCPU(PAGE_SIZE)
+       PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
        . = ALIGN(PAGE_SIZE);
        __init_end = .;
        /* freed after init ends here */
index 8a0deefac08d216c2ebd127add50ce8117f74f79..b9150f07d2664a9a763ebeda3564b1c08f18fb17 100644 (file)
@@ -160,7 +160,7 @@ SECTIONS
                INIT_RAM_FS
        }
 
-       PERCPU(PAGE_SIZE)
+       PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
 
        . = ALIGN(8);
        .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
index a68ac10213b26475483e5568f78297cdf431a7ad..1bc18cdb525b3d838d9b47f7b98fc3474101355f 100644 (file)
@@ -77,7 +77,7 @@ SECTIONS
        . = ALIGN(PAGE_SIZE);
        INIT_DATA_SECTION(0x100)
 
-       PERCPU(PAGE_SIZE)
+       PERCPU(0x100, PAGE_SIZE)
        . = ALIGN(PAGE_SIZE);
        __init_end = .;         /* freed after init ends here */
 
index 7f8a709c3adaa3c9f9f5b31ca3f2c1c7a9650895..af4d46187a79f18bb8ae350294ccb5923f5410fb 100644 (file)
@@ -66,7 +66,7 @@ SECTIONS
                __machvec_end = .;
        }
 
-       PERCPU(PAGE_SIZE)
+       PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
 
        /*
         * .exit.text is discarded at runtime, not link time, to deal with
index 0c1e6783657f26172291186637ea5d20a6966677..92b557afe535a59e95d8ab185d5d51c254d4d081 100644 (file)
@@ -108,7 +108,7 @@ SECTIONS
                __sun4v_2insn_patch_end = .;
        }
 
-       PERCPU(PAGE_SIZE)
+       PERCPU(SMP_CACHE_BYTES, PAGE_SIZE)
 
        . = ALIGN(PAGE_SIZE);
        __init_end = .;
index 25fdc0c1839a4d6245e34336ffa527e1c09b5e4a..c6ce378e067877ef59cd4eb662fed82f3bbcda93 100644 (file)
@@ -63,7 +63,7 @@ SECTIONS
     *(.init.page)
   } :data =0
   INIT_DATA_SECTION(16)
-  PERCPU(PAGE_SIZE)
+  PERCPU(L2_CACHE_BYTES, PAGE_SIZE)
   . = ALIGN(PAGE_SIZE);
   VMLINUX_SYMBOL(_einitdata) = .;
 
index ac55b9efa1ce58c9260f194890d8308c3b4195fa..34bede8aad4a9ca9c23df9261be3a7c3d8ac20ec 100644 (file)
@@ -42,7 +42,7 @@
        INIT_SETUP(0)
   }
 
-  PERCPU(32)
+  PERCPU(32, 32)
        
   .initcall.init : {
        INIT_CALLS
index bf4700755184e32d4b4e549bd19f4014caa46468..cef446f8ac78a0c57e6f489c7a7b7b3e68fe0047 100644 (file)
@@ -230,7 +230,7 @@ SECTIONS
         * output PHDR, so the next output section - .init.text - should
         * start another segment - init.
         */
-       PERCPU_VADDR(0, :percpu)
+       PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
 #endif
 
        INIT_TEXT_SECTION(PAGE_SIZE)
@@ -305,7 +305,7 @@ SECTIONS
        }
 
 #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
-       PERCPU(THREAD_SIZE)
+       PERCPU(INTERNODE_CACHE_BYTES, THREAD_SIZE)
 #endif
 
        . = ALIGN(PAGE_SIZE);
index 9b526154c9ba9840674dc778a032a4846bc55f18..a2820065927eaa642224af0d748491506fdd473d 100644 (file)
@@ -155,7 +155,7 @@ SECTIONS
     INIT_RAM_FS
   }
 
-  PERCPU(PAGE_SIZE)
+  PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE)
 
   /* We need this dummy segment here */
 
index 6ebb81030d2d109ce9f99069b130c29a39ccfa11..439df587c12cad476d5e7c14fe0d9c38e4b16f4a 100644 (file)
@@ -15,7 +15,7 @@
  *     HEAD_TEXT_SECTION
  *     INIT_TEXT_SECTION(PAGE_SIZE)
  *     INIT_DATA_SECTION(...)
- *     PERCPU(PAGE_SIZE)
+ *     PERCPU(CACHELINE_SIZE, PAGE_SIZE)
  *     __init_end = .;
  *
  *     _stext = .;
 
 /**
  * PERCPU_VADDR - define output section for percpu area
+ * @cacheline: cacheline size
  * @vaddr: explicit base address (optional)
  * @phdr: destination PHDR (optional)
  *
- * Macro which expands to output section for percpu area.  If @vaddr
- * is not blank, it specifies explicit base address and all percpu
- * symbols will be offset from the given address.  If blank, @vaddr
- * always equals @laddr + LOAD_OFFSET.
+ * Macro which expands to output section for percpu area.
+ *
+ * @cacheline is used to align subsections to avoid false cacheline
+ * sharing between subsections for different purposes.
+ *
+ * If @vaddr is not blank, it specifies explicit base address and all
+ * percpu symbols will be offset from the given address.  If blank,
+ * @vaddr always equals @laddr + LOAD_OFFSET.
  *
  * @phdr defines the output PHDR to use if not blank.  Be warned that
  * output PHDR is sticky.  If @phdr is specified, the next output
  * If there is no need to put the percpu section at a predetermined
  * address, use PERCPU().
  */
-#define PERCPU_VADDR(vaddr, phdr)                                      \
+#define PERCPU_VADDR(cacheline, vaddr, phdr)                           \
        VMLINUX_SYMBOL(__per_cpu_load) = .;                             \
        .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load)         \
                                - LOAD_OFFSET) {                        \
                *(.data..percpu..first)                                 \
                . = ALIGN(PAGE_SIZE);                                   \
                *(.data..percpu..page_aligned)                          \
+               . = ALIGN(cacheline);                                   \
                *(.data..percpu..readmostly)                            \
+               . = ALIGN(cacheline);                                   \
                *(.data..percpu)                                        \
                *(.data..percpu..shared_aligned)                        \
                VMLINUX_SYMBOL(__per_cpu_end) = .;                      \
 
 /**
  * PERCPU - define output section for percpu area, simple version
+ * @cacheline: cacheline size
  * @align: required alignment
  *
- * Align to @align and outputs output section for percpu area.  This
- * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
+ * Align to @align and outputs output section for percpu area.  This macro
+ * doesn't manipulate @vaddr or @phdr and __per_cpu_load and
  * __per_cpu_start will be identical.
  *
- * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except
- * that __per_cpu_load is defined as a relative symbol against
- * .data..percpu which is required for relocatable x86_32
- * configuration.
+ * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,)
+ * except that __per_cpu_load is defined as a relative symbol against
+ * .data..percpu which is required for relocatable x86_32 configuration.
  */
-#define PERCPU(align)                                                  \
+#define PERCPU(cacheline, align)                                       \
        . = ALIGN(align);                                               \
        .data..percpu   : AT(ADDR(.data..percpu) - LOAD_OFFSET) {       \
                VMLINUX_SYMBOL(__per_cpu_load) = .;                     \
                *(.data..percpu..first)                                 \
                . = ALIGN(PAGE_SIZE);                                   \
                *(.data..percpu..page_aligned)                          \
+               . = ALIGN(cacheline);                                   \
                *(.data..percpu..readmostly)                            \
+               . = ALIGN(cacheline);                                   \
                *(.data..percpu)                                        \
                *(.data..percpu..shared_aligned)                        \
                VMLINUX_SYMBOL(__per_cpu_end) = .;                      \