2 * linux/arch/x86-64/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code.
13 * This file handles the architecture-dependent parts of initialization
16 #include <linux/errno.h>
17 #include <linux/sched.h>
18 #include <linux/kernel.h>
20 #include <linux/stddef.h>
21 #include <linux/unistd.h>
22 #include <linux/ptrace.h>
23 #include <linux/slab.h>
24 #include <linux/user.h>
25 #include <linux/a.out.h>
26 #include <linux/tty.h>
27 #include <linux/ioport.h>
28 #include <linux/delay.h>
29 #include <linux/config.h>
30 #include <linux/init.h>
31 #include <linux/initrd.h>
32 #include <linux/highmem.h>
33 #include <linux/bootmem.h>
34 #include <linux/module.h>
35 #include <asm/processor.h>
36 #include <linux/console.h>
37 #include <linux/seq_file.h>
38 #include <linux/crash_dump.h>
39 #include <linux/root_dev.h>
40 #include <linux/pci.h>
41 #include <linux/acpi.h>
42 #include <linux/kallsyms.h>
43 #include <linux/edd.h>
44 #include <linux/mmzone.h>
45 #include <linux/kexec.h>
46 #include <linux/cpufreq.h>
47 #include <linux/dmi.h>
48 #include <linux/dma-mapping.h>
51 #include <asm/uaccess.h>
52 #include <asm/system.h>
57 #include <video/edid.h>
60 #include <asm/mpspec.h>
61 #include <asm/mmu_context.h>
62 #include <asm/bootsetup.h>
63 #include <asm/proto.h>
64 #include <asm/setup.h>
65 #include <asm/mach_apic.h>
67 #include <asm/swiotlb.h>
68 #include <asm/sections.h>
69 #include <asm/gart-mapping.h>
75 struct cpuinfo_x86 boot_cpu_data __read_mostly;
77 unsigned long mmu_cr4_features;
80 EXPORT_SYMBOL(acpi_disabled);
82 extern int __initdata acpi_ht;
83 extern acpi_interrupt_flags acpi_sci_flags;
84 int __initdata acpi_force = 0;
87 int acpi_numa __initdata;
89 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
92 unsigned long saved_video_mode;
97 struct drive_info_struct { char dummy[32]; } drive_info;
98 struct screen_info screen_info;
99 struct sys_desc_table_struct {
100 unsigned short length;
101 unsigned char table[0];
104 struct edid_info edid_info;
107 extern int root_mountflags;
109 char command_line[COMMAND_LINE_SIZE];
111 struct resource standard_io_resources[] = {
112 { .name = "dma1", .start = 0x00, .end = 0x1f,
113 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
114 { .name = "pic1", .start = 0x20, .end = 0x21,
115 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
116 { .name = "timer0", .start = 0x40, .end = 0x43,
117 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
118 { .name = "timer1", .start = 0x50, .end = 0x53,
119 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
120 { .name = "keyboard", .start = 0x60, .end = 0x6f,
121 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
122 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
123 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
124 { .name = "pic2", .start = 0xa0, .end = 0xa1,
125 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
126 { .name = "dma2", .start = 0xc0, .end = 0xdf,
127 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
128 { .name = "fpu", .start = 0xf0, .end = 0xff,
129 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
/* Number of entries in the standard_io_resources[] table; used as the
   loop bound when each entry is requested from ioport_resource. */
132 #define STANDARD_IO_RESOURCES \
133 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* Flag combination (busy + memory) applied to the "Kernel data",
   "Kernel code" and "Video RAM area" resources in this file. */
135 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
137 struct resource data_resource = {
138 .name = "Kernel data",
141 .flags = IORESOURCE_RAM,
143 struct resource code_resource = {
144 .name = "Kernel code",
147 .flags = IORESOURCE_RAM,
/* Flag combination (busy + read-only + memory) applied to the system,
   extension, adapter and video ROM resources in this file. */
150 #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
152 static struct resource system_rom_resource = {
153 .name = "System ROM",
156 .flags = IORESOURCE_ROM,
159 static struct resource extension_rom_resource = {
160 .name = "Extension ROM",
163 .flags = IORESOURCE_ROM,
166 static struct resource adapter_rom_resources[] = {
167 { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
168 .flags = IORESOURCE_ROM },
169 { .name = "Adapter ROM", .start = 0, .end = 0,
170 .flags = IORESOURCE_ROM },
171 { .name = "Adapter ROM", .start = 0, .end = 0,
172 .flags = IORESOURCE_ROM },
173 { .name = "Adapter ROM", .start = 0, .end = 0,
174 .flags = IORESOURCE_ROM },
175 { .name = "Adapter ROM", .start = 0, .end = 0,
176 .flags = IORESOURCE_ROM },
177 { .name = "Adapter ROM", .start = 0, .end = 0,
178 .flags = IORESOURCE_ROM }
/* Number of slots in adapter_rom_resources[]; bounds the adapter ROM
   scan loop in probe_roms(). */
181 #define ADAPTER_ROM_RESOURCES \
182 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
184 static struct resource video_rom_resource = {
188 .flags = IORESOURCE_ROM,
191 static struct resource video_ram_resource = {
192 .name = "Video RAM area",
195 .flags = IORESOURCE_RAM,
/* Nonzero when the unsigned short read at address x equals 0xaa55.
   probe_roms() tests this on each candidate ROM address. */
198 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
200 static int __init romchecksum(unsigned char *rom, unsigned long length)
202 unsigned char *p, sum = 0;
204 for (p = rom; p < rom + length; p++)
209 static void __init probe_roms(void)
211 unsigned long start, length, upper;
216 upper = adapter_rom_resources[0].start;
217 for (start = video_rom_resource.start; start < upper; start += 2048) {
218 rom = isa_bus_to_virt(start);
219 if (!romsignature(rom))
222 video_rom_resource.start = start;
224 /* 0 < length <= 0x7f * 512, historically */
225 length = rom[2] * 512;
227 /* if checksum okay, trust length byte */
228 if (length && romchecksum(rom, length))
229 video_rom_resource.end = start + length - 1;
231 request_resource(&iomem_resource, &video_rom_resource);
235 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
240 request_resource(&iomem_resource, &system_rom_resource);
241 upper = system_rom_resource.start;
243 /* check for extension rom (ignore length byte!) */
244 rom = isa_bus_to_virt(extension_rom_resource.start);
245 if (romsignature(rom)) {
246 length = extension_rom_resource.end - extension_rom_resource.start + 1;
247 if (romchecksum(rom, length)) {
248 request_resource(&iomem_resource, &extension_rom_resource);
249 upper = extension_rom_resource.start;
253 /* check for adapter roms on 2k boundaries */
254 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
255 rom = isa_bus_to_virt(start);
256 if (!romsignature(rom))
259 /* 0 < length <= 0x7f * 512, historically */
260 length = rom[2] * 512;
262 /* but accept any length that fits if checksum okay */
263 if (!length || start + length > upper || !romchecksum(rom, length))
266 adapter_rom_resources[i].start = start;
267 adapter_rom_resources[i].end = start + length - 1;
268 request_resource(&iomem_resource, &adapter_rom_resources[i]);
270 start = adapter_rom_resources[i++].end & ~2047UL;
274 static __init void parse_cmdline_early (char ** cmdline_p)
276 char c = ' ', *to = command_line, *from = COMMAND_LINE;
286 * If the BIOS enumerates physical processors before logical,
287 * maxcpus=N at enumeration-time can be used to disable HT.
289 else if (!memcmp(from, "maxcpus=", 8)) {
290 extern unsigned int maxcpus;
292 maxcpus = simple_strtoul(from + 8, NULL, 0);
296 /* "acpi=off" disables both ACPI table parsing and interpreter init */
297 if (!memcmp(from, "acpi=off", 8))
300 if (!memcmp(from, "acpi=force", 10)) {
301 /* add later when we do DMI horrors: */
306 /* acpi=ht just means: do ACPI MADT parsing
307 at bootup, but don't enable the full ACPI interpreter */
308 if (!memcmp(from, "acpi=ht", 7)) {
313 else if (!memcmp(from, "pci=noacpi", 10))
315 else if (!memcmp(from, "acpi=noirq", 10))
318 else if (!memcmp(from, "acpi_sci=edge", 13))
319 acpi_sci_flags.trigger = 1;
320 else if (!memcmp(from, "acpi_sci=level", 14))
321 acpi_sci_flags.trigger = 3;
322 else if (!memcmp(from, "acpi_sci=high", 13))
323 acpi_sci_flags.polarity = 1;
324 else if (!memcmp(from, "acpi_sci=low", 12))
325 acpi_sci_flags.polarity = 3;
327 /* acpi=strict disables out-of-spec workarounds */
328 else if (!memcmp(from, "acpi=strict", 11)) {
331 #ifdef CONFIG_X86_IO_APIC
332 else if (!memcmp(from, "acpi_skip_timer_override", 24))
333 acpi_skip_timer_override = 1;
337 if (!memcmp(from, "disable_timer_pin_1", 19))
338 disable_timer_pin_1 = 1;
339 if (!memcmp(from, "enable_timer_pin_1", 18))
340 disable_timer_pin_1 = -1;
342 if (!memcmp(from, "nolapic", 7) ||
343 !memcmp(from, "disableapic", 11))
346 /* Don't confuse with noapictimer */
347 if (!memcmp(from, "noapic", 6) &&
348 (from[6] == ' ' || from[6] == 0))
349 skip_ioapic_setup = 1;
351 /* Make sure to not confuse with apic= */
352 if (!memcmp(from, "apic", 4) &&
353 (from[4] == ' ' || from[4] == 0)) {
354 skip_ioapic_setup = 0;
358 if (!memcmp(from, "mem=", 4))
359 parse_memopt(from+4, &from);
361 if (!memcmp(from, "memmap=", 7)) {
362 /* exactmap option is for user defined memory */
363 if (!memcmp(from+7, "exactmap", 8)) {
364 #ifdef CONFIG_CRASH_DUMP
365 /* If we are doing a crash dump, we
366 * still need to know the real mem
367 * size before original memory map is
370 saved_max_pfn = e820_end_of_ram();
378 parse_memmapopt(from+7, &from);
384 if (!memcmp(from, "numa=", 5))
388 if (!memcmp(from,"iommu=",6)) {
392 if (!memcmp(from,"oops=panic", 10))
395 if (!memcmp(from, "noexec=", 7))
396 nonx_setup(from + 7);
399 /* crashkernel=size@addr specifies the location to reserve for
400 * a crash kernel. By reserving this memory we guarantee
401 * that Linux never sets it up as a DMA target.
402 * Useful for holding code to do something appropriate
403 * after a kernel panic.
405 else if (!memcmp(from, "crashkernel=", 12)) {
406 unsigned long size, base;
407 size = memparse(from+12, &from);
409 base = memparse(from+1, &from);
410 /* FIXME: Do I want a sanity check
411 * to validate the memory range?
413 crashk_res.start = base;
414 crashk_res.end = base + size - 1;
419 #ifdef CONFIG_PROC_VMCORE
420 /* elfcorehdr= specifies the location of elf core header
421 * stored by the crashed kernel. This option will be passed
422 * by kexec loader to the capture kernel.
424 else if(!memcmp(from, "elfcorehdr=", 11))
425 elfcorehdr_addr = memparse(from+11, &from);
431 if (COMMAND_LINE_SIZE <= ++len)
436 printk(KERN_INFO "user-defined physical RAM map:\n");
437 e820_print_map("user");
440 *cmdline_p = command_line;
445 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
447 unsigned long bootmap_size, bootmap;
449 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
450 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
452 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
453 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
454 e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
455 reserve_bootmem(bootmap, bootmap_size);
459 /* Use inline assembly to define this because the nops are defined
460 as inline assembly strings in the include files and we cannot
461 get them easily into strings. */
462 asm("\t.data\nk8nops: "
463 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
466 extern unsigned char k8nops[];
467 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
473 k8nops + 1 + 2 + 3 + 4,
474 k8nops + 1 + 2 + 3 + 4 + 5,
475 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
476 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
479 extern char __vsyscall_0;
481 /* Replace instructions with better alternatives for this CPU type.
483 This runs before SMP is initialized to avoid SMP problems with
484 self modifying code. This implies that asymmetric systems where
485 APs have fewer capabilities than the boot processor are not handled.
486 In this case boot with "noreplacement". */
487 void apply_alternatives(void *start, void *end)
491 for (a = start; (void *)a < end; a++) {
494 if (!boot_cpu_has(a->cpuid))
497 BUG_ON(a->replacementlen > a->instrlen);
499 /* vsyscall code is not mapped yet. resolve it manually. */
500 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
501 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
502 __inline_memcpy(instr, a->replacement, a->replacementlen);
503 diff = a->instrlen - a->replacementlen;
505 /* Pad the rest with nops */
506 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
510 __inline_memcpy(instr + i, k8_nops[k], k);
515 static int no_replacement __initdata = 0;
517 void __init alternative_instructions(void)
519 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
522 apply_alternatives(__alt_instructions, __alt_instructions_end);
525 static int __init noreplacement_setup(char *s)
531 __setup("noreplacement", noreplacement_setup);
533 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
535 #ifdef CONFIG_EDD_MODULE
539 * copy_edd() - Copy the BIOS EDD information
540 * from boot_params into a safe place.
543 static inline void copy_edd(void)
545 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
546 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
547 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
548 edd.edd_info_nr = EDD_NR;
551 static inline void copy_edd(void)
/* Physical address 0x40E; reserve_ebda_region() reads an unsigned short
   from here (via phys_to_virt) to locate the EBDA. */
556 #define EBDA_ADDR_POINTER 0x40E
557 static void __init reserve_ebda_region(void)
561 * there is a real-mode segmented pointer pointing to the
562 * 4K EBDA area at 0x40E
564 addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
567 reserve_bootmem_generic(addr, PAGE_SIZE);
570 void __init setup_arch(char **cmdline_p)
572 unsigned long kernel_end;
574 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
575 drive_info = DRIVE_INFO;
576 screen_info = SCREEN_INFO;
577 edid_info = EDID_INFO;
578 saved_video_mode = SAVED_VIDEO_MODE;
579 bootloader_type = LOADER_TYPE;
581 #ifdef CONFIG_BLK_DEV_RAM
582 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
583 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
584 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
586 setup_memory_region();
589 if (!MOUNT_ROOT_RDONLY)
590 root_mountflags &= ~MS_RDONLY;
591 init_mm.start_code = (unsigned long) &_text;
592 init_mm.end_code = (unsigned long) &_etext;
593 init_mm.end_data = (unsigned long) &_edata;
594 init_mm.brk = (unsigned long) &_end;
596 code_resource.start = virt_to_phys(&_text);
597 code_resource.end = virt_to_phys(&_etext)-1;
598 data_resource.start = virt_to_phys(&_etext);
599 data_resource.end = virt_to_phys(&_edata)-1;
601 parse_cmdline_early(cmdline_p);
603 early_identify_cpu(&boot_cpu_data);
606 * partially used pages are not usable - thus
607 * we are rounding upwards:
609 end_pfn = e820_end_of_ram();
613 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
619 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
620 * Call this early for SRAT node setup.
622 acpi_boot_table_init();
625 #ifdef CONFIG_ACPI_NUMA
627 * Parse SRAT to discover nodes.
633 numa_initmem_init(0, end_pfn);
635 contig_initmem_init(0, end_pfn);
638 /* Reserve direct mapping */
639 reserve_bootmem_generic(table_start << PAGE_SHIFT,
640 (table_end - table_start) << PAGE_SHIFT);
643 kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
644 reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
647 * reserve physical page 0 - it's a special BIOS page on many boxes,
648 * enabling clean reboots, SMP operation, laptop functions.
650 reserve_bootmem_generic(0, PAGE_SIZE);
652 /* reserve ebda region */
653 reserve_ebda_region();
657 * But first pinch a few for the stack/trampoline stuff
658 * FIXME: Don't need the extra page at 4K, but need to fix
659 * trampoline before removing it. (see the GDT stuff)
661 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
663 /* Reserve SMP trampoline */
664 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
667 #ifdef CONFIG_ACPI_SLEEP
669 * Reserve low memory region for sleep support.
671 acpi_reserve_bootmem();
673 #ifdef CONFIG_X86_LOCAL_APIC
675 * Find and reserve possible boot-time SMP configuration:
679 #ifdef CONFIG_BLK_DEV_INITRD
680 if (LOADER_TYPE && INITRD_START) {
681 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
682 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
684 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
685 initrd_end = initrd_start+INITRD_SIZE;
688 printk(KERN_ERR "initrd extends beyond end of memory "
689 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
690 (unsigned long)(INITRD_START + INITRD_SIZE),
691 (unsigned long)(end_pfn << PAGE_SHIFT));
697 if (crashk_res.start != crashk_res.end) {
698 reserve_bootmem(crashk_res.start,
699 crashk_res.end - crashk_res.start + 1);
709 * Read APIC and some other early information from ACPI tables.
716 #ifdef CONFIG_X86_LOCAL_APIC
718 * get boot-time SMP configuration:
720 if (smp_found_config)
722 init_apic_mappings();
726 * Request address space for all standard RAM and ROM resources
727 * and also for regions reported as reserved by the e820.
730 e820_reserve_resources();
732 request_resource(&iomem_resource, &video_ram_resource);
736 /* request I/O space for devices used on all i[345]86 PCs */
737 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
738 request_resource(&ioport_resource, &standard_io_resources[i]);
743 #ifdef CONFIG_GART_IOMMU
748 #if defined(CONFIG_VGA_CONSOLE)
749 conswitchp = &vga_con;
750 #elif defined(CONFIG_DUMMY_CONSOLE)
751 conswitchp = &dummy_con;
756 static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
760 if (c->extended_cpuid_level < 0x80000004)
763 v = (unsigned int *) c->x86_model_id;
764 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
765 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
766 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
767 c->x86_model_id[48] = 0;
772 static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
774 unsigned int n, dummy, eax, ebx, ecx, edx;
776 n = c->extended_cpuid_level;
778 if (n >= 0x80000005) {
779 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
780 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
781 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
782 c->x86_cache_size=(ecx>>24)+(edx>>24);
783 /* On K8 L1 TLB is inclusive, so don't count it */
787 if (n >= 0x80000006) {
788 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
789 ecx = cpuid_ecx(0x80000006);
790 c->x86_cache_size = ecx >> 16;
791 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
793 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
794 c->x86_cache_size, ecx & 0xFF);
798 cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
799 if (n >= 0x80000008) {
800 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
801 c->x86_virt_bits = (eax >> 8) & 0xff;
802 c->x86_phys_bits = eax & 0xff;
807 static int nearby_node(int apicid)
810 for (i = apicid - 1; i >= 0; i--) {
811 int node = apicid_to_node[i];
812 if (node != NUMA_NO_NODE && node_online(node))
815 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
816 int node = apicid_to_node[i];
817 if (node != NUMA_NO_NODE && node_online(node))
820 return first_node(node_online_map); /* Shouldn't happen */
825 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
826 * Assumes number of cores is a power of two.
828 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
831 int cpu = smp_processor_id();
835 unsigned apicid = phys_proc_id[cpu];
839 while ((1 << bits) < c->x86_max_cores)
842 /* Low order bits define the core id (index of core in socket) */
843 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
844 /* Convert the APIC ID into the socket ID */
845 phys_proc_id[cpu] >>= bits;
848 node = phys_proc_id[cpu];
849 if (apicid_to_node[apicid] != NUMA_NO_NODE)
850 node = apicid_to_node[apicid];
851 if (!node_online(node)) {
852 /* Two possibilities here:
853 - The CPU is missing memory and no node was created.
854 In that case try picking one from a nearby CPU
855 - The APIC IDs differ from the HyperTransport node IDs
856 which the K8 northbridge parsing fills in.
857 Assume they are all increased by a constant offset,
858 but in the same order as the HT nodeids.
859 If that doesn't result in a usable node fall back to the
860 path for the previous case. */
861 int ht_nodeid = apicid - (phys_proc_id[0] << bits);
862 if (ht_nodeid >= 0 &&
863 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
864 node = apicid_to_node[ht_nodeid];
865 /* Pick a nearby node */
866 if (!node_online(node))
867 node = nearby_node(apicid);
869 numa_set_node(cpu, node);
871 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
872 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
877 static int __init init_amd(struct cpuinfo_x86 *c)
886 * Disable TLB flush filter by setting HWCR.FFDIS on K8
887 * bit 6 of msr C001_0015
889 * Errata 63 for SH-B3 steppings
890 * Errata 122 for all steppings (F+ have it disabled by default)
893 rdmsrl(MSR_K8_HWCR, value);
895 wrmsrl(MSR_K8_HWCR, value);
899 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
900 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
901 clear_bit(0*32+31, &c->x86_capability);
903 /* On C+ stepping K8 rep microcode works well for copy/memset */
904 level = cpuid_eax(1);
905 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
906 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
908 r = get_model_name(c);
912 /* Should distinguish Models here, but this is only
913 a fallback anyways. */
914 strcpy(c->x86_model_id, "Hammer");
918 display_cacheinfo(c);
920 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
921 if (c->x86_power & (1<<8))
922 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
924 if (c->extended_cpuid_level >= 0x80000008) {
925 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
926 if (c->x86_max_cores & (c->x86_max_cores - 1))
927 c->x86_max_cores = 1;
935 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
938 u32 eax, ebx, ecx, edx;
939 int index_msb, core_bits;
940 int cpu = smp_processor_id();
942 cpuid(1, &eax, &ebx, &ecx, &edx);
944 c->apicid = phys_pkg_id(0);
946 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
949 smp_num_siblings = (ebx & 0xff0000) >> 16;
951 if (smp_num_siblings == 1) {
952 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
953 } else if (smp_num_siblings > 1 ) {
955 if (smp_num_siblings > NR_CPUS) {
956 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
957 smp_num_siblings = 1;
961 index_msb = get_count_order(smp_num_siblings);
962 phys_proc_id[cpu] = phys_pkg_id(index_msb);
964 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
967 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
969 index_msb = get_count_order(smp_num_siblings) ;
971 core_bits = get_count_order(c->x86_max_cores);
973 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
974 ((1 << core_bits) - 1);
976 if (c->x86_max_cores > 1)
977 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
984 * find out the number of processor cores on the die
986 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
990 if (c->cpuid_level < 4)
999 return ((eax >> 26) + 1);
1004 static void srat_detect_node(void)
1008 int cpu = smp_processor_id();
1010 /* Don't do the funky fallback heuristics the AMD version employs
1012 node = apicid_to_node[hard_smp_processor_id()];
1013 if (node == NUMA_NO_NODE)
1015 numa_set_node(cpu, node);
1018 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
1022 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1027 init_intel_cacheinfo(c);
1028 n = c->extended_cpuid_level;
1029 if (n >= 0x80000008) {
1030 unsigned eax = cpuid_eax(0x80000008);
1031 c->x86_virt_bits = (eax >> 8) & 0xff;
1032 c->x86_phys_bits = eax & 0xff;
1033 /* CPUID workaround for Intel 0F34 CPU */
1034 if (c->x86_vendor == X86_VENDOR_INTEL &&
1035 c->x86 == 0xF && c->x86_model == 0x3 &&
1037 c->x86_phys_bits = 36;
1041 c->x86_cache_alignment = c->x86_clflush_size * 2;
1042 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
1043 (c->x86 == 0x6 && c->x86_model >= 0x0e))
1044 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1045 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
1046 c->x86_max_cores = intel_num_cpu_cores(c);
1051 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
1053 char *v = c->x86_vendor_id;
1055 if (!strcmp(v, "AuthenticAMD"))
1056 c->x86_vendor = X86_VENDOR_AMD;
1057 else if (!strcmp(v, "GenuineIntel"))
1058 c->x86_vendor = X86_VENDOR_INTEL;
1060 c->x86_vendor = X86_VENDOR_UNKNOWN;
1063 struct cpu_model_info {
1066 char *model_names[16];
1069 /* Do some early cpuid on the boot CPU to get some parameters that are
1070 needed before check_bugs. Everything advanced is in identify_cpu
1072 void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1076 c->loops_per_jiffy = loops_per_jiffy;
1077 c->x86_cache_size = -1;
1078 c->x86_vendor = X86_VENDOR_UNKNOWN;
1079 c->x86_model = c->x86_mask = 0; /* So far unknown... */
1080 c->x86_vendor_id[0] = '\0'; /* Unset */
1081 c->x86_model_id[0] = '\0'; /* Unset */
1082 c->x86_clflush_size = 64;
1083 c->x86_cache_alignment = c->x86_clflush_size;
1084 c->x86_max_cores = 1;
1085 c->extended_cpuid_level = 0;
1086 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1088 /* Get vendor name */
1089 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
1090 (unsigned int *)&c->x86_vendor_id[0],
1091 (unsigned int *)&c->x86_vendor_id[8],
1092 (unsigned int *)&c->x86_vendor_id[4]);
1096 /* Initialize the standard set of capabilities */
1097 /* Note that the vendor-specific code below might override */
1099 /* Intel-defined flags: level 0x00000001 */
1100 if (c->cpuid_level >= 0x00000001) {
1102 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
1103 &c->x86_capability[0]);
1104 c->x86 = (tfms >> 8) & 0xf;
1105 c->x86_model = (tfms >> 4) & 0xf;
1106 c->x86_mask = tfms & 0xf;
1108 c->x86 += (tfms >> 20) & 0xff;
1110 c->x86_model += ((tfms >> 16) & 0xF) << 4;
1111 if (c->x86_capability[0] & (1<<19))
1112 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1114 /* Have CPUID level 0 only - unheard of */
1119 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
1124 * This does the hard work of actually picking apart the CPU stuff...
1126 void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1131 early_identify_cpu(c);
1133 /* AMD-defined flags: level 0x80000001 */
1134 xlvl = cpuid_eax(0x80000000);
1135 c->extended_cpuid_level = xlvl;
1136 if ((xlvl & 0xffff0000) == 0x80000000) {
1137 if (xlvl >= 0x80000001) {
1138 c->x86_capability[1] = cpuid_edx(0x80000001);
1139 c->x86_capability[6] = cpuid_ecx(0x80000001);
1141 if (xlvl >= 0x80000004)
1142 get_model_name(c); /* Default name */
1145 /* Transmeta-defined flags: level 0x80860001 */
1146 xlvl = cpuid_eax(0x80860000);
1147 if ((xlvl & 0xffff0000) == 0x80860000) {
1148 /* Don't set x86_cpuid_level here for now to not confuse. */
1149 if (xlvl >= 0x80860001)
1150 c->x86_capability[2] = cpuid_edx(0x80860001);
1154 * Vendor-specific initialization. In this section we
1155 * canonicalize the feature flags, meaning if there are
1156 * features a certain CPU supports which CPUID doesn't
1157 * tell us, CPUID claiming incorrect flags, or other bugs,
1158 * we handle them here.
1160 * At the end of this section, c->x86_capability better
1161 * indicate the features this CPU genuinely supports!
1163 switch (c->x86_vendor) {
1164 case X86_VENDOR_AMD:
1168 case X86_VENDOR_INTEL:
1172 case X86_VENDOR_UNKNOWN:
1174 display_cacheinfo(c);
1178 select_idle_routine(c);
1182 * On SMP, boot_cpu_data holds the common feature set between
1183 * all CPUs; so make sure that we indicate which features are
1184 * common between the CPUs. The first time this routine gets
1185 * executed, c == &boot_cpu_data.
1187 if (c != &boot_cpu_data) {
1188 /* AND the already accumulated flags with these */
1189 for (i = 0 ; i < NCAPINTS ; i++)
1190 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1193 #ifdef CONFIG_X86_MCE
1196 if (c == &boot_cpu_data)
1201 numa_add_cpu(smp_processor_id());
1206 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1208 if (c->x86_model_id[0])
1209 printk("%s", c->x86_model_id);
1211 if (c->x86_mask || c->cpuid_level >= 0)
1212 printk(" stepping %02x\n", c->x86_mask);
1218 * Get CPU information for use by the procfs.
1221 static int show_cpuinfo(struct seq_file *m, void *v)
1223 struct cpuinfo_x86 *c = v;
1226 * These flag bits must match the definitions in <asm/cpufeature.h>.
1227 * NULL means this bit is undefined or reserved; either way it doesn't
1228 * have meaning as far as Linux is concerned. Note that it's important
1229 * to realize there is a difference between this table and CPUID -- if
1230 * applications want to get the raw CPUID data, they should access
1231 * /dev/cpu/<cpu_nr>/cpuid instead.
1233 static char *x86_cap_flags[] = {
1235 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1236 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1237 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1238 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
1241 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1242 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1243 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1244 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
1246 /* Transmeta-defined */
1247 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
1248 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1249 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1250 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1252 /* Other (Linux-defined) */
1253 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1254 "constant_tsc", NULL, NULL,
1255 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1256 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1257 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1259 /* Intel-defined (#2) */
1260 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
1261 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
1262 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1263 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1265 /* VIA/Cyrix/Centaur-defined */
1266 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1267 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1268 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1269 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1271 /* AMD-defined (#2) */
1272 "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
1273 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1274 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1275 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1277 static char *x86_power_flags[] = {
1278 "ts", /* temperature sensor */
1279 "fid", /* frequency id control */
1280 "vid", /* voltage id control */
1281 "ttp", /* thermal trip */
1285 /* nothing */ /* constant_tsc - moved to flags */
1290 if (!cpu_online(c-cpu_data))
1294 seq_printf(m,"processor\t: %u\n"
1296 "cpu family\t: %d\n"
1298 "model name\t: %s\n",
1299 (unsigned)(c-cpu_data),
1300 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
1303 c->x86_model_id[0] ? c->x86_model_id : "unknown");
1305 if (c->x86_mask || c->cpuid_level >= 0)
1306 seq_printf(m, "stepping\t: %d\n", c->x86_mask);
1308 seq_printf(m, "stepping\t: unknown\n");
1310 if (cpu_has(c,X86_FEATURE_TSC)) {
1311 unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
1314 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
1315 freq / 1000, (freq % 1000));
1319 if (c->x86_cache_size >= 0)
1320 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1323 if (smp_num_siblings * c->x86_max_cores > 1) {
1324 int cpu = c - cpu_data;
1325 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1326 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1327 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1328 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1334 "fpu_exception\t: yes\n"
1335 "cpuid level\t: %d\n"
1342 for ( i = 0 ; i < 32*NCAPINTS ; i++ )
1343 if ( test_bit(i, &c->x86_capability) &&
1344 x86_cap_flags[i] != NULL )
1345 seq_printf(m, " %s", x86_cap_flags[i]);
1348 seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
1349 c->loops_per_jiffy/(500000/HZ),
1350 (c->loops_per_jiffy/(5000/HZ)) % 100);
1352 if (c->x86_tlbsize > 0)
1353 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
1354 seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
1355 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
1357 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
1358 c->x86_phys_bits, c->x86_virt_bits);
1360 seq_printf(m, "power management:");
1363 for (i = 0; i < 32; i++)
1364 if (c->x86_power & (1 << i)) {
1365 if (i < ARRAY_SIZE(x86_power_flags) &&
1367 seq_printf(m, "%s%s",
1368 x86_power_flags[i][0]?" ":"",
1369 x86_power_flags[i]);
1371 seq_printf(m, " [%d]", i);
1375 seq_printf(m, "\n\n");
1380 static void *c_start(struct seq_file *m, loff_t *pos)
1382 return *pos < NR_CPUS ? cpu_data + *pos : NULL;
1385 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
1388 return c_start(m, pos);
1391 static void c_stop(struct seq_file *m, void *v)
1395 struct seq_operations cpuinfo_op = {
1399 .show = show_cpuinfo,
1402 static int __init run_dmi_scan(void)
1407 core_initcall(run_dmi_scan);