]> Pileus Git - ~andy/linux/blob - arch/powerpc/kernel/fadump.c
fadump: Add PT_NOTE program header for vmcoreinfo
[~andy/linux] / arch / powerpc / kernel / fadump.c
1 /*
2  * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
3  * dump with assistance from firmware. This approach does not use kexec,
4  * instead firmware assists in booting the kdump kernel while preserving
5  * memory contents. Most of the code implementation has been adapted
6  * from phyp assisted dump implementation written by Linas Vepstas and
7  * Manish Ahuja
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22  *
23  * Copyright 2011 IBM Corporation
24  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
25  */
26
27 #undef DEBUG
28 #define pr_fmt(fmt) "fadump: " fmt
29
30 #include <linux/string.h>
31 #include <linux/memblock.h>
32 #include <linux/delay.h>
33 #include <linux/debugfs.h>
34 #include <linux/seq_file.h>
35 #include <linux/crash_dump.h>
36
37 #include <asm/page.h>
38 #include <asm/prom.h>
39 #include <asm/rtas.h>
40 #include <asm/fadump.h>
41
42 static struct fw_dump fw_dump;
43 static struct fadump_mem_struct fdm;
44 static const struct fadump_mem_struct *fdm_active;
45
46 static DEFINE_MUTEX(fadump_mutex);
47 struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
48 int crash_mem_ranges;
49
50 /* Scan the Firmware Assisted dump configuration details. */
51 int __init early_init_dt_scan_fw_dump(unsigned long node,
52                         const char *uname, int depth, void *data)
53 {
54         __be32 *sections;
55         int i, num_sections;
56         unsigned long size;
57         const int *token;
58
59         if (depth != 1 || strcmp(uname, "rtas") != 0)
60                 return 0;
61
62         /*
63          * Check if Firmware Assisted dump is supported. if yes, check
64          * if dump has been initiated on last reboot.
65          */
66         token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
67         if (!token)
68                 return 0;
69
70         fw_dump.fadump_supported = 1;
71         fw_dump.ibm_configure_kernel_dump = *token;
72
73         /*
74          * The 'ibm,kernel-dump' rtas node is present only if there is
75          * dump data waiting for us.
76          */
77         fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
78         if (fdm_active)
79                 fw_dump.dump_active = 1;
80
81         /* Get the sizes required to store dump data for the firmware provided
82          * dump sections.
83          * For each dump section type supported, a 32bit cell which defines
84          * the ID of a supported section followed by two 32 bit cells which
85          * gives teh size of the section in bytes.
86          */
87         sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
88                                         &size);
89
90         if (!sections)
91                 return 0;
92
93         num_sections = size / (3 * sizeof(u32));
94
95         for (i = 0; i < num_sections; i++, sections += 3) {
96                 u32 type = (u32)of_read_number(sections, 1);
97
98                 switch (type) {
99                 case FADUMP_CPU_STATE_DATA:
100                         fw_dump.cpu_state_data_size =
101                                         of_read_ulong(&sections[1], 2);
102                         break;
103                 case FADUMP_HPTE_REGION:
104                         fw_dump.hpte_region_size =
105                                         of_read_ulong(&sections[1], 2);
106                         break;
107                 }
108         }
109         return 1;
110 }
111
112 int is_fadump_active(void)
113 {
114         return fw_dump.dump_active;
115 }
116
117 /* Print firmware assisted dump configurations for debugging purpose. */
118 static void fadump_show_config(void)
119 {
120         pr_debug("Support for firmware-assisted dump (fadump): %s\n",
121                         (fw_dump.fadump_supported ? "present" : "no support"));
122
123         if (!fw_dump.fadump_supported)
124                 return;
125
126         pr_debug("Fadump enabled    : %s\n",
127                                 (fw_dump.fadump_enabled ? "yes" : "no"));
128         pr_debug("Dump Active       : %s\n",
129                                 (fw_dump.dump_active ? "yes" : "no"));
130         pr_debug("Dump section sizes:\n");
131         pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
132         pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
133         pr_debug("Boot memory size  : %lx\n", fw_dump.boot_memory_size);
134 }
135
136 static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
137                                 unsigned long addr)
138 {
139         if (!fdm)
140                 return 0;
141
142         memset(fdm, 0, sizeof(struct fadump_mem_struct));
143         addr = addr & PAGE_MASK;
144
145         fdm->header.dump_format_version = 0x00000001;
146         fdm->header.dump_num_sections = 3;
147         fdm->header.dump_status_flag = 0;
148         fdm->header.offset_first_dump_section =
149                 (u32)offsetof(struct fadump_mem_struct, cpu_state_data);
150
151         /*
152          * Fields for disk dump option.
153          * We are not using disk dump option, hence set these fields to 0.
154          */
155         fdm->header.dd_block_size = 0;
156         fdm->header.dd_block_offset = 0;
157         fdm->header.dd_num_blocks = 0;
158         fdm->header.dd_offset_disk_path = 0;
159
160         /* set 0 to disable an automatic dump-reboot. */
161         fdm->header.max_time_auto = 0;
162
163         /* Kernel dump sections */
164         /* cpu state data section. */
165         fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG;
166         fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA;
167         fdm->cpu_state_data.source_address = 0;
168         fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size;
169         fdm->cpu_state_data.destination_address = addr;
170         addr += fw_dump.cpu_state_data_size;
171
172         /* hpte region section */
173         fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG;
174         fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION;
175         fdm->hpte_region.source_address = 0;
176         fdm->hpte_region.source_len = fw_dump.hpte_region_size;
177         fdm->hpte_region.destination_address = addr;
178         addr += fw_dump.hpte_region_size;
179
180         /* RMA region section */
181         fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG;
182         fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION;
183         fdm->rmr_region.source_address = RMA_START;
184         fdm->rmr_region.source_len = fw_dump.boot_memory_size;
185         fdm->rmr_region.destination_address = addr;
186         addr += fw_dump.boot_memory_size;
187
188         return addr;
189 }
190
191 /**
192  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
193  *
194  * Function to find the largest memory size we need to reserve during early
195  * boot process. This will be the size of the memory that is required for a
196  * kernel to boot successfully.
197  *
198  * This function has been taken from phyp-assisted dump feature implementation.
199  *
200  * returns larger of 256MB or 5% rounded down to multiples of 256MB.
201  *
202  * TODO: Come up with better approach to find out more accurate memory size
203  * that is required for a kernel to boot successfully.
204  *
205  */
206 static inline unsigned long fadump_calculate_reserve_size(void)
207 {
208         unsigned long size;
209
210         /*
211          * Check if the size is specified through fadump_reserve_mem= cmdline
212          * option. If yes, then use that.
213          */
214         if (fw_dump.reserve_bootvar)
215                 return fw_dump.reserve_bootvar;
216
217         /* divide by 20 to get 5% of value */
218         size = memblock_end_of_DRAM() / 20;
219
220         /* round it down in multiples of 256 */
221         size = size & ~0x0FFFFFFFUL;
222
223         /* Truncate to memory_limit. We don't want to over reserve the memory.*/
224         if (memory_limit && size > memory_limit)
225                 size = memory_limit;
226
227         return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
228 }
229
230 /*
231  * Calculate the total memory size required to be reserved for
232  * firmware-assisted dump registration.
233  */
234 static unsigned long get_fadump_area_size(void)
235 {
236         unsigned long size = 0;
237
238         size += fw_dump.cpu_state_data_size;
239         size += fw_dump.hpte_region_size;
240         size += fw_dump.boot_memory_size;
241         size += sizeof(struct fadump_crash_info_header);
242         size += sizeof(struct elfhdr); /* ELF core header.*/
243         size += sizeof(struct elf_phdr); /* place holder for cpu notes */
244         /* Program headers for crash memory regions. */
245         size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
246
247         size = PAGE_ALIGN(size);
248         return size;
249 }
250
251 int __init fadump_reserve_mem(void)
252 {
253         unsigned long base, size, memory_boundary;
254
255         if (!fw_dump.fadump_enabled)
256                 return 0;
257
258         if (!fw_dump.fadump_supported) {
259                 printk(KERN_INFO "Firmware-assisted dump is not supported on"
260                                 " this hardware\n");
261                 fw_dump.fadump_enabled = 0;
262                 return 0;
263         }
264         /*
265          * Initialize boot memory size
266          * If dump is active then we have already calculated the size during
267          * first kernel.
268          */
269         if (fdm_active)
270                 fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
271         else
272                 fw_dump.boot_memory_size = fadump_calculate_reserve_size();
273
274         /*
275          * Calculate the memory boundary.
276          * If memory_limit is less than actual memory boundary then reserve
277          * the memory for fadump beyond the memory_limit and adjust the
278          * memory_limit accordingly, so that the running kernel can run with
279          * specified memory_limit.
280          */
281         if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
282                 size = get_fadump_area_size();
283                 if ((memory_limit + size) < memblock_end_of_DRAM())
284                         memory_limit += size;
285                 else
286                         memory_limit = memblock_end_of_DRAM();
287                 printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
288                                 " dump, now %#016llx\n",
289                                 (unsigned long long)memory_limit);
290         }
291         if (memory_limit)
292                 memory_boundary = memory_limit;
293         else
294                 memory_boundary = memblock_end_of_DRAM();
295
296         if (fw_dump.dump_active) {
297                 printk(KERN_INFO "Firmware-assisted dump is active.\n");
298                 /*
299                  * If last boot has crashed then reserve all the memory
300                  * above boot_memory_size so that we don't touch it until
301                  * dump is written to disk by userspace tool. This memory
302                  * will be released for general use once the dump is saved.
303                  */
304                 base = fw_dump.boot_memory_size;
305                 size = memory_boundary - base;
306                 memblock_reserve(base, size);
307                 printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
308                                 "for saving crash dump\n",
309                                 (unsigned long)(size >> 20),
310                                 (unsigned long)(base >> 20));
311
312                 fw_dump.fadumphdr_addr =
313                                 fdm_active->rmr_region.destination_address +
314                                 fdm_active->rmr_region.source_len;
315                 pr_debug("fadumphdr_addr = %p\n",
316                                 (void *) fw_dump.fadumphdr_addr);
317         } else {
318                 /* Reserve the memory at the top of memory. */
319                 size = get_fadump_area_size();
320                 base = memory_boundary - size;
321                 memblock_reserve(base, size);
322                 printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
323                                 "for firmware-assisted dump\n",
324                                 (unsigned long)(size >> 20),
325                                 (unsigned long)(base >> 20));
326         }
327         fw_dump.reserve_dump_area_start = base;
328         fw_dump.reserve_dump_area_size = size;
329         return 1;
330 }
331
332 /* Look for fadump= cmdline option. */
333 static int __init early_fadump_param(char *p)
334 {
335         if (!p)
336                 return 1;
337
338         if (strncmp(p, "on", 2) == 0)
339                 fw_dump.fadump_enabled = 1;
340         else if (strncmp(p, "off", 3) == 0)
341                 fw_dump.fadump_enabled = 0;
342
343         return 0;
344 }
345 early_param("fadump", early_fadump_param);
346
347 /* Look for fadump_reserve_mem= cmdline option */
348 static int __init early_fadump_reserve_mem(char *p)
349 {
350         if (p)
351                 fw_dump.reserve_bootvar = memparse(p, &p);
352         return 0;
353 }
354 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
355
356 static void register_fw_dump(struct fadump_mem_struct *fdm)
357 {
358         int rc;
359         unsigned int wait_time;
360
361         pr_debug("Registering for firmware-assisted kernel dump...\n");
362
363         /* TODO: Add upper time limit for the delay */
364         do {
365                 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
366                         FADUMP_REGISTER, fdm,
367                         sizeof(struct fadump_mem_struct));
368
369                 wait_time = rtas_busy_delay_time(rc);
370                 if (wait_time)
371                         mdelay(wait_time);
372
373         } while (wait_time);
374
375         switch (rc) {
376         case -1:
377                 printk(KERN_ERR "Failed to register firmware-assisted kernel"
378                         " dump. Hardware Error(%d).\n", rc);
379                 break;
380         case -3:
381                 printk(KERN_ERR "Failed to register firmware-assisted kernel"
382                         " dump. Parameter Error(%d).\n", rc);
383                 break;
384         case -9:
385                 printk(KERN_ERR "firmware-assisted kernel dump is already "
386                         " registered.");
387                 fw_dump.dump_registered = 1;
388                 break;
389         case 0:
390                 printk(KERN_INFO "firmware-assisted kernel dump registration"
391                         " is successful\n");
392                 fw_dump.dump_registered = 1;
393                 break;
394         }
395 }
396
397 void crash_fadump(struct pt_regs *regs, const char *str)
398 {
399         struct fadump_crash_info_header *fdh = NULL;
400
401         if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
402                 return;
403
404         fdh = __va(fw_dump.fadumphdr_addr);
405         crashing_cpu = smp_processor_id();
406         fdh->crashing_cpu = crashing_cpu;
407         crash_save_vmcoreinfo();
408
409         if (regs)
410                 fdh->regs = *regs;
411         else
412                 ppc_save_regs(&fdh->regs);
413
414         fdh->cpu_online_mask = *cpu_online_mask;
415
416         /* Call ibm,os-term rtas call to trigger firmware assisted dump */
417         rtas_os_term((char *)str);
418 }
419
420 #define GPR_MASK        0xffffff0000000000
421 static inline int fadump_gpr_index(u64 id)
422 {
423         int i = -1;
424         char str[3];
425
426         if ((id & GPR_MASK) == REG_ID("GPR")) {
427                 /* get the digits at the end */
428                 id &= ~GPR_MASK;
429                 id >>= 24;
430                 str[2] = '\0';
431                 str[1] = id & 0xff;
432                 str[0] = (id >> 8) & 0xff;
433                 sscanf(str, "%d", &i);
434                 if (i > 31)
435                         i = -1;
436         }
437         return i;
438 }
439
440 static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
441                                                                 u64 reg_val)
442 {
443         int i;
444
445         i = fadump_gpr_index(reg_id);
446         if (i >= 0)
447                 regs->gpr[i] = (unsigned long)reg_val;
448         else if (reg_id == REG_ID("NIA"))
449                 regs->nip = (unsigned long)reg_val;
450         else if (reg_id == REG_ID("MSR"))
451                 regs->msr = (unsigned long)reg_val;
452         else if (reg_id == REG_ID("CTR"))
453                 regs->ctr = (unsigned long)reg_val;
454         else if (reg_id == REG_ID("LR"))
455                 regs->link = (unsigned long)reg_val;
456         else if (reg_id == REG_ID("XER"))
457                 regs->xer = (unsigned long)reg_val;
458         else if (reg_id == REG_ID("CR"))
459                 regs->ccr = (unsigned long)reg_val;
460         else if (reg_id == REG_ID("DAR"))
461                 regs->dar = (unsigned long)reg_val;
462         else if (reg_id == REG_ID("DSISR"))
463                 regs->dsisr = (unsigned long)reg_val;
464 }
465
466 static struct fadump_reg_entry*
467 fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
468 {
469         memset(regs, 0, sizeof(struct pt_regs));
470
471         while (reg_entry->reg_id != REG_ID("CPUEND")) {
472                 fadump_set_regval(regs, reg_entry->reg_id,
473                                         reg_entry->reg_value);
474                 reg_entry++;
475         }
476         reg_entry++;
477         return reg_entry;
478 }
479
480 static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
481                                                 void *data, size_t data_len)
482 {
483         struct elf_note note;
484
485         note.n_namesz = strlen(name) + 1;
486         note.n_descsz = data_len;
487         note.n_type   = type;
488         memcpy(buf, &note, sizeof(note));
489         buf += (sizeof(note) + 3)/4;
490         memcpy(buf, name, note.n_namesz);
491         buf += (note.n_namesz + 3)/4;
492         memcpy(buf, data, note.n_descsz);
493         buf += (note.n_descsz + 3)/4;
494
495         return buf;
496 }
497
498 static void fadump_final_note(u32 *buf)
499 {
500         struct elf_note note;
501
502         note.n_namesz = 0;
503         note.n_descsz = 0;
504         note.n_type   = 0;
505         memcpy(buf, &note, sizeof(note));
506 }
507
508 static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
509 {
510         struct elf_prstatus prstatus;
511
512         memset(&prstatus, 0, sizeof(prstatus));
513         /*
514          * FIXME: How do i get PID? Do I really need it?
515          * prstatus.pr_pid = ????
516          */
517         elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
518         buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
519                                 &prstatus, sizeof(prstatus));
520         return buf;
521 }
522
523 static void fadump_update_elfcore_header(char *bufp)
524 {
525         struct elfhdr *elf;
526         struct elf_phdr *phdr;
527
528         elf = (struct elfhdr *)bufp;
529         bufp += sizeof(struct elfhdr);
530
531         /* First note is a place holder for cpu notes info. */
532         phdr = (struct elf_phdr *)bufp;
533
534         if (phdr->p_type == PT_NOTE) {
535                 phdr->p_paddr = fw_dump.cpu_notes_buf;
536                 phdr->p_offset  = phdr->p_paddr;
537                 phdr->p_filesz  = fw_dump.cpu_notes_buf_size;
538                 phdr->p_memsz = fw_dump.cpu_notes_buf_size;
539         }
540         return;
541 }
542
543 static void *fadump_cpu_notes_buf_alloc(unsigned long size)
544 {
545         void *vaddr;
546         struct page *page;
547         unsigned long order, count, i;
548
549         order = get_order(size);
550         vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
551         if (!vaddr)
552                 return NULL;
553
554         count = 1 << order;
555         page = virt_to_page(vaddr);
556         for (i = 0; i < count; i++)
557                 SetPageReserved(page + i);
558         return vaddr;
559 }
560
561 static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
562 {
563         struct page *page;
564         unsigned long order, count, i;
565
566         order = get_order(size);
567         count = 1 << order;
568         page = virt_to_page(vaddr);
569         for (i = 0; i < count; i++)
570                 ClearPageReserved(page + i);
571         __free_pages(page, order);
572 }
573
574 /*
575  * Read CPU state dump data and convert it into ELF notes.
576  * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
577  * used to access the data to allow for additional fields to be added without
578  * affecting compatibility. Each list of registers for a CPU starts with
579  * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
580  * 8 Byte ASCII identifier and 8 Byte register value. The register entry
581  * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
582  * of register value. For more details refer to PAPR document.
583  *
584  * Only for the crashing cpu we ignore the CPU dump data and get exact
585  * state from fadump crash info structure populated by first kernel at the
586  * time of crash.
587  */
588 static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
589 {
590         struct fadump_reg_save_area_header *reg_header;
591         struct fadump_reg_entry *reg_entry;
592         struct fadump_crash_info_header *fdh = NULL;
593         void *vaddr;
594         unsigned long addr;
595         u32 num_cpus, *note_buf;
596         struct pt_regs regs;
597         int i, rc = 0, cpu = 0;
598
599         if (!fdm->cpu_state_data.bytes_dumped)
600                 return -EINVAL;
601
602         addr = fdm->cpu_state_data.destination_address;
603         vaddr = __va(addr);
604
605         reg_header = vaddr;
606         if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
607                 printk(KERN_ERR "Unable to read register save area.\n");
608                 return -ENOENT;
609         }
610         pr_debug("--------CPU State Data------------\n");
611         pr_debug("Magic Number: %llx\n", reg_header->magic_number);
612         pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
613
614         vaddr += reg_header->num_cpu_offset;
615         num_cpus = *((u32 *)(vaddr));
616         pr_debug("NumCpus     : %u\n", num_cpus);
617         vaddr += sizeof(u32);
618         reg_entry = (struct fadump_reg_entry *)vaddr;
619
620         /* Allocate buffer to hold cpu crash notes. */
621         fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
622         fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
623         note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
624         if (!note_buf) {
625                 printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
626                         "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
627                 return -ENOMEM;
628         }
629         fw_dump.cpu_notes_buf = __pa(note_buf);
630
631         pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
632                         (num_cpus * sizeof(note_buf_t)), note_buf);
633
634         if (fw_dump.fadumphdr_addr)
635                 fdh = __va(fw_dump.fadumphdr_addr);
636
637         for (i = 0; i < num_cpus; i++) {
638                 if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
639                         printk(KERN_ERR "Unable to read CPU state data\n");
640                         rc = -ENOENT;
641                         goto error_out;
642                 }
643                 /* Lower 4 bytes of reg_value contains logical cpu id */
644                 cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
645                 if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
646                         SKIP_TO_NEXT_CPU(reg_entry);
647                         continue;
648                 }
649                 pr_debug("Reading register data for cpu %d...\n", cpu);
650                 if (fdh && fdh->crashing_cpu == cpu) {
651                         regs = fdh->regs;
652                         note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
653                         SKIP_TO_NEXT_CPU(reg_entry);
654                 } else {
655                         reg_entry++;
656                         reg_entry = fadump_read_registers(reg_entry, &regs);
657                         note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
658                 }
659         }
660         fadump_final_note(note_buf);
661
662         pr_debug("Updating elfcore header (%llx) with cpu notes\n",
663                                                         fdh->elfcorehdr_addr);
664         fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
665         return 0;
666
667 error_out:
668         fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
669                                         fw_dump.cpu_notes_buf_size);
670         fw_dump.cpu_notes_buf = 0;
671         fw_dump.cpu_notes_buf_size = 0;
672         return rc;
673
674 }
675
676 /*
677  * Validate and process the dump data stored by firmware before exporting
678  * it through '/proc/vmcore'.
679  */
680 static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
681 {
682         struct fadump_crash_info_header *fdh;
683         int rc = 0;
684
685         if (!fdm_active || !fw_dump.fadumphdr_addr)
686                 return -EINVAL;
687
688         /* Check if the dump data is valid. */
689         if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
690                         (fdm_active->cpu_state_data.error_flags != 0) ||
691                         (fdm_active->rmr_region.error_flags != 0)) {
692                 printk(KERN_ERR "Dump taken by platform is not valid\n");
693                 return -EINVAL;
694         }
695         if ((fdm_active->rmr_region.bytes_dumped !=
696                         fdm_active->rmr_region.source_len) ||
697                         !fdm_active->cpu_state_data.bytes_dumped) {
698                 printk(KERN_ERR "Dump taken by platform is incomplete\n");
699                 return -EINVAL;
700         }
701
702         /* Validate the fadump crash info header */
703         fdh = __va(fw_dump.fadumphdr_addr);
704         if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
705                 printk(KERN_ERR "Crash info header is not valid.\n");
706                 return -EINVAL;
707         }
708
709         rc = fadump_build_cpu_notes(fdm_active);
710         if (rc)
711                 return rc;
712
713         /*
714          * We are done validating dump info and elfcore header is now ready
715          * to be exported. set elfcorehdr_addr so that vmcore module will
716          * export the elfcore header through '/proc/vmcore'.
717          */
718         elfcorehdr_addr = fdh->elfcorehdr_addr;
719
720         return 0;
721 }
722
723 static inline void fadump_add_crash_memory(unsigned long long base,
724                                         unsigned long long end)
725 {
726         if (base == end)
727                 return;
728
729         pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
730                 crash_mem_ranges, base, end - 1, (end - base));
731         crash_memory_ranges[crash_mem_ranges].base = base;
732         crash_memory_ranges[crash_mem_ranges].size = end - base;
733         crash_mem_ranges++;
734 }
735
736 static void fadump_exclude_reserved_area(unsigned long long start,
737                                         unsigned long long end)
738 {
739         unsigned long long ra_start, ra_end;
740
741         ra_start = fw_dump.reserve_dump_area_start;
742         ra_end = ra_start + fw_dump.reserve_dump_area_size;
743
744         if ((ra_start < end) && (ra_end > start)) {
745                 if ((start < ra_start) && (end > ra_end)) {
746                         fadump_add_crash_memory(start, ra_start);
747                         fadump_add_crash_memory(ra_end, end);
748                 } else if (start < ra_start) {
749                         fadump_add_crash_memory(start, ra_start);
750                 } else if (ra_end < end) {
751                         fadump_add_crash_memory(ra_end, end);
752                 }
753         } else
754                 fadump_add_crash_memory(start, end);
755 }
756
757 static int fadump_init_elfcore_header(char *bufp)
758 {
759         struct elfhdr *elf;
760
761         elf = (struct elfhdr *) bufp;
762         bufp += sizeof(struct elfhdr);
763         memcpy(elf->e_ident, ELFMAG, SELFMAG);
764         elf->e_ident[EI_CLASS] = ELF_CLASS;
765         elf->e_ident[EI_DATA] = ELF_DATA;
766         elf->e_ident[EI_VERSION] = EV_CURRENT;
767         elf->e_ident[EI_OSABI] = ELF_OSABI;
768         memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
769         elf->e_type = ET_CORE;
770         elf->e_machine = ELF_ARCH;
771         elf->e_version = EV_CURRENT;
772         elf->e_entry = 0;
773         elf->e_phoff = sizeof(struct elfhdr);
774         elf->e_shoff = 0;
775         elf->e_flags = ELF_CORE_EFLAGS;
776         elf->e_ehsize = sizeof(struct elfhdr);
777         elf->e_phentsize = sizeof(struct elf_phdr);
778         elf->e_phnum = 0;
779         elf->e_shentsize = 0;
780         elf->e_shnum = 0;
781         elf->e_shstrndx = 0;
782
783         return 0;
784 }
785
786 /*
787  * Traverse through memblock structure and setup crash memory ranges. These
788  * ranges will be used create PT_LOAD program headers in elfcore header.
789  */
790 static void fadump_setup_crash_memory_ranges(void)
791 {
792         struct memblock_region *reg;
793         unsigned long long start, end;
794
795         pr_debug("Setup crash memory ranges.\n");
796         crash_mem_ranges = 0;
797         /*
798          * add the first memory chunk (RMA_START through boot_memory_size) as
799          * a separate memory chunk. The reason is, at the time crash firmware
800          * will move the content of this memory chunk to different location
801          * specified during fadump registration. We need to create a separate
802          * program header for this chunk with the correct offset.
803          */
804         fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
805
806         for_each_memblock(memory, reg) {
807                 start = (unsigned long long)reg->base;
808                 end = start + (unsigned long long)reg->size;
809                 if (start == RMA_START && end >= fw_dump.boot_memory_size)
810                         start = fw_dump.boot_memory_size;
811
812                 /* add this range excluding the reserved dump area. */
813                 fadump_exclude_reserved_area(start, end);
814         }
815 }
816
817 /*
818  * If the given physical address falls within the boot memory region then
819  * return the relocated address that points to the dump region reserved
820  * for saving initial boot memory contents.
821  */
822 static inline unsigned long fadump_relocate(unsigned long paddr)
823 {
824         if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
825                 return fdm.rmr_region.destination_address + paddr;
826         else
827                 return paddr;
828 }
829
830 static int fadump_create_elfcore_headers(char *bufp)
831 {
832         struct elfhdr *elf;
833         struct elf_phdr *phdr;
834         int i;
835
836         fadump_init_elfcore_header(bufp);
837         elf = (struct elfhdr *)bufp;
838         bufp += sizeof(struct elfhdr);
839
840         /*
841          * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
842          * will be populated during second kernel boot after crash. Hence
843          * this PT_NOTE will always be the first elf note.
844          *
845          * NOTE: Any new ELF note addition should be placed after this note.
846          */
847         phdr = (struct elf_phdr *)bufp;
848         bufp += sizeof(struct elf_phdr);
849         phdr->p_type = PT_NOTE;
850         phdr->p_flags = 0;
851         phdr->p_vaddr = 0;
852         phdr->p_align = 0;
853
854         phdr->p_offset = 0;
855         phdr->p_paddr = 0;
856         phdr->p_filesz = 0;
857         phdr->p_memsz = 0;
858
859         (elf->e_phnum)++;
860
861         /* setup ELF PT_NOTE for vmcoreinfo */
862         phdr = (struct elf_phdr *)bufp;
863         bufp += sizeof(struct elf_phdr);
864         phdr->p_type    = PT_NOTE;
865         phdr->p_flags   = 0;
866         phdr->p_vaddr   = 0;
867         phdr->p_align   = 0;
868
869         phdr->p_paddr   = fadump_relocate(paddr_vmcoreinfo_note());
870         phdr->p_offset  = phdr->p_paddr;
871         phdr->p_memsz   = vmcoreinfo_max_size;
872         phdr->p_filesz  = vmcoreinfo_max_size;
873
874         /* Increment number of program headers. */
875         (elf->e_phnum)++;
876
877         /* setup PT_LOAD sections. */
878
879         for (i = 0; i < crash_mem_ranges; i++) {
880                 unsigned long long mbase, msize;
881                 mbase = crash_memory_ranges[i].base;
882                 msize = crash_memory_ranges[i].size;
883
884                 if (!msize)
885                         continue;
886
887                 phdr = (struct elf_phdr *)bufp;
888                 bufp += sizeof(struct elf_phdr);
889                 phdr->p_type    = PT_LOAD;
890                 phdr->p_flags   = PF_R|PF_W|PF_X;
891                 phdr->p_offset  = mbase;
892
893                 if (mbase == RMA_START) {
894                         /*
895                          * The entire RMA region will be moved by firmware
896                          * to the specified destination_address. Hence set
897                          * the correct offset.
898                          */
899                         phdr->p_offset = fdm.rmr_region.destination_address;
900                 }
901
902                 phdr->p_paddr = mbase;
903                 phdr->p_vaddr = (unsigned long)__va(mbase);
904                 phdr->p_filesz = msize;
905                 phdr->p_memsz = msize;
906                 phdr->p_align = 0;
907
908                 /* Increment number of program headers. */
909                 (elf->e_phnum)++;
910         }
911         return 0;
912 }
913
914 static unsigned long init_fadump_header(unsigned long addr)
915 {
916         struct fadump_crash_info_header *fdh;
917
918         if (!addr)
919                 return 0;
920
921         fw_dump.fadumphdr_addr = addr;
922         fdh = __va(addr);
923         addr += sizeof(struct fadump_crash_info_header);
924
925         memset(fdh, 0, sizeof(struct fadump_crash_info_header));
926         fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
927         fdh->elfcorehdr_addr = addr;
928         /* We will set the crashing cpu id in crash_fadump() during crash. */
929         fdh->crashing_cpu = CPU_UNKNOWN;
930
931         return addr;
932 }
933
934 static void register_fadump(void)
935 {
936         unsigned long addr;
937         void *vaddr;
938
939         /*
940          * If no memory is reserved then we can not register for firmware-
941          * assisted dump.
942          */
943         if (!fw_dump.reserve_dump_area_size)
944                 return;
945
946         fadump_setup_crash_memory_ranges();
947
948         addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
949         /* Initialize fadump crash info header. */
950         addr = init_fadump_header(addr);
951         vaddr = __va(addr);
952
953         pr_debug("Creating ELF core headers at %#016lx\n", addr);
954         fadump_create_elfcore_headers(vaddr);
955
956         /* register the future kernel dump with firmware. */
957         register_fw_dump(&fdm);
958 }
959
960 static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
961 {
962         int rc = 0;
963         unsigned int wait_time;
964
965         pr_debug("Un-register firmware-assisted dump\n");
966
967         /* TODO: Add upper time limit for the delay */
968         do {
969                 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
970                         FADUMP_UNREGISTER, fdm,
971                         sizeof(struct fadump_mem_struct));
972
973                 wait_time = rtas_busy_delay_time(rc);
974                 if (wait_time)
975                         mdelay(wait_time);
976         } while (wait_time);
977
978         if (rc) {
979                 printk(KERN_ERR "Failed to un-register firmware-assisted dump."
980                         " unexpected error(%d).\n", rc);
981                 return rc;
982         }
983         fw_dump.dump_registered = 0;
984         return 0;
985 }
986
987 static ssize_t fadump_enabled_show(struct kobject *kobj,
988                                         struct kobj_attribute *attr,
989                                         char *buf)
990 {
991         return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
992 }
993
994 static ssize_t fadump_register_show(struct kobject *kobj,
995                                         struct kobj_attribute *attr,
996                                         char *buf)
997 {
998         return sprintf(buf, "%d\n", fw_dump.dump_registered);
999 }
1000
1001 static ssize_t fadump_register_store(struct kobject *kobj,
1002                                         struct kobj_attribute *attr,
1003                                         const char *buf, size_t count)
1004 {
1005         int ret = 0;
1006
1007         if (!fw_dump.fadump_enabled || fdm_active)
1008                 return -EPERM;
1009
1010         mutex_lock(&fadump_mutex);
1011
1012         switch (buf[0]) {
1013         case '0':
1014                 if (fw_dump.dump_registered == 0) {
1015                         ret = -EINVAL;
1016                         goto unlock_out;
1017                 }
1018                 /* Un-register Firmware-assisted dump */
1019                 fadump_unregister_dump(&fdm);
1020                 break;
1021         case '1':
1022                 if (fw_dump.dump_registered == 1) {
1023                         ret = -EINVAL;
1024                         goto unlock_out;
1025                 }
1026                 /* Register Firmware-assisted dump */
1027                 register_fadump();
1028                 break;
1029         default:
1030                 ret = -EINVAL;
1031                 break;
1032         }
1033
1034 unlock_out:
1035         mutex_unlock(&fadump_mutex);
1036         return ret < 0 ? ret : count;
1037 }
1038
1039 static int fadump_region_show(struct seq_file *m, void *private)
1040 {
1041         const struct fadump_mem_struct *fdm_ptr;
1042
1043         if (!fw_dump.fadump_enabled)
1044                 return 0;
1045
1046         if (fdm_active)
1047                 fdm_ptr = fdm_active;
1048         else
1049                 fdm_ptr = &fdm;
1050
1051         seq_printf(m,
1052                         "CPU : [%#016llx-%#016llx] %#llx bytes, "
1053                         "Dumped: %#llx\n",
1054                         fdm_ptr->cpu_state_data.destination_address,
1055                         fdm_ptr->cpu_state_data.destination_address +
1056                         fdm_ptr->cpu_state_data.source_len - 1,
1057                         fdm_ptr->cpu_state_data.source_len,
1058                         fdm_ptr->cpu_state_data.bytes_dumped);
1059         seq_printf(m,
1060                         "HPTE: [%#016llx-%#016llx] %#llx bytes, "
1061                         "Dumped: %#llx\n",
1062                         fdm_ptr->hpte_region.destination_address,
1063                         fdm_ptr->hpte_region.destination_address +
1064                         fdm_ptr->hpte_region.source_len - 1,
1065                         fdm_ptr->hpte_region.source_len,
1066                         fdm_ptr->hpte_region.bytes_dumped);
1067         seq_printf(m,
1068                         "DUMP: [%#016llx-%#016llx] %#llx bytes, "
1069                         "Dumped: %#llx\n",
1070                         fdm_ptr->rmr_region.destination_address,
1071                         fdm_ptr->rmr_region.destination_address +
1072                         fdm_ptr->rmr_region.source_len - 1,
1073                         fdm_ptr->rmr_region.source_len,
1074                         fdm_ptr->rmr_region.bytes_dumped);
1075
1076         if (!fdm_active ||
1077                 (fw_dump.reserve_dump_area_start ==
1078                 fdm_ptr->cpu_state_data.destination_address))
1079                 return 0;
1080
1081         /* Dump is active. Show reserved memory region. */
1082         seq_printf(m,
1083                         "    : [%#016llx-%#016llx] %#llx bytes, "
1084                         "Dumped: %#llx\n",
1085                         (unsigned long long)fw_dump.reserve_dump_area_start,
1086                         fdm_ptr->cpu_state_data.destination_address - 1,
1087                         fdm_ptr->cpu_state_data.destination_address -
1088                         fw_dump.reserve_dump_area_start,
1089                         fdm_ptr->cpu_state_data.destination_address -
1090                         fw_dump.reserve_dump_area_start);
1091         return 0;
1092 }
1093
1094 static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
1095                                                 0444, fadump_enabled_show,
1096                                                 NULL);
1097 static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
1098                                                 0644, fadump_register_show,
1099                                                 fadump_register_store);
1100
1101 static int fadump_region_open(struct inode *inode, struct file *file)
1102 {
1103         return single_open(file, fadump_region_show, inode->i_private);
1104 }
1105
1106 static const struct file_operations fadump_region_fops = {
1107         .open    = fadump_region_open,
1108         .read    = seq_read,
1109         .llseek  = seq_lseek,
1110         .release = single_release,
1111 };
1112
1113 static void fadump_init_files(void)
1114 {
1115         struct dentry *debugfs_file;
1116         int rc = 0;
1117
1118         rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
1119         if (rc)
1120                 printk(KERN_ERR "fadump: unable to create sysfs file"
1121                         " fadump_enabled (%d)\n", rc);
1122
1123         rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
1124         if (rc)
1125                 printk(KERN_ERR "fadump: unable to create sysfs file"
1126                         " fadump_registered (%d)\n", rc);
1127
1128         debugfs_file = debugfs_create_file("fadump_region", 0444,
1129                                         powerpc_debugfs_root, NULL,
1130                                         &fadump_region_fops);
1131         if (!debugfs_file)
1132                 printk(KERN_ERR "fadump: unable to create debugfs file"
1133                                 " fadump_region\n");
1134         return;
1135 }
1136
1137 /*
1138  * Prepare for firmware-assisted dump.
1139  */
1140 int __init setup_fadump(void)
1141 {
1142         if (!fw_dump.fadump_enabled)
1143                 return 0;
1144
1145         if (!fw_dump.fadump_supported) {
1146                 printk(KERN_ERR "Firmware-assisted dump is not supported on"
1147                         " this hardware\n");
1148                 return 0;
1149         }
1150
1151         fadump_show_config();
1152         /*
1153          * If dump data is available then see if it is valid and prepare for
1154          * saving it to the disk.
1155          */
1156         if (fw_dump.dump_active)
1157                 process_fadump(fdm_active);
1158         /* Initialize the kernel dump memory structure for FAD registration. */
1159         else if (fw_dump.reserve_dump_area_size)
1160                 init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1161         fadump_init_files();
1162
1163         return 1;
1164 }
1165 subsys_initcall(setup_fadump);