Pileus Git - ~andy/linux/blob - fs/binfmt_elf.c
s390: convert to generic kernel_execve()
[~andy/linux] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38
39 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
40 static int load_elf_library(struct file *);
41 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
42                                 int, int, unsigned long);
43
44 /*
45  * If we don't support core dumping, then supply a NULL so we
46  * don't even try.
47  */
48 #ifdef CONFIG_ELF_CORE
49 static int elf_core_dump(struct coredump_params *cprm);
50 #else
51 #define elf_core_dump   NULL
52 #endif
53
54 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
55 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
56 #else
57 #define ELF_MIN_ALIGN   PAGE_SIZE
58 #endif
59
60 #ifndef ELF_CORE_EFLAGS
61 #define ELF_CORE_EFLAGS 0
62 #endif
63
64 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
65 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
66 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
67
68 static struct linux_binfmt elf_format = {
69         .module         = THIS_MODULE,
70         .load_binary    = load_elf_binary,
71         .load_shlib     = load_elf_library,
72         .core_dump      = elf_core_dump,
73         .min_coredump   = ELF_EXEC_PAGESIZE,
74 };
75
76 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 addr = vm_brk(start, end - start);
85                 if (BAD_ADDR(addr))
86                         return addr;
87         }
88         current->mm->start_brk = current->mm->brk = end;
89         return 0;
90 }
91
92 /* We need to explicitly zero any fractional pages
93    after the data section (i.e. bss).  This would
94    contain the junk from the file that should not
95    be in memory
96  */
97 static int padzero(unsigned long elf_bss)
98 {
99         unsigned long nbyte;
100
101         nbyte = ELF_PAGEOFFSET(elf_bss);
102         if (nbyte) {
103                 nbyte = ELF_MIN_ALIGN - nbyte;
104                 if (clear_user((void __user *) elf_bss, nbyte))
105                         return -EFAULT;
106         }
107         return 0;
108 }
109
110 /* Let's use some macros to make this stack manipulation a little clearer */
111 #ifdef CONFIG_STACK_GROWSUP
112 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
113 #define STACK_ROUND(sp, items) \
114         ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
115 #define STACK_ALLOC(sp, len) ({ \
116         elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
117         old_sp; })
118 #else
119 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
120 #define STACK_ROUND(sp, items) \
121         (((unsigned long) (sp - items)) &~ 15UL)
122 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
123 #endif
124
125 #ifndef ELF_BASE_PLATFORM
126 /*
127  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
128  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
129  * will be copied to the user stack in the same manner as AT_PLATFORM.
130  */
131 #define ELF_BASE_PLATFORM NULL
132 #endif
133
/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 *
 * Starting from bprm->p, this copies the ELF_PLATFORM and ELF_BASE_PLATFORM
 * strings (when non-NULL) and 16 random bytes to the user stack, builds the
 * auxiliary vector in current->mm->saved_auxv, rounds the resulting stack
 * pointer and stores it in bprm->p, extends the stack vma with
 * find_extend_vma(), and then writes argc, the NULL-terminated argv[] and
 * envp[] pointer arrays and the auxiliary vector to user memory.  Along the
 * way it records arg_end, env_start and env_end in current->mm.
 *
 * Returns 0 on success, -EFAULT when a user-space copy/put or the stack
 * extension fails, and -EINVAL when strnlen_user() reports a length of 0 or
 * more than MAX_ARG_STRLEN for an argument or environment string.
 */
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136                 unsigned long load_addr, unsigned long interp_load_addr)
137 {
138         unsigned long p = bprm->p;
139         int argc = bprm->argc;
140         int envc = bprm->envc;
141         elf_addr_t __user *argv;
142         elf_addr_t __user *envp;
143         elf_addr_t __user *sp;
144         elf_addr_t __user *u_platform;
145         elf_addr_t __user *u_base_platform;
146         elf_addr_t __user *u_rand_bytes;
147         const char *k_platform = ELF_PLATFORM;
148         const char *k_base_platform = ELF_BASE_PLATFORM;
149         unsigned char k_rand_bytes[16];
150         int items;
151         elf_addr_t *elf_info;
152         int ei_index = 0;
153         const struct cred *cred = current_cred();
154         struct vm_area_struct *vma;
155
156         /*
157          * In some cases (e.g. Hyper-Threading), we want to avoid L1
158          * evictions by the processes running on the same package. One
159          * thing we can do is to shuffle the initial stack for them.
160          */
161
162         p = arch_align_stack(p);
163
164         /*
165          * If this architecture has a platform capability string, copy it
166          * to userspace.  In some cases (Sparc), this info is impossible
167          * for userspace to get any other way, in others (i386) it is
168          * merely difficult.
169          */
170         u_platform = NULL;
171         if (k_platform) {
172                 size_t len = strlen(k_platform) + 1;
173
174                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
175                 if (__copy_to_user(u_platform, k_platform, len))
176                         return -EFAULT;
177         }
178
179         /*
180          * If this architecture has a "base" platform capability
181          * string, copy it to userspace.
182          */
183         u_base_platform = NULL;
184         if (k_base_platform) {
185                 size_t len = strlen(k_base_platform) + 1;
186
187                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
188                 if (__copy_to_user(u_base_platform, k_base_platform, len))
189                         return -EFAULT;
190         }
191
192         /*
193          * Generate 16 random bytes for userspace PRNG seeding.
194          */
195         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
196         u_rand_bytes = (elf_addr_t __user *)
197                        STACK_ALLOC(p, sizeof(k_rand_bytes));
198         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
199                 return -EFAULT;
200
201         /* Create the ELF interpreter info */
202         elf_info = (elf_addr_t *)current->mm->saved_auxv;
203         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
204 #define NEW_AUX_ENT(id, val) \
205         do { \
206                 elf_info[ei_index++] = id; \
207                 elf_info[ei_index++] = val; \
208         } while (0)
209
210 #ifdef ARCH_DLINFO
211         /* 
212          * ARCH_DLINFO must come first so PPC can do its special alignment of
213          * AUXV.
214          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
215          * ARCH_DLINFO changes
216          */
217         ARCH_DLINFO;
218 #endif
219         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
220         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
221         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
222         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
223         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
224         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
225         NEW_AUX_ENT(AT_BASE, interp_load_addr);
226         NEW_AUX_ENT(AT_FLAGS, 0);
227         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
228         NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
229         NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
230         NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
231         NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
232         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
233         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
234         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
235         if (k_platform) {
236                 NEW_AUX_ENT(AT_PLATFORM,
237                             (elf_addr_t)(unsigned long)u_platform);
238         }
239         if (k_base_platform) {
240                 NEW_AUX_ENT(AT_BASE_PLATFORM,
241                             (elf_addr_t)(unsigned long)u_base_platform);
242         }
243         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
244                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
245         }
246 #undef NEW_AUX_ENT
247         /* AT_NULL is zero; clear the rest too */
248         memset(&elf_info[ei_index], 0,
249                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
250
251         /* And advance past the AT_NULL entry.  */
252         ei_index += 2;
253
254         sp = STACK_ADD(p, ei_index);
255
        /*
         * Slots needed besides the auxv: argv[] plus its NULL, envp[] plus
         * its NULL, and one more for argc itself.
         */
256         items = (argc + 1) + (envc + 1) + 1;
257         bprm->p = STACK_ROUND(sp, items);
258
259         /* Point sp at the lowest address on the stack */
260 #ifdef CONFIG_STACK_GROWSUP
261         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
262         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
263 #else
264         sp = (elf_addr_t __user *)bprm->p;
265 #endif
266
267
268         /*
269          * Grow the stack manually; some architectures have a limit on how
270          * far ahead a user-space access may be in order to grow the stack.
271          */
272         vma = find_extend_vma(current->mm, bprm->p);
273         if (!vma)
274                 return -EFAULT;
275
276         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
277         if (__put_user(argc, sp++))
278                 return -EFAULT;
279         argv = sp;
        /* envp[] begins right after argv[] and its terminating NULL slot. */
280         envp = argv + argc + 1;
281
282         /* Populate argv and envp */
        /*
         * p walks the strings starting at mm->arg_start; each iteration
         * stores the current string's address and then skips over it using
         * the length reported by strnlen_user().
         */
283         p = current->mm->arg_end = current->mm->arg_start;
284         while (argc-- > 0) {
285                 size_t len;
286                 if (__put_user((elf_addr_t)p, argv++))
287                         return -EFAULT;
288                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
289                 if (!len || len > MAX_ARG_STRLEN)
290                         return -EINVAL;
291                 p += len;
292         }
293         if (__put_user(0, argv))
294                 return -EFAULT;
295         current->mm->arg_end = current->mm->env_start = p;
296         while (envc-- > 0) {
297                 size_t len;
298                 if (__put_user((elf_addr_t)p, envp++))
299                         return -EFAULT;
300                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
301                 if (!len || len > MAX_ARG_STRLEN)
302                         return -EINVAL;
303                 p += len;
304         }
305         if (__put_user(0, envp))
306                 return -EFAULT;
307         current->mm->env_end = p;
308
309         /* Put the elf_info on the stack in the right place.  */
        /* envp points at the NULL terminator just written; auxv follows it. */
310         sp = (elf_addr_t __user *)envp + 1;
311         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
312                 return -EFAULT;
313         return 0;
314 }
315
316 static unsigned long elf_map(struct file *filep, unsigned long addr,
317                 struct elf_phdr *eppnt, int prot, int type,
318                 unsigned long total_size)
319 {
320         unsigned long map_addr;
321         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
322         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
323         addr = ELF_PAGESTART(addr);
324         size = ELF_PAGEALIGN(size);
325
326         /* mmap() will return -EINVAL if given a zero size, but a
327          * segment with zero filesize is perfectly valid */
328         if (!size)
329                 return addr;
330
331         /*
332         * total_size is the size of the ELF (interpreter) image.
333         * The _first_ mmap needs to know the full size, otherwise
334         * randomization might put this image into an overlapping
335         * position with the ELF binary image. (since size < total_size)
336         * So we first map the 'big' image - and unmap the remainder at
337         * the end. (which unmap is needed for ELF images with holes.)
338         */
339         if (total_size) {
340                 total_size = ELF_PAGEALIGN(total_size);
341                 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
342                 if (!BAD_ADDR(map_addr))
343                         vm_munmap(map_addr+size, total_size-size);
344         } else
345                 map_addr = vm_mmap(filep, addr, size, prot, type, off);
346
347         return(map_addr);
348 }
349
350 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
351 {
352         int i, first_idx = -1, last_idx = -1;
353
354         for (i = 0; i < nr; i++) {
355                 if (cmds[i].p_type == PT_LOAD) {
356                         last_idx = i;
357                         if (first_idx == -1)
358                                 first_idx = i;
359                 }
360         }
361         if (first_idx == -1)
362                 return 0;
363
364         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
365                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
366 }
367
368
369 /* This is much more generalized than the library routine read function,
370    so we keep this separate.  Technically the library read function
371    is only provided so that we can read a.out libraries that have
372    an ELF header */
373
374 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
375                 struct file *interpreter, unsigned long *interp_map_addr,
376                 unsigned long no_base)
377 {
378         struct elf_phdr *elf_phdata;
379         struct elf_phdr *eppnt;
380         unsigned long load_addr = 0;
381         int load_addr_set = 0;
382         unsigned long last_bss = 0, elf_bss = 0;
383         unsigned long error = ~0UL;
384         unsigned long total_size;
385         int retval, i, size;
386
387         /* First of all, some simple consistency checks */
388         if (interp_elf_ex->e_type != ET_EXEC &&
389             interp_elf_ex->e_type != ET_DYN)
390                 goto out;
391         if (!elf_check_arch(interp_elf_ex))
392                 goto out;
393         if (!interpreter->f_op || !interpreter->f_op->mmap)
394                 goto out;
395
396         /*
397          * If the size of this structure has changed, then punt, since
398          * we will be doing the wrong thing.
399          */
400         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
401                 goto out;
402         if (interp_elf_ex->e_phnum < 1 ||
403                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
404                 goto out;
405
406         /* Now read in all of the header information */
407         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
408         if (size > ELF_MIN_ALIGN)
409                 goto out;
410         elf_phdata = kmalloc(size, GFP_KERNEL);
411         if (!elf_phdata)
412                 goto out;
413
414         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
415                              (char *)elf_phdata, size);
416         error = -EIO;
417         if (retval != size) {
418                 if (retval < 0)
419                         error = retval; 
420                 goto out_close;
421         }
422
423         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
424         if (!total_size) {
425                 error = -EINVAL;
426                 goto out_close;
427         }
428
429         eppnt = elf_phdata;
430         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
431                 if (eppnt->p_type == PT_LOAD) {
432                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
433                         int elf_prot = 0;
434                         unsigned long vaddr = 0;
435                         unsigned long k, map_addr;
436
437                         if (eppnt->p_flags & PF_R)
438                                 elf_prot = PROT_READ;
439                         if (eppnt->p_flags & PF_W)
440                                 elf_prot |= PROT_WRITE;
441                         if (eppnt->p_flags & PF_X)
442                                 elf_prot |= PROT_EXEC;
443                         vaddr = eppnt->p_vaddr;
444                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
445                                 elf_type |= MAP_FIXED;
446                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
447                                 load_addr = -vaddr;
448
449                         map_addr = elf_map(interpreter, load_addr + vaddr,
450                                         eppnt, elf_prot, elf_type, total_size);
451                         total_size = 0;
452                         if (!*interp_map_addr)
453                                 *interp_map_addr = map_addr;
454                         error = map_addr;
455                         if (BAD_ADDR(map_addr))
456                                 goto out_close;
457
458                         if (!load_addr_set &&
459                             interp_elf_ex->e_type == ET_DYN) {
460                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
461                                 load_addr_set = 1;
462                         }
463
464                         /*
465                          * Check to see if the section's size will overflow the
466                          * allowed task size. Note that p_filesz must always be
467                          * <= p_memsize so it's only necessary to check p_memsz.
468                          */
469                         k = load_addr + eppnt->p_vaddr;
470                         if (BAD_ADDR(k) ||
471                             eppnt->p_filesz > eppnt->p_memsz ||
472                             eppnt->p_memsz > TASK_SIZE ||
473                             TASK_SIZE - eppnt->p_memsz < k) {
474                                 error = -ENOMEM;
475                                 goto out_close;
476                         }
477
478                         /*
479                          * Find the end of the file mapping for this phdr, and
480                          * keep track of the largest address we see for this.
481                          */
482                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
483                         if (k > elf_bss)
484                                 elf_bss = k;
485
486                         /*
487                          * Do the same thing for the memory mapping - between
488                          * elf_bss and last_bss is the bss section.
489                          */
490                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
491                         if (k > last_bss)
492                                 last_bss = k;
493                 }
494         }
495
496         if (last_bss > elf_bss) {
497                 /*
498                  * Now fill out the bss section.  First pad the last page up
499                  * to the page boundary, and then perform a mmap to make sure
500                  * that there are zero-mapped pages up to and including the
501                  * last bss page.
502                  */
503                 if (padzero(elf_bss)) {
504                         error = -EFAULT;
505                         goto out_close;
506                 }
507
508                 /* What we have mapped so far */
509                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
510
511                 /* Map the last of the bss segment */
512                 error = vm_brk(elf_bss, last_bss - elf_bss);
513                 if (BAD_ADDR(error))
514                         goto out_close;
515         }
516
517         error = load_addr;
518
519 out_close:
520         kfree(elf_phdata);
521 out:
522         return error;
523 }
524
525 /*
526  * These are the functions used to load ELF style executables and shared
527  * libraries.  There is no binary dependent code anywhere else.
528  */
529
530 #define INTERPRETER_NONE 0
531 #define INTERPRETER_ELF 2
532
533 #ifndef STACK_RND_MASK
534 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
535 #endif
536
537 static unsigned long randomize_stack_top(unsigned long stack_top)
538 {
539         unsigned int random_variable = 0;
540
541         if ((current->flags & PF_RANDOMIZE) &&
542                 !(current->personality & ADDR_NO_RANDOMIZE)) {
543                 random_variable = get_random_int() & STACK_RND_MASK;
544                 random_variable <<= PAGE_SHIFT;
545         }
546 #ifdef CONFIG_STACK_GROWSUP
547         return PAGE_ALIGN(stack_top) + random_variable;
548 #else
549         return PAGE_ALIGN(stack_top) - random_variable;
550 #endif
551 }
552
553 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
554 {
555         struct file *interpreter = NULL; /* to shut gcc up */
556         unsigned long load_addr = 0, load_bias = 0;
557         int load_addr_set = 0;
558         char * elf_interpreter = NULL;
559         unsigned long error;
560         struct elf_phdr *elf_ppnt, *elf_phdata;
561         unsigned long elf_bss, elf_brk;
562         int retval, i;
563         unsigned int size;
564         unsigned long elf_entry;
565         unsigned long interp_load_addr = 0;
566         unsigned long start_code, end_code, start_data, end_data;
567         unsigned long reloc_func_desc __maybe_unused = 0;
568         int executable_stack = EXSTACK_DEFAULT;
569         unsigned long def_flags = 0;
570         struct {
571                 struct elfhdr elf_ex;
572                 struct elfhdr interp_elf_ex;
573         } *loc;
574
575         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
576         if (!loc) {
577                 retval = -ENOMEM;
578                 goto out_ret;
579         }
580         
581         /* Get the exec-header */
582         loc->elf_ex = *((struct elfhdr *)bprm->buf);
583
584         retval = -ENOEXEC;
585         /* First of all, some simple consistency checks */
586         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
587                 goto out;
588
589         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
590                 goto out;
591         if (!elf_check_arch(&loc->elf_ex))
592                 goto out;
593         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
594                 goto out;
595
596         /* Now read in all of the header information */
597         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
598                 goto out;
599         if (loc->elf_ex.e_phnum < 1 ||
600                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
601                 goto out;
602         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
603         retval = -ENOMEM;
604         elf_phdata = kmalloc(size, GFP_KERNEL);
605         if (!elf_phdata)
606                 goto out;
607
608         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
609                              (char *)elf_phdata, size);
610         if (retval != size) {
611                 if (retval >= 0)
612                         retval = -EIO;
613                 goto out_free_ph;
614         }
615
616         elf_ppnt = elf_phdata;
617         elf_bss = 0;
618         elf_brk = 0;
619
620         start_code = ~0UL;
621         end_code = 0;
622         start_data = 0;
623         end_data = 0;
624
625         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
626                 if (elf_ppnt->p_type == PT_INTERP) {
627                         /* This is the program interpreter used for
628                          * shared libraries - for now assume that this
629                          * is an a.out format binary
630                          */
631                         retval = -ENOEXEC;
632                         if (elf_ppnt->p_filesz > PATH_MAX || 
633                             elf_ppnt->p_filesz < 2)
634                                 goto out_free_ph;
635
636                         retval = -ENOMEM;
637                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
638                                                   GFP_KERNEL);
639                         if (!elf_interpreter)
640                                 goto out_free_ph;
641
642                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
643                                              elf_interpreter,
644                                              elf_ppnt->p_filesz);
645                         if (retval != elf_ppnt->p_filesz) {
646                                 if (retval >= 0)
647                                         retval = -EIO;
648                                 goto out_free_interp;
649                         }
650                         /* make sure path is NULL terminated */
651                         retval = -ENOEXEC;
652                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
653                                 goto out_free_interp;
654
655                         interpreter = open_exec(elf_interpreter);
656                         retval = PTR_ERR(interpreter);
657                         if (IS_ERR(interpreter))
658                                 goto out_free_interp;
659
660                         /*
661                          * If the binary is not readable then enforce
662                          * mm->dumpable = 0 regardless of the interpreter's
663                          * permissions.
664                          */
665                         would_dump(bprm, interpreter);
666
667                         retval = kernel_read(interpreter, 0, bprm->buf,
668                                              BINPRM_BUF_SIZE);
669                         if (retval != BINPRM_BUF_SIZE) {
670                                 if (retval >= 0)
671                                         retval = -EIO;
672                                 goto out_free_dentry;
673                         }
674
675                         /* Get the exec headers */
676                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
677                         break;
678                 }
679                 elf_ppnt++;
680         }
681
682         elf_ppnt = elf_phdata;
683         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
684                 if (elf_ppnt->p_type == PT_GNU_STACK) {
685                         if (elf_ppnt->p_flags & PF_X)
686                                 executable_stack = EXSTACK_ENABLE_X;
687                         else
688                                 executable_stack = EXSTACK_DISABLE_X;
689                         break;
690                 }
691
692         /* Some simple consistency checks for the interpreter */
693         if (elf_interpreter) {
694                 retval = -ELIBBAD;
695                 /* Not an ELF interpreter */
696                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
697                         goto out_free_dentry;
698                 /* Verify the interpreter has a valid arch */
699                 if (!elf_check_arch(&loc->interp_elf_ex))
700                         goto out_free_dentry;
701         }
702
703         /* Flush all traces of the currently running executable */
704         retval = flush_old_exec(bprm);
705         if (retval)
706                 goto out_free_dentry;
707
708         /* OK, This is the point of no return */
709         current->mm->def_flags = def_flags;
710
711         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
712            may depend on the personality.  */
713         SET_PERSONALITY(loc->elf_ex);
714         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
715                 current->personality |= READ_IMPLIES_EXEC;
716
717         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
718                 current->flags |= PF_RANDOMIZE;
719
720         setup_new_exec(bprm);
721
722         /* Do this so that we can load the interpreter, if need be.  We will
723            change some of these later */
724         current->mm->free_area_cache = current->mm->mmap_base;
725         current->mm->cached_hole_size = 0;
726         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
727                                  executable_stack);
728         if (retval < 0) {
729                 send_sig(SIGKILL, current, 0);
730                 goto out_free_dentry;
731         }
732         
733         current->mm->start_stack = bprm->p;
734
735         /* Now we do a little grungy work by mmapping the ELF image into
736            the correct location in memory. */
737         for(i = 0, elf_ppnt = elf_phdata;
738             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
739                 int elf_prot = 0, elf_flags;
740                 unsigned long k, vaddr;
741
742                 if (elf_ppnt->p_type != PT_LOAD)
743                         continue;
744
745                 if (unlikely (elf_brk > elf_bss)) {
746                         unsigned long nbyte;
747                     
748                         /* There was a PT_LOAD segment with p_memsz > p_filesz
749                            before this one. Map anonymous pages, if needed,
750                            and clear the area.  */
751                         retval = set_brk(elf_bss + load_bias,
752                                          elf_brk + load_bias);
753                         if (retval) {
754                                 send_sig(SIGKILL, current, 0);
755                                 goto out_free_dentry;
756                         }
757                         nbyte = ELF_PAGEOFFSET(elf_bss);
758                         if (nbyte) {
759                                 nbyte = ELF_MIN_ALIGN - nbyte;
760                                 if (nbyte > elf_brk - elf_bss)
761                                         nbyte = elf_brk - elf_bss;
762                                 if (clear_user((void __user *)elf_bss +
763                                                         load_bias, nbyte)) {
764                                         /*
765                                          * This bss-zeroing can fail if the ELF
766                                          * file specifies odd protections. So
767                                          * we don't check the return value
768                                          */
769                                 }
770                         }
771                 }
772
773                 if (elf_ppnt->p_flags & PF_R)
774                         elf_prot |= PROT_READ;
775                 if (elf_ppnt->p_flags & PF_W)
776                         elf_prot |= PROT_WRITE;
777                 if (elf_ppnt->p_flags & PF_X)
778                         elf_prot |= PROT_EXEC;
779
780                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
781
782                 vaddr = elf_ppnt->p_vaddr;
783                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
784                         elf_flags |= MAP_FIXED;
785                 } else if (loc->elf_ex.e_type == ET_DYN) {
786                         /* Try and get dynamic programs out of the way of the
787                          * default mmap base, as well as whatever program they
788                          * might try to exec.  This is because the brk will
789                          * follow the loader, and is not movable.  */
790 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
791                         /* Memory randomization might have been switched off
792                          * in runtime via sysctl.
793                          * If that is the case, retain the original non-zero
794                          * load_bias value in order to establish proper
795                          * non-randomized mappings.
796                          */
797                         if (current->flags & PF_RANDOMIZE)
798                                 load_bias = 0;
799                         else
800                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
801 #else
802                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
803 #endif
804                 }
805
806                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
807                                 elf_prot, elf_flags, 0);
808                 if (BAD_ADDR(error)) {
809                         send_sig(SIGKILL, current, 0);
810                         retval = IS_ERR((void *)error) ?
811                                 PTR_ERR((void*)error) : -EINVAL;
812                         goto out_free_dentry;
813                 }
814
815                 if (!load_addr_set) {
816                         load_addr_set = 1;
817                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
818                         if (loc->elf_ex.e_type == ET_DYN) {
819                                 load_bias += error -
820                                              ELF_PAGESTART(load_bias + vaddr);
821                                 load_addr += load_bias;
822                                 reloc_func_desc = load_bias;
823                         }
824                 }
825                 k = elf_ppnt->p_vaddr;
826                 if (k < start_code)
827                         start_code = k;
828                 if (start_data < k)
829                         start_data = k;
830
831                 /*
832                  * Check to see if the section's size will overflow the
833                  * allowed task size. Note that p_filesz must always be
834                  * <= p_memsz so it is only necessary to check p_memsz.
835                  */
836                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
837                     elf_ppnt->p_memsz > TASK_SIZE ||
838                     TASK_SIZE - elf_ppnt->p_memsz < k) {
839                         /* set_brk can never work. Avoid overflows. */
840                         send_sig(SIGKILL, current, 0);
841                         retval = -EINVAL;
842                         goto out_free_dentry;
843                 }
844
845                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
846
847                 if (k > elf_bss)
848                         elf_bss = k;
849                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
850                         end_code = k;
851                 if (end_data < k)
852                         end_data = k;
853                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
854                 if (k > elf_brk)
855                         elf_brk = k;
856         }
857
858         loc->elf_ex.e_entry += load_bias;
859         elf_bss += load_bias;
860         elf_brk += load_bias;
861         start_code += load_bias;
862         end_code += load_bias;
863         start_data += load_bias;
864         end_data += load_bias;
865
866         /* Calling set_brk effectively mmaps the pages that we need
867          * for the bss and break sections.  We must do this before
868          * mapping in the interpreter, to make sure it doesn't wind
869          * up getting placed where the bss needs to go.
870          */
871         retval = set_brk(elf_bss, elf_brk);
872         if (retval) {
873                 send_sig(SIGKILL, current, 0);
874                 goto out_free_dentry;
875         }
876         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
877                 send_sig(SIGSEGV, current, 0);
878                 retval = -EFAULT; /* Nobody gets to see this, but.. */
879                 goto out_free_dentry;
880         }
881
882         if (elf_interpreter) {
883                 unsigned long uninitialized_var(interp_map_addr);
884
885                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
886                                             interpreter,
887                                             &interp_map_addr,
888                                             load_bias);
889                 if (!IS_ERR((void *)elf_entry)) {
890                         /*
891                          * load_elf_interp() returns relocation
892                          * adjustment
893                          */
894                         interp_load_addr = elf_entry;
895                         elf_entry += loc->interp_elf_ex.e_entry;
896                 }
897                 if (BAD_ADDR(elf_entry)) {
898                         force_sig(SIGSEGV, current);
899                         retval = IS_ERR((void *)elf_entry) ?
900                                         (int)elf_entry : -EINVAL;
901                         goto out_free_dentry;
902                 }
903                 reloc_func_desc = interp_load_addr;
904
905                 allow_write_access(interpreter);
906                 fput(interpreter);
907                 kfree(elf_interpreter);
908         } else {
909                 elf_entry = loc->elf_ex.e_entry;
910                 if (BAD_ADDR(elf_entry)) {
911                         force_sig(SIGSEGV, current);
912                         retval = -EINVAL;
913                         goto out_free_dentry;
914                 }
915         }
916
917         kfree(elf_phdata);
918
919         set_binfmt(&elf_format);
920
921 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
922         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
923         if (retval < 0) {
924                 send_sig(SIGKILL, current, 0);
925                 goto out;
926         }
927 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
928
929         install_exec_creds(bprm);
930         retval = create_elf_tables(bprm, &loc->elf_ex,
931                           load_addr, interp_load_addr);
932         if (retval < 0) {
933                 send_sig(SIGKILL, current, 0);
934                 goto out;
935         }
936         /* N.B. passed_fileno might not be initialized? */
937         current->mm->end_code = end_code;
938         current->mm->start_code = start_code;
939         current->mm->start_data = start_data;
940         current->mm->end_data = end_data;
941         current->mm->start_stack = bprm->p;
942
943 #ifdef arch_randomize_brk
944         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
945                 current->mm->brk = current->mm->start_brk =
946                         arch_randomize_brk(current->mm);
947 #ifdef CONFIG_COMPAT_BRK
948                 current->brk_randomized = 1;
949 #endif
950         }
951 #endif
952
953         if (current->personality & MMAP_PAGE_ZERO) {
954                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
955                    and some applications "depend" upon this behavior.
956                    Since we do not have the power to recompile these, we
957                    emulate the SVr4 behavior. Sigh. */
958                 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
959                                 MAP_FIXED | MAP_PRIVATE, 0);
960         }
961
962 #ifdef ELF_PLAT_INIT
963         /*
964          * The ABI may specify that certain registers be set up in special
965          * ways (on i386 %edx is the address of a DT_FINI function, for
966          * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
967          * that the e_entry field is the address of the function descriptor
968          * for the startup routine, rather than the address of the startup
969          * routine itself.  This macro performs whatever initialization to
970          * the regs structure is required as well as any relocations to the
971          * function descriptor entries when executing dynamically linked apps.
972          */
973         ELF_PLAT_INIT(regs, reloc_func_desc);
974 #endif
975
976         start_thread(regs, elf_entry, bprm->p);
977         retval = 0;
978 out:
979         kfree(loc);
980 out_ret:
981         return retval;
982
983         /* error cleanup */
984 out_free_dentry:
985         allow_write_access(interpreter);
986         if (interpreter)
987                 fput(interpreter);
988 out_free_interp:
989         kfree(elf_interpreter);
990 out_free_ph:
991         kfree(elf_phdata);
992         goto out;
993 }
994
995 /* This is really simpleminded and specialized - we are loading an
996    a.out library that is given an ELF header. */
997 static int load_elf_library(struct file *file)
998 {
999         struct elf_phdr *elf_phdata;
1000         struct elf_phdr *eppnt;
1001         unsigned long elf_bss, bss, len;
1002         int retval, error, i, j;
1003         struct elfhdr elf_ex;
1004
1005         error = -ENOEXEC;
1006         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1007         if (retval != sizeof(elf_ex))
1008                 goto out;
1009
1010         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1011                 goto out;
1012
1013         /* First of all, some simple consistency checks */
1014         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1015             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1016                 goto out;
1017
1018         /* Now read in all of the header information */
1019
1020         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1021         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1022
1023         error = -ENOMEM;
1024         elf_phdata = kmalloc(j, GFP_KERNEL);
1025         if (!elf_phdata)
1026                 goto out;
1027
1028         eppnt = elf_phdata;
1029         error = -ENOEXEC;
1030         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1031         if (retval != j)
1032                 goto out_free_ph;
1033
1034         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1035                 if ((eppnt + i)->p_type == PT_LOAD)
1036                         j++;
1037         if (j != 1)
1038                 goto out_free_ph;
1039
1040         while (eppnt->p_type != PT_LOAD)
1041                 eppnt++;
1042
1043         /* Now use mmap to map the library into memory. */
1044         error = vm_mmap(file,
1045                         ELF_PAGESTART(eppnt->p_vaddr),
1046                         (eppnt->p_filesz +
1047                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1048                         PROT_READ | PROT_WRITE | PROT_EXEC,
1049                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1050                         (eppnt->p_offset -
1051                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1052         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1053                 goto out_free_ph;
1054
1055         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1056         if (padzero(elf_bss)) {
1057                 error = -EFAULT;
1058                 goto out_free_ph;
1059         }
1060
1061         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1062                             ELF_MIN_ALIGN - 1);
1063         bss = eppnt->p_memsz + eppnt->p_vaddr;
1064         if (bss > len)
1065                 vm_brk(len, bss - len);
1066         error = 0;
1067
1068 out_free_ph:
1069         kfree(elf_phdata);
1070 out:
1071         return error;
1072 }
1073
1074 #ifdef CONFIG_ELF_CORE
1075 /*
1076  * ELF core dumper
1077  *
1078  * Modelled on fs/exec.c:aout_core_dump()
1079  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1080  */
1081
1082 /*
1083  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1084  * that are useful for post-mortem analysis are included in every core dump.
1085  * In that way we ensure that the core dump is fully interpretable later
1086  * without matching up the same kernel and hardware config to see what PC values
1087  * meant. These special mappings include - vDSO, vsyscall, and other
1088  * architecture specific mappings
1089  */
1090 static bool always_dump_vma(struct vm_area_struct *vma)
1091 {
1092         /* Any vsyscall mappings? */
1093         if (vma == get_gate_vma(vma->vm_mm))
1094                 return true;
1095         /*
1096          * arch_vma_name() returns non-NULL for special architecture mappings,
1097          * such as vDSO sections.
1098          */
1099         if (arch_vma_name(vma))
1100                 return true;
1101
1102         return false;
1103 }
1104
1105 /*
1106  * Decide what to dump of a segment, part, all or none.
1107  */
1108 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1109                                    unsigned long mm_flags)
1110 {
1111 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1112
1113         /* always dump the vdso and vsyscall sections */
1114         if (always_dump_vma(vma))
1115                 goto whole;
1116
1117         if (vma->vm_flags & VM_NODUMP)
1118                 return 0;
1119
1120         /* Hugetlb memory check */
1121         if (vma->vm_flags & VM_HUGETLB) {
1122                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1123                         goto whole;
1124                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1125                         goto whole;
1126         }
1127
1128         /* Do not dump I/O mapped devices or special mappings */
1129         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1130                 return 0;
1131
1132         /* By default, dump shared memory if mapped from an anonymous file. */
1133         if (vma->vm_flags & VM_SHARED) {
1134                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1135                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1136                         goto whole;
1137                 return 0;
1138         }
1139
1140         /* Dump segments that have been written to.  */
1141         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1142                 goto whole;
1143         if (vma->vm_file == NULL)
1144                 return 0;
1145
1146         if (FILTER(MAPPED_PRIVATE))
1147                 goto whole;
1148
1149         /*
1150          * If this looks like the beginning of a DSO or executable mapping,
1151          * check for an ELF header.  If we find one, dump the first page to
1152          * aid in determining what was mapped here.
1153          */
1154         if (FILTER(ELF_HEADERS) &&
1155             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1156                 u32 __user *header = (u32 __user *) vma->vm_start;
1157                 u32 word;
1158                 mm_segment_t fs = get_fs();
1159                 /*
1160                  * Doing it this way gets the constant folded by GCC.
1161                  */
1162                 union {
1163                         u32 cmp;
1164                         char elfmag[SELFMAG];
1165                 } magic;
1166                 BUILD_BUG_ON(SELFMAG != sizeof word);
1167                 magic.elfmag[EI_MAG0] = ELFMAG0;
1168                 magic.elfmag[EI_MAG1] = ELFMAG1;
1169                 magic.elfmag[EI_MAG2] = ELFMAG2;
1170                 magic.elfmag[EI_MAG3] = ELFMAG3;
1171                 /*
1172                  * Switch to the user "segment" for get_user(),
1173                  * then put back what elf_core_dump() had in place.
1174                  */
1175                 set_fs(USER_DS);
1176                 if (unlikely(get_user(word, header)))
1177                         word = 0;
1178                 set_fs(fs);
1179                 if (word == magic.cmp)
1180                         return PAGE_SIZE;
1181         }
1182
1183 #undef  FILTER
1184
1185         return 0;
1186
1187 whole:
1188         return vma->vm_end - vma->vm_start;
1189 }
1190
/*
 * An ELF note in memory.
 *
 * Describes one note before it is written out: writenote() emits a
 * struct elf_note header built from these fields, followed by the name
 * and the data, each padded to a 4-byte boundary.
 */
struct memelfnote
{
        const char *name;       /* NUL-terminated owner name, e.g. "CORE" */
        int type;               /* note type, stored as n_type */
        unsigned int datasz;    /* bytes at data, stored as n_descsz */
        void *data;             /* note payload */
};
1199
1200 static int notesize(struct memelfnote *en)
1201 {
1202         int sz;
1203
1204         sz = sizeof(struct elf_note);
1205         sz += roundup(strlen(en->name) + 1, 4);
1206         sz += roundup(en->datasz, 4);
1207
1208         return sz;
1209 }
1210
/*
 * Write nr bytes starting at addr with dump_write() and advance *foffset
 * by nr.  If dump_write() reports failure the macro executes "return 0"
 * in the enclosing function.  It relies on a variable named "file" being
 * in scope at the point of use, and evaluates nr twice.
 */
#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1213
1214 static int alignfile(struct file *file, loff_t *foffset)
1215 {
1216         static const char buf[4] = { 0, };
1217         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1218         return 1;
1219 }
1220
1221 static int writenote(struct memelfnote *men, struct file *file,
1222                         loff_t *foffset)
1223 {
1224         struct elf_note en;
1225         en.n_namesz = strlen(men->name) + 1;
1226         en.n_descsz = men->datasz;
1227         en.n_type = men->type;
1228
1229         DUMP_WRITE(&en, sizeof(en), foffset);
1230         DUMP_WRITE(men->name, en.n_namesz, foffset);
1231         if (!alignfile(file, foffset))
1232                 return 0;
1233         DUMP_WRITE(men->data, men->datasz, foffset);
1234         if (!alignfile(file, foffset))
1235                 return 0;
1236
1237         return 1;
1238 }
1239 #undef DUMP_WRITE
1240
1241 static void fill_elf_header(struct elfhdr *elf, int segs,
1242                             u16 machine, u32 flags, u8 osabi)
1243 {
1244         memset(elf, 0, sizeof(*elf));
1245
1246         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1247         elf->e_ident[EI_CLASS] = ELF_CLASS;
1248         elf->e_ident[EI_DATA] = ELF_DATA;
1249         elf->e_ident[EI_VERSION] = EV_CURRENT;
1250         elf->e_ident[EI_OSABI] = ELF_OSABI;
1251
1252         elf->e_type = ET_CORE;
1253         elf->e_machine = machine;
1254         elf->e_version = EV_CURRENT;
1255         elf->e_phoff = sizeof(struct elfhdr);
1256         elf->e_flags = flags;
1257         elf->e_ehsize = sizeof(struct elfhdr);
1258         elf->e_phentsize = sizeof(struct elf_phdr);
1259         elf->e_phnum = segs;
1260
1261         return;
1262 }
1263
1264 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1265 {
1266         phdr->p_type = PT_NOTE;
1267         phdr->p_offset = offset;
1268         phdr->p_vaddr = 0;
1269         phdr->p_paddr = 0;
1270         phdr->p_filesz = sz;
1271         phdr->p_memsz = 0;
1272         phdr->p_flags = 0;
1273         phdr->p_align = 0;
1274         return;
1275 }
1276
1277 static void fill_note(struct memelfnote *note, const char *name, int type, 
1278                 unsigned int sz, void *data)
1279 {
1280         note->name = name;
1281         note->type = type;
1282         note->datasz = sz;
1283         note->data = data;
1284         return;
1285 }
1286
1287 /*
1288  * fill up all the fields in prstatus from the given task struct, except
1289  * registers which need to be filled up separately.
1290  */
1291 static void fill_prstatus(struct elf_prstatus *prstatus,
1292                 struct task_struct *p, long signr)
1293 {
1294         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1295         prstatus->pr_sigpend = p->pending.signal.sig[0];
1296         prstatus->pr_sighold = p->blocked.sig[0];
1297         rcu_read_lock();
1298         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1299         rcu_read_unlock();
1300         prstatus->pr_pid = task_pid_vnr(p);
1301         prstatus->pr_pgrp = task_pgrp_vnr(p);
1302         prstatus->pr_sid = task_session_vnr(p);
1303         if (thread_group_leader(p)) {
1304                 struct task_cputime cputime;
1305
1306                 /*
1307                  * This is the record for the group leader.  It shows the
1308                  * group-wide total, not its individual thread total.
1309                  */
1310                 thread_group_cputime(p, &cputime);
1311                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1312                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1313         } else {
1314                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1315                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1316         }
1317         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1318         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1319 }
1320
1321 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1322                        struct mm_struct *mm)
1323 {
1324         const struct cred *cred;
1325         unsigned int i, len;
1326         
1327         /* first copy the parameters from user space */
1328         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1329
1330         len = mm->arg_end - mm->arg_start;
1331         if (len >= ELF_PRARGSZ)
1332                 len = ELF_PRARGSZ-1;
1333         if (copy_from_user(&psinfo->pr_psargs,
1334                            (const char __user *)mm->arg_start, len))
1335                 return -EFAULT;
1336         for(i = 0; i < len; i++)
1337                 if (psinfo->pr_psargs[i] == 0)
1338                         psinfo->pr_psargs[i] = ' ';
1339         psinfo->pr_psargs[len] = 0;
1340
1341         rcu_read_lock();
1342         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1343         rcu_read_unlock();
1344         psinfo->pr_pid = task_pid_vnr(p);
1345         psinfo->pr_pgrp = task_pgrp_vnr(p);
1346         psinfo->pr_sid = task_session_vnr(p);
1347
1348         i = p->state ? ffz(~p->state) + 1 : 0;
1349         psinfo->pr_state = i;
1350         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1351         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1352         psinfo->pr_nice = task_nice(p);
1353         psinfo->pr_flag = p->flags;
1354         rcu_read_lock();
1355         cred = __task_cred(p);
1356         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1357         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1358         rcu_read_unlock();
1359         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1360         
1361         return 0;
1362 }
1363
1364 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1365 {
1366         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1367         int i = 0;
1368         do
1369                 i += 2;
1370         while (auxv[i - 2] != AT_NULL);
1371         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1372 }
1373
1374 #ifdef CORE_DUMP_USE_REGSET
1375 #include <linux/regset.h>
1376
1377 struct elf_thread_core_info {
1378         struct elf_thread_core_info *next;
1379         struct task_struct *task;
1380         struct elf_prstatus prstatus;
1381         struct memelfnote notes[0];
1382 };
1383
/*
 * Everything needed to size and write the note section of a core dump.
 * Filled by fill_note_info(), written by write_note_info(), and released
 * by free_note_info().
 */
struct elf_note_info {
        struct elf_thread_core_info *thread;    /* per-thread list; dumping task kept at the head */
        struct memelfnote psinfo;               /* process-wide NT_PRPSINFO note */
        struct memelfnote auxv;                 /* process-wide NT_AUXV note */
        size_t size;                            /* sum of notesize() over all notes */
        int thread_notes;                       /* entries in each thread's notes[] */
};
1391
1392 /*
1393  * When a regset has a writeback hook, we call it on each thread before
1394  * dumping user memory.  On register window machines, this makes sure the
1395  * user memory backing the register data is up to date before we read it.
1396  */
1397 static void do_thread_regset_writeback(struct task_struct *task,
1398                                        const struct user_regset *regset)
1399 {
1400         if (regset->writeback)
1401                 regset->writeback(task, regset, 1);
1402 }
1403
/*
 * Accessors used by fill_thread_core_info() when it builds the
 * NT_PRSTATUS note.  Each is defined here only if nothing earlier has
 * defined it, so the definitions below act as defaults.
 */

/* Number of bytes of register data requested for the pr_reg area S. */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Size of the prstatus object S as recorded in the note. */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the register area inside the prstatus pointed to by S. */
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Store V in the pr_fpvalid field of the prstatus pointed to by S. */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1419
1420 static int fill_thread_core_info(struct elf_thread_core_info *t,
1421                                  const struct user_regset_view *view,
1422                                  long signr, size_t *total)
1423 {
1424         unsigned int i;
1425
1426         /*
1427          * NT_PRSTATUS is the one special case, because the regset data
1428          * goes into the pr_reg field inside the note contents, rather
1429          * than being the whole note contents.  We fill the reset in here.
1430          * We assume that regset 0 is NT_PRSTATUS.
1431          */
1432         fill_prstatus(&t->prstatus, t->task, signr);
1433         (void) view->regsets[0].get(t->task, &view->regsets[0],
1434                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1435                                     PR_REG_PTR(&t->prstatus), NULL);
1436
1437         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1438                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1439         *total += notesize(&t->notes[0]);
1440
1441         do_thread_regset_writeback(t->task, &view->regsets[0]);
1442
1443         /*
1444          * Each other regset might generate a note too.  For each regset
1445          * that has no core_note_type or is inactive, we leave t->notes[i]
1446          * all zero and we'll know to skip writing it later.
1447          */
1448         for (i = 1; i < view->n; ++i) {
1449                 const struct user_regset *regset = &view->regsets[i];
1450                 do_thread_regset_writeback(t->task, regset);
1451                 if (regset->core_note_type && regset->get &&
1452                     (!regset->active || regset->active(t->task, regset))) {
1453                         int ret;
1454                         size_t size = regset->n * regset->size;
1455                         void *data = kmalloc(size, GFP_KERNEL);
1456                         if (unlikely(!data))
1457                                 return 0;
1458                         ret = regset->get(t->task, regset,
1459                                           0, size, data, NULL);
1460                         if (unlikely(ret))
1461                                 kfree(data);
1462                         else {
1463                                 if (regset->core_note_type != NT_PRFPREG)
1464                                         fill_note(&t->notes[i], "LINUX",
1465                                                   regset->core_note_type,
1466                                                   size, data);
1467                                 else {
1468                                         SET_PR_FPVALID(&t->prstatus, 1);
1469                                         fill_note(&t->notes[i], "CORE",
1470                                                   NT_PRFPREG, size, data);
1471                                 }
1472                                 *total += notesize(&t->notes[i]);
1473                         }
1474                 }
1475         }
1476
1477         return 1;
1478 }
1479
1480 static int fill_note_info(struct elfhdr *elf, int phdrs,
1481                           struct elf_note_info *info,
1482                           long signr, struct pt_regs *regs)
1483 {
1484         struct task_struct *dump_task = current;
1485         const struct user_regset_view *view = task_user_regset_view(dump_task);
1486         struct elf_thread_core_info *t;
1487         struct elf_prpsinfo *psinfo;
1488         struct core_thread *ct;
1489         unsigned int i;
1490
1491         info->size = 0;
1492         info->thread = NULL;
1493
1494         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1495         if (psinfo == NULL)
1496                 return 0;
1497
1498         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1499
1500         /*
1501          * Figure out how many notes we're going to need for each thread.
1502          */
1503         info->thread_notes = 0;
1504         for (i = 0; i < view->n; ++i)
1505                 if (view->regsets[i].core_note_type != 0)
1506                         ++info->thread_notes;
1507
1508         /*
1509          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1510          * since it is our one special case.
1511          */
1512         if (unlikely(info->thread_notes == 0) ||
1513             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1514                 WARN_ON(1);
1515                 return 0;
1516         }
1517
1518         /*
1519          * Initialize the ELF file header.
1520          */
1521         fill_elf_header(elf, phdrs,
1522                         view->e_machine, view->e_flags, view->ei_osabi);
1523
1524         /*
1525          * Allocate a structure for each thread.
1526          */
1527         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1528                 t = kzalloc(offsetof(struct elf_thread_core_info,
1529                                      notes[info->thread_notes]),
1530                             GFP_KERNEL);
1531                 if (unlikely(!t))
1532                         return 0;
1533
1534                 t->task = ct->task;
1535                 if (ct->task == dump_task || !info->thread) {
1536                         t->next = info->thread;
1537                         info->thread = t;
1538                 } else {
1539                         /*
1540                          * Make sure to keep the original task at
1541                          * the head of the list.
1542                          */
1543                         t->next = info->thread->next;
1544                         info->thread->next = t;
1545                 }
1546         }
1547
1548         /*
1549          * Now fill in each thread's information.
1550          */
1551         for (t = info->thread; t != NULL; t = t->next)
1552                 if (!fill_thread_core_info(t, view, signr, &info->size))
1553                         return 0;
1554
1555         /*
1556          * Fill in the two process-wide notes.
1557          */
1558         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1559         info->size += notesize(&info->psinfo);
1560
1561         fill_auxv_note(&info->auxv, current->mm);
1562         info->size += notesize(&info->auxv);
1563
1564         return 1;
1565 }
1566
1567 static size_t get_note_info_size(struct elf_note_info *info)
1568 {
1569         return info->size;
1570 }
1571
1572 /*
1573  * Write all the notes for each thread.  When writing the first thread, the
1574  * process-wide notes are interleaved after the first thread-specific note.
1575  */
1576 static int write_note_info(struct elf_note_info *info,
1577                            struct file *file, loff_t *foffset)
1578 {
1579         bool first = 1;
1580         struct elf_thread_core_info *t = info->thread;
1581
1582         do {
1583                 int i;
1584
1585                 if (!writenote(&t->notes[0], file, foffset))
1586                         return 0;
1587
1588                 if (first && !writenote(&info->psinfo, file, foffset))
1589                         return 0;
1590                 if (first && !writenote(&info->auxv, file, foffset))
1591                         return 0;
1592
1593                 for (i = 1; i < info->thread_notes; ++i)
1594                         if (t->notes[i].data &&
1595                             !writenote(&t->notes[i], file, foffset))
1596                                 return 0;
1597
1598                 first = 0;
1599                 t = t->next;
1600         } while (t);
1601
1602         return 1;
1603 }
1604
1605 static void free_note_info(struct elf_note_info *info)
1606 {
1607         struct elf_thread_core_info *threads = info->thread;
1608         while (threads) {
1609                 unsigned int i;
1610                 struct elf_thread_core_info *t = threads;
1611                 threads = t->next;
1612                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1613                 for (i = 1; i < info->thread_notes; ++i)
1614                         kfree(t->notes[i].data);
1615                 kfree(t);
1616         }
1617         kfree(info->psinfo.data);
1618 }
1619
1620 #else
1621
1622 /* Here is the structure in which status of each thread is captured. */
1623 struct elf_thread_status
1624 {
1625         struct list_head list;
1626         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1627         elf_fpregset_t fpu;             /* NT_PRFPREG */
1628         struct task_struct *thread;
1629 #ifdef ELF_CORE_COPY_XFPREGS
1630         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1631 #endif
1632         struct memelfnote notes[3];
1633         int num_notes;
1634 };
1635
1636 /*
1637  * In order to add the specific thread information for the elf file format,
1638  * we need to keep a linked list of every threads pr_status and then create
1639  * a single section for them in the final core file.
1640  */
1641 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1642 {
1643         int sz = 0;
1644         struct task_struct *p = t->thread;
1645         t->num_notes = 0;
1646
1647         fill_prstatus(&t->prstatus, p, signr);
1648         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1649         
1650         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1651                   &(t->prstatus));
1652         t->num_notes++;
1653         sz += notesize(&t->notes[0]);
1654
1655         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1656                                                                 &t->fpu))) {
1657                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1658                           &(t->fpu));
1659                 t->num_notes++;
1660                 sz += notesize(&t->notes[1]);
1661         }
1662
1663 #ifdef ELF_CORE_COPY_XFPREGS
1664         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1665                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1666                           sizeof(t->xfpu), &t->xfpu);
1667                 t->num_notes++;
1668                 sz += notesize(&t->notes[2]);
1669         }
1670 #endif  
1671         return sz;
1672 }
1673
1674 struct elf_note_info {
1675         struct memelfnote *notes;
1676         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1677         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1678         struct list_head thread_list;
1679         elf_fpregset_t *fpu;
1680 #ifdef ELF_CORE_COPY_XFPREGS
1681         elf_fpxregset_t *xfpu;
1682 #endif
1683         int thread_status_size;
1684         int numnote;
1685 };
1686
1687 static int elf_note_info_init(struct elf_note_info *info)
1688 {
1689         memset(info, 0, sizeof(*info));
1690         INIT_LIST_HEAD(&info->thread_list);
1691
1692         /* Allocate space for six ELF notes */
1693         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1694         if (!info->notes)
1695                 return 0;
1696         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1697         if (!info->psinfo)
1698                 goto notes_free;
1699         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1700         if (!info->prstatus)
1701                 goto psinfo_free;
1702         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1703         if (!info->fpu)
1704                 goto prstatus_free;
1705 #ifdef ELF_CORE_COPY_XFPREGS
1706         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1707         if (!info->xfpu)
1708                 goto fpu_free;
1709 #endif
1710         return 1;
1711 #ifdef ELF_CORE_COPY_XFPREGS
1712  fpu_free:
1713         kfree(info->fpu);
1714 #endif
1715  prstatus_free:
1716         kfree(info->prstatus);
1717  psinfo_free:
1718         kfree(info->psinfo);
1719  notes_free:
1720         kfree(info->notes);
1721         return 0;
1722 }
1723
1724 static int fill_note_info(struct elfhdr *elf, int phdrs,
1725                           struct elf_note_info *info,
1726                           long signr, struct pt_regs *regs)
1727 {
1728         struct list_head *t;
1729
1730         if (!elf_note_info_init(info))
1731                 return 0;
1732
1733         if (signr) {
1734                 struct core_thread *ct;
1735                 struct elf_thread_status *ets;
1736
1737                 for (ct = current->mm->core_state->dumper.next;
1738                                                 ct; ct = ct->next) {
1739                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1740                         if (!ets)
1741                                 return 0;
1742
1743                         ets->thread = ct->task;
1744                         list_add(&ets->list, &info->thread_list);
1745                 }
1746
1747                 list_for_each(t, &info->thread_list) {
1748                         int sz;
1749
1750                         ets = list_entry(t, struct elf_thread_status, list);
1751                         sz = elf_dump_thread_status(signr, ets);
1752                         info->thread_status_size += sz;
1753                 }
1754         }
1755         /* now collect the dump for the current */
1756         memset(info->prstatus, 0, sizeof(*info->prstatus));
1757         fill_prstatus(info->prstatus, current, signr);
1758         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1759
1760         /* Set up header */
1761         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1762
1763         /*
1764          * Set up the notes in similar form to SVR4 core dumps made
1765          * with info from their /proc.
1766          */
1767
1768         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1769                   sizeof(*info->prstatus), info->prstatus);
1770         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1771         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1772                   sizeof(*info->psinfo), info->psinfo);
1773
1774         info->numnote = 2;
1775
1776         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1777
1778         /* Try to dump the FPU. */
1779         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1780                                                                info->fpu);
1781         if (info->prstatus->pr_fpvalid)
1782                 fill_note(info->notes + info->numnote++,
1783                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1784 #ifdef ELF_CORE_COPY_XFPREGS
1785         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1786                 fill_note(info->notes + info->numnote++,
1787                           "LINUX", ELF_CORE_XFPREG_TYPE,
1788                           sizeof(*info->xfpu), info->xfpu);
1789 #endif
1790
1791         return 1;
1792 }
1793
1794 static size_t get_note_info_size(struct elf_note_info *info)
1795 {
1796         int sz = 0;
1797         int i;
1798
1799         for (i = 0; i < info->numnote; i++)
1800                 sz += notesize(info->notes + i);
1801
1802         sz += info->thread_status_size;
1803
1804         return sz;
1805 }
1806
1807 static int write_note_info(struct elf_note_info *info,
1808                            struct file *file, loff_t *foffset)
1809 {
1810         int i;
1811         struct list_head *t;
1812
1813         for (i = 0; i < info->numnote; i++)
1814                 if (!writenote(info->notes + i, file, foffset))
1815                         return 0;
1816
1817         /* write out the thread status notes section */
1818         list_for_each(t, &info->thread_list) {
1819                 struct elf_thread_status *tmp =
1820                                 list_entry(t, struct elf_thread_status, list);
1821
1822                 for (i = 0; i < tmp->num_notes; i++)
1823                         if (!writenote(&tmp->notes[i], file, foffset))
1824                                 return 0;
1825         }
1826
1827         return 1;
1828 }
1829
1830 static void free_note_info(struct elf_note_info *info)
1831 {
1832         while (!list_empty(&info->thread_list)) {
1833                 struct list_head *tmp = info->thread_list.next;
1834                 list_del(tmp);
1835                 kfree(list_entry(tmp, struct elf_thread_status, list));
1836         }
1837
1838         kfree(info->prstatus);
1839         kfree(info->psinfo);
1840         kfree(info->notes);
1841         kfree(info->fpu);
1842 #ifdef ELF_CORE_COPY_XFPREGS
1843         kfree(info->xfpu);
1844 #endif
1845 }
1846
1847 #endif
1848
1849 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1850                                         struct vm_area_struct *gate_vma)
1851 {
1852         struct vm_area_struct *ret = tsk->mm->mmap;
1853
1854         if (ret)
1855                 return ret;
1856         return gate_vma;
1857 }
1858 /*
1859  * Helper function for iterating across a vma list.  It ensures that the caller
1860  * will visit `gate_vma' prior to terminating the search.
1861  */
1862 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1863                                         struct vm_area_struct *gate_vma)
1864 {
1865         struct vm_area_struct *ret;
1866
1867         ret = this_vma->vm_next;
1868         if (ret)
1869                 return ret;
1870         if (this_vma == gate_vma)
1871                 return NULL;
1872         return gate_vma;
1873 }
1874
1875 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1876                              elf_addr_t e_shoff, int segs)
1877 {
1878         elf->e_shoff = e_shoff;
1879         elf->e_shentsize = sizeof(*shdr4extnum);
1880         elf->e_shnum = 1;
1881         elf->e_shstrndx = SHN_UNDEF;
1882
1883         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1884
1885         shdr4extnum->sh_type = SHT_NULL;
1886         shdr4extnum->sh_size = elf->e_shnum;
1887         shdr4extnum->sh_link = elf->e_shstrndx;
1888         shdr4extnum->sh_info = segs;
1889 }
1890
1891 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1892                                      unsigned long mm_flags)
1893 {
1894         struct vm_area_struct *vma;
1895         size_t size = 0;
1896
1897         for (vma = first_vma(current, gate_vma); vma != NULL;
1898              vma = next_vma(vma, gate_vma))
1899                 size += vma_dump_size(vma, mm_flags);
1900         return size;
1901 }
1902
1903 /*
1904  * Actual dumper
1905  *
1906  * This is a two-pass process; first we find the offsets of the bits,
1907  * and then they are actually written out.  If we run out of core limit
1908  * we just truncate.
1909  */
1910 static int elf_core_dump(struct coredump_params *cprm)
1911 {
1912         int has_dumped = 0;
1913         mm_segment_t fs;
1914         int segs;
1915         size_t size = 0;
1916         struct vm_area_struct *vma, *gate_vma;
1917         struct elfhdr *elf = NULL;
1918         loff_t offset = 0, dataoff, foffset;
1919         struct elf_note_info info;
1920         struct elf_phdr *phdr4note = NULL;
1921         struct elf_shdr *shdr4extnum = NULL;
1922         Elf_Half e_phnum;
1923         elf_addr_t e_shoff;
1924
1925         /*
1926          * We no longer stop all VM operations.
1927          * 
1928          * This is because those proceses that could possibly change map_count
1929          * or the mmap / vma pages are now blocked in do_exit on current
1930          * finishing this core dump.
1931          *
1932          * Only ptrace can touch these memory addresses, but it doesn't change
1933          * the map_count or the pages allocated. So no possibility of crashing
1934          * exists while dumping the mm->vm_next areas to the core file.
1935          */
1936   
1937         /* alloc memory for large data structures: too large to be on stack */
1938         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1939         if (!elf)
1940                 goto out;
1941         /*
1942          * The number of segs are recored into ELF header as 16bit value.
1943          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1944          */
1945         segs = current->mm->map_count;
1946         segs += elf_core_extra_phdrs();
1947
1948         gate_vma = get_gate_vma(current->mm);
1949         if (gate_vma != NULL)
1950                 segs++;
1951
1952         /* for notes section */
1953         segs++;
1954
1955         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1956          * this, kernel supports extended numbering. Have a look at
1957          * include/linux/elf.h for further information. */
1958         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1959
1960         /*
1961          * Collect all the non-memory information about the process for the
1962          * notes.  This also sets up the file header.
1963          */
1964         if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1965                 goto cleanup;
1966
1967         has_dumped = 1;
1968         current->flags |= PF_DUMPCORE;
1969   
1970         fs = get_fs();
1971         set_fs(KERNEL_DS);
1972
1973         offset += sizeof(*elf);                         /* Elf header */
1974         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
1975         foffset = offset;
1976
1977         /* Write notes phdr entry */
1978         {
1979                 size_t sz = get_note_info_size(&info);
1980
1981                 sz += elf_coredump_extra_notes_size();
1982
1983                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1984                 if (!phdr4note)
1985                         goto end_coredump;
1986
1987                 fill_elf_note_phdr(phdr4note, sz, offset);
1988                 offset += sz;
1989         }
1990
1991         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1992
1993         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1994         offset += elf_core_extra_data_size();
1995         e_shoff = offset;
1996
1997         if (e_phnum == PN_XNUM) {
1998                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1999                 if (!shdr4extnum)
2000                         goto end_coredump;
2001                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2002         }
2003
2004         offset = dataoff;
2005
2006         size += sizeof(*elf);
2007         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2008                 goto end_coredump;
2009
2010         size += sizeof(*phdr4note);
2011         if (size > cprm->limit
2012             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2013                 goto end_coredump;
2014
2015         /* Write program headers for segments dump */
2016         for (vma = first_vma(current, gate_vma); vma != NULL;
2017                         vma = next_vma(vma, gate_vma)) {
2018                 struct elf_phdr phdr;
2019
2020                 phdr.p_type = PT_LOAD;
2021                 phdr.p_offset = offset;
2022                 phdr.p_vaddr = vma->vm_start;
2023                 phdr.p_paddr = 0;
2024                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2025                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2026                 offset += phdr.p_filesz;
2027                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2028                 if (vma->vm_flags & VM_WRITE)
2029                         phdr.p_flags |= PF_W;
2030                 if (vma->vm_flags & VM_EXEC)
2031                         phdr.p_flags |= PF_X;
2032                 phdr.p_align = ELF_EXEC_PAGESIZE;
2033
2034                 size += sizeof(phdr);
2035                 if (size > cprm->limit
2036                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2037                         goto end_coredump;
2038         }
2039
2040         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2041                 goto end_coredump;
2042
2043         /* write out the notes section */
2044         if (!write_note_info(&info, cprm->file, &foffset))
2045                 goto end_coredump;
2046
2047         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2048                 goto end_coredump;
2049
2050         /* Align to page */
2051         if (!dump_seek(cprm->file, dataoff - foffset))
2052                 goto end_coredump;
2053
2054         for (vma = first_vma(current, gate_vma); vma != NULL;
2055                         vma = next_vma(vma, gate_vma)) {
2056                 unsigned long addr;
2057                 unsigned long end;
2058
2059                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2060
2061                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2062                         struct page *page;
2063                         int stop;
2064
2065                         page = get_dump_page(addr);
2066                         if (page) {
2067                                 void *kaddr = kmap(page);
2068                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2069                                         !dump_write(cprm->file, kaddr,
2070                                                     PAGE_SIZE);
2071                                 kunmap(page);
2072                                 page_cache_release(page);
2073                         } else
2074                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2075                         if (stop)
2076                                 goto end_coredump;
2077                 }
2078         }
2079
2080         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2081                 goto end_coredump;
2082
2083         if (e_phnum == PN_XNUM) {
2084                 size += sizeof(*shdr4extnum);
2085                 if (size > cprm->limit
2086                     || !dump_write(cprm->file, shdr4extnum,
2087                                    sizeof(*shdr4extnum)))
2088                         goto end_coredump;
2089         }
2090
2091 end_coredump:
2092         set_fs(fs);
2093
2094 cleanup:
2095         free_note_info(&info);
2096         kfree(shdr4extnum);
2097         kfree(phdr4note);
2098         kfree(elf);
2099 out:
2100         return has_dumped;
2101 }
2102
2103 #endif          /* CONFIG_ELF_CORE */
2104
2105 static int __init init_elf_binfmt(void)
2106 {
2107         register_binfmt(&elf_format);
2108         return 0;
2109 }
2110
2111 static void __exit exit_elf_binfmt(void)
2112 {
2113         /* Remove the COFF and ELF loaders. */
2114         unregister_binfmt(&elf_format);
2115 }
2116
2117 core_initcall(init_elf_binfmt);
2118 module_exit(exit_elf_binfmt);
2119 MODULE_LICENSE("GPL");