/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

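/*
 * Fallback definitions of the user-visible dump types; builds that need
 * different widths (e.g. compat) define these before this point.
 */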
#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

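/*
 * Round addresses with respect to the ELF page size: ELF_PAGESTART rounds
 * down to a page boundary, ELF_PAGEOFFSET extracts the offset within a
 * page, and ELF_PAGEALIGN rounds up.
 */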
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

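/* An address is unusable if it lies at or beyond the user address-space limit. */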
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

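/*
 * Map anonymous pages for the page-aligned region between start and end,
 * then record the new program break in the mm.
 */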
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

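        /* argc itself, argv[] plus its NULL terminator, and envp[] plus its NULL */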
        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

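/*
 * Span covered by the PT_LOAD segments: from the page-aligned start of the
 * first one to the end of the last one.  Returns 0 if there are no PT_LOAD
 * segments.
 */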
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
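                        /* Only the first PT_LOAD mapping needs the total_size reservation. */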
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

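/*
 * Shift the stack top by a random, page-aligned amount within
 * STACK_RND_MASK pages, unless randomization is disabled for this task.
 */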
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure the path is NUL-terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections, so
                                         * we don't check the return value.
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl or an explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

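        /* Relocate the recorded addresses by the load bias chosen above. */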
        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

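        /* Exactly one PT_LOAD exists (checked above); find it. */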
        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in
 * every core dump.  That way we ensure that the core dump is fully
 * interpretable later without matching up the same kernel and hardware
 * config to see what PC values meant.  These special mappings include
 * the vDSO, vsyscall, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: all, part, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

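/* Emit one note: header, padded name, then padded descriptor data. */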
static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        return dump_emit(cprm, &en, sizeof(en)) &&
            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
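        /* Count auxv entries, including the terminating AT_NULL pair. */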
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                const siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static int fill_files_note(struct memelfnote *note)
{
        struct vm_area_struct *vma;
        unsigned count, size, names_ofs, remaining, n;
        user_long_t *data;
        user_long_t *start_end_ofs;
        char *name_base, *name_curpos;

        /* *Estimated* file count and total data size needed */
        count = current->mm->map_count;
        size = count * 64;

        names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
                return -EINVAL;
        size = round_up(size, PAGE_SIZE);
        data = vmalloc(size);
        if (!data)
                return -ENOMEM;

        start_end_ofs = data + 2;
        name_base = name_curpos = ((char *)data) + names_ofs;
        remaining = size - names_ofs;
        count = 0;
        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
                struct file *file;
                const char *filename;

                file = vma->vm_file;
                if (!file)
                        continue;
                filename = d_path(&file->f_path, name_curpos, remaining);
                if (IS_ERR(filename)) {
                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
                                vfree(data);
                                size = size * 5 / 4;
                                goto alloc;
                        }
                        continue;
                }

                /* d_path() fills at the end, move name down */
                /* n = strlen(filename) + 1: */
                n = (name_curpos + remaining) - filename;
                remaining = filename - name_curpos;
                memmove(name_curpos, filename, n);
                name_curpos += n;

                *start_end_ofs++ = vma->vm_start;
                *start_end_ofs++ = vma->vm_end;
                *start_end_ofs++ = vma->vm_pgoff;
                count++;
        }

1450         /* Now we know the exact count of files, so we can store it */
1451         data[0] = count;
1452         data[1] = PAGE_SIZE;
1453         /*
1454          * The final count is usually less than current->mm->map_count,
1455          * in which case we need to move the filenames down.
1456          */
1457         n = current->mm->map_count - count;
1458         if (n != 0) {
1459                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1460                 memmove(name_base - shift_bytes, name_base,
1461                         name_curpos - name_base);
1462                 name_curpos -= shift_bytes;
1463         }
1464
1465         size = name_curpos - (char *)data;
1466         fill_note(note, "CORE", NT_FILE, size, data);
1467         return 0;
1468 }
1469
1470 #ifdef CORE_DUMP_USE_REGSET
1471 #include <linux/regset.h>
1472
1473 struct elf_thread_core_info {
1474         struct elf_thread_core_info *next;
1475         struct task_struct *task;
1476         struct elf_prstatus prstatus;
1477         struct memelfnote notes[];
1478 };
1479
1480 struct elf_note_info {
1481         struct elf_thread_core_info *thread;
1482         struct memelfnote psinfo;
1483         struct memelfnote signote;
1484         struct memelfnote auxv;
1485         struct memelfnote files;
1486         user_siginfo_t csigdata;
1487         size_t size;
1488         int thread_notes;
1489 };
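
/*
 * One elf_thread_core_info is allocated per thread, trailed by
 * thread_notes memelfnote slots; fill_note_info() below keeps the task
 * that triggered the dump at the head of the ->thread list.
 */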
1490
1491 /*
1492  * When a regset has a writeback hook, we call it on each thread before
1493  * dumping user memory.  On register window machines, this makes sure the
1494  * user memory backing the register data is up to date before we read it.
1495  */
1496 static void do_thread_regset_writeback(struct task_struct *task,
1497                                        const struct user_regset *regset)
1498 {
1499         if (regset->writeback)
1500                 regset->writeback(task, regset, 1);
1501 }
1502
1503 #ifndef PR_REG_SIZE
1504 #define PR_REG_SIZE(S) sizeof(S)
1505 #endif
1506
1507 #ifndef PRSTATUS_SIZE
1508 #define PRSTATUS_SIZE(S) sizeof(S)
1509 #endif
1510
1511 #ifndef PR_REG_PTR
1512 #define PR_REG_PTR(S) (&((S)->pr_reg))
1513 #endif
1514
1515 #ifndef SET_PR_FPVALID
1516 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1517 #endif
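
/*
 * The four macros above are only defaults: an architecture may pre-define
 * them in its own headers when the prstatus layout it dumps (for compat
 * tasks, for example) differs from the generic struct elf_prstatus.
 */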
1518
1519 static int fill_thread_core_info(struct elf_thread_core_info *t,
1520                                  const struct user_regset_view *view,
1521                                  long signr, size_t *total)
1522 {
1523         unsigned int i;
1524
1525         /*
1526          * NT_PRSTATUS is the one special case, because the regset data
1527          * goes into the pr_reg field inside the note contents, rather
1528          * than being the whole note contents.  We fill the rest in here.
1529          * We assume that regset 0 is NT_PRSTATUS.
1530          */
1531         fill_prstatus(&t->prstatus, t->task, signr);
1532         (void) view->regsets[0].get(t->task, &view->regsets[0],
1533                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1534                                     PR_REG_PTR(&t->prstatus), NULL);
1535
1536         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1537                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1538         *total += notesize(&t->notes[0]);
1539
1540         do_thread_regset_writeback(t->task, &view->regsets[0]);
1541
1542         /*
1543          * Each of the other regsets might generate a note too.  For each regset
1544          * that has no core_note_type or is inactive, we leave t->notes[i]
1545          * all zero and we'll know to skip writing it later.
1546          */
1547         for (i = 1; i < view->n; ++i) {
1548                 const struct user_regset *regset = &view->regsets[i];
1549                 do_thread_regset_writeback(t->task, regset);
1550                 if (regset->core_note_type && regset->get &&
1551                     (!regset->active || regset->active(t->task, regset))) {
1552                         int ret;
1553                         size_t size = regset->n * regset->size;
1554                         void *data = kmalloc(size, GFP_KERNEL);
1555                         if (unlikely(!data))
1556                                 return 0;
1557                         ret = regset->get(t->task, regset,
1558                                           0, size, data, NULL);
1559                         if (unlikely(ret))
1560                                 kfree(data);
1561                         else {
1562                                 if (regset->core_note_type != NT_PRFPREG)
1563                                         fill_note(&t->notes[i], "LINUX",
1564                                                   regset->core_note_type,
1565                                                   size, data);
1566                                 else {
1567                                         SET_PR_FPVALID(&t->prstatus, 1);
1568                                         fill_note(&t->notes[i], "CORE",
1569                                                   NT_PRFPREG, size, data);
1570                                 }
1571                                 *total += notesize(&t->notes[i]);
1572                         }
1573                 }
1574         }
1575
1576         return 1;
1577 }
1578
1579 static int fill_note_info(struct elfhdr *elf, int phdrs,
1580                           struct elf_note_info *info,
1581                           const siginfo_t *siginfo, struct pt_regs *regs)
1582 {
1583         struct task_struct *dump_task = current;
1584         const struct user_regset_view *view = task_user_regset_view(dump_task);
1585         struct elf_thread_core_info *t;
1586         struct elf_prpsinfo *psinfo;
1587         struct core_thread *ct;
1588         unsigned int i;
1589
1590         info->size = 0;
1591         info->thread = NULL;
1592
1593         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1594         if (psinfo == NULL) {
1595                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1596                 return 0;
1597         }
1598
1599         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1600
1601         /*
1602          * Figure out how many notes we're going to need for each thread.
1603          */
1604         info->thread_notes = 0;
1605         for (i = 0; i < view->n; ++i)
1606                 if (view->regsets[i].core_note_type != 0)
1607                         ++info->thread_notes;
1608
1609         /*
1610          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1611          * since it is our one special case.
1612          */
1613         if (unlikely(info->thread_notes == 0) ||
1614             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1615                 WARN_ON(1);
1616                 return 0;
1617         }
1618
1619         /*
1620          * Initialize the ELF file header.
1621          */
1622         fill_elf_header(elf, phdrs,
1623                         view->e_machine, view->e_flags);
1624
1625         /*
1626          * Allocate a structure for each thread.
1627          */
1628         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1629                 t = kzalloc(offsetof(struct elf_thread_core_info,
1630                                      notes[info->thread_notes]),
1631                             GFP_KERNEL);
1632                 if (unlikely(!t))
1633                         return 0;
1634
1635                 t->task = ct->task;
1636                 if (ct->task == dump_task || !info->thread) {
1637                         t->next = info->thread;
1638                         info->thread = t;
1639                 } else {
1640                         /*
1641                          * Make sure to keep the original task at
1642                          * the head of the list.
1643                          */
1644                         t->next = info->thread->next;
1645                         info->thread->next = t;
1646                 }
1647         }
1648
1649         /*
1650          * Now fill in each thread's information.
1651          */
1652         for (t = info->thread; t != NULL; t = t->next)
1653                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1654                         return 0;
1655
1656         /*
1657          * Fill in the two process-wide notes.
1658          */
1659         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1660         info->size += notesize(&info->psinfo);
1661
1662         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1663         info->size += notesize(&info->signote);
1664
1665         fill_auxv_note(&info->auxv, current->mm);
1666         info->size += notesize(&info->auxv);
1667
1668         if (fill_files_note(&info->files) == 0)
1669                 info->size += notesize(&info->files);
1670
1671         return 1;
1672 }
1673
1674 static size_t get_note_info_size(struct elf_note_info *info)
1675 {
1676         return info->size;
1677 }
1678
1679 /*
1680  * Write all the notes for each thread.  When writing the first thread, the
1681  * process-wide notes are interleaved after the first thread-specific note.
1682  */
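/*
 * For a two-thread process, the resulting order is: PRSTATUS(T1),
 * PRPSINFO, SIGINFO, AUXV, [FILE,] T1's remaining regset notes, then
 * PRSTATUS(T2) followed by T2's remaining regset notes.
 */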
1683 static int write_note_info(struct elf_note_info *info,
1684                            struct coredump_params *cprm)
1685 {
1686         bool first = true;
1687         struct elf_thread_core_info *t = info->thread;
1688
1689         do {
1690                 int i;
1691
1692                 if (!writenote(&t->notes[0], cprm))
1693                         return 0;
1694
1695                 if (first && !writenote(&info->psinfo, cprm))
1696                         return 0;
1697                 if (first && !writenote(&info->signote, cprm))
1698                         return 0;
1699                 if (first && !writenote(&info->auxv, cprm))
1700                         return 0;
1701                 if (first && info->files.data &&
1702                                 !writenote(&info->files, cprm))
1703                         return 0;
1704
1705                 for (i = 1; i < info->thread_notes; ++i)
1706                         if (t->notes[i].data &&
1707                             !writenote(&t->notes[i], cprm))
1708                                 return 0;
1709
1710                 first = false;
1711                 t = t->next;
1712         } while (t);
1713
1714         return 1;
1715 }
1716
1717 static void free_note_info(struct elf_note_info *info)
1718 {
1719         struct elf_thread_core_info *threads = info->thread;
1720         while (threads) {
1721                 unsigned int i;
1722                 struct elf_thread_core_info *t = threads;
1723                 threads = t->next;
1724                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1725                 for (i = 1; i < info->thread_notes; ++i)
1726                         kfree(t->notes[i].data);
1727                 kfree(t);
1728         }
1729         kfree(info->psinfo.data);
1730         vfree(info->files.data);
1731 }
1732
1733 #else
1734
1735 /* Here is the structure in which status of each thread is captured. */
1736 struct elf_thread_status
1737 {
1738         struct list_head list;
1739         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1740         elf_fpregset_t fpu;             /* NT_PRFPREG */
1741         struct task_struct *thread;
1742 #ifdef ELF_CORE_COPY_XFPREGS
1743         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1744 #endif
1745         struct memelfnote notes[3];
1746         int num_notes;
1747 };
1748
1749 /*
1750  * In order to add the per-thread information to the ELF core file, we
1751  * need to keep a linked list of every thread's pr_status and then
1752  * create a single section for them in the final core file.
1753  */
1754 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1755 {
1756         int sz = 0;
1757         struct task_struct *p = t->thread;
1758         t->num_notes = 0;
1759
1760         fill_prstatus(&t->prstatus, p, signr);
1761         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1762         
1763         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1764                   &(t->prstatus));
1765         t->num_notes++;
1766         sz += notesize(&t->notes[0]);
1767
1768         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1769                                                                 &t->fpu))) {
1770                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1771                           &(t->fpu));
1772                 t->num_notes++;
1773                 sz += notesize(&t->notes[1]);
1774         }
1775
1776 #ifdef ELF_CORE_COPY_XFPREGS
1777         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1778                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1779                           sizeof(t->xfpu), &t->xfpu);
1780                 t->num_notes++;
1781                 sz += notesize(&t->notes[2]);
1782         }
1783 #endif  
1784         return sz;
1785 }
1786
1787 struct elf_note_info {
1788         struct memelfnote *notes;
1789         struct memelfnote *notes_files;
1790         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1791         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1792         struct list_head thread_list;
1793         elf_fpregset_t *fpu;
1794 #ifdef ELF_CORE_COPY_XFPREGS
1795         elf_fpxregset_t *xfpu;
1796 #endif
1797         user_siginfo_t csigdata;
1798         int thread_status_size;
1799         int numnote;
1800 };
1801
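/*
 * A zero return from elf_note_info_init() (or from fill_note_info()
 * below) may leave *info partially allocated; that is safe because the
 * memset() NULLs every pointer first and free_note_info() tolerates
 * NULL members.
 */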
1802 static int elf_note_info_init(struct elf_note_info *info)
1803 {
1804         memset(info, 0, sizeof(*info));
1805         INIT_LIST_HEAD(&info->thread_list);
1806
1807         /* Allocate space for ELF notes */
1808         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1809         if (!info->notes)
1810                 return 0;
1811         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1812         if (!info->psinfo)
1813                 return 0;
1814         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1815         if (!info->prstatus)
1816                 return 0;
1817         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1818         if (!info->fpu)
1819                 return 0;
1820 #ifdef ELF_CORE_COPY_XFPREGS
1821         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1822         if (!info->xfpu)
1823                 return 0;
1824 #endif
1825         return 1;
1826 }
1827
1828 static int fill_note_info(struct elfhdr *elf, int phdrs,
1829                           struct elf_note_info *info,
1830                           const siginfo_t *siginfo, struct pt_regs *regs)
1831 {
1832         struct list_head *t;
1833         struct core_thread *ct;
1834         struct elf_thread_status *ets;
1835
1836         if (!elf_note_info_init(info))
1837                 return 0;
1838
1839         for (ct = current->mm->core_state->dumper.next;
1840                                         ct; ct = ct->next) {
1841                 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1842                 if (!ets)
1843                         return 0;
1844
1845                 ets->thread = ct->task;
1846                 list_add(&ets->list, &info->thread_list);
1847         }
1848
1849         list_for_each(t, &info->thread_list) {
1850                 int sz;
1851
1852                 ets = list_entry(t, struct elf_thread_status, list);
1853                 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1854                 info->thread_status_size += sz;
1855         }
1856         /* now collect the dump for the current task */
1857         memset(info->prstatus, 0, sizeof(*info->prstatus));
1858         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1859         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1860
1861         /* Set up header */
1862         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1863
1864         /*
1865          * Set up the notes in similar form to SVR4 core dumps made
1866          * with info from their /proc.
1867          */
1868
1869         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1870                   sizeof(*info->prstatus), info->prstatus);
1871         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1872         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1873                   sizeof(*info->psinfo), info->psinfo);
1874
1875         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1876         fill_auxv_note(info->notes + 3, current->mm);
1877         info->numnote = 4;
1878
1879         if (fill_files_note(info->notes + info->numnote) == 0) {
1880                 info->notes_files = info->notes + info->numnote;
1881                 info->numnote++;
1882         }
1883
1884         /* Try to dump the FPU. */
1885         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1886                                                                info->fpu);
1887         if (info->prstatus->pr_fpvalid)
1888                 fill_note(info->notes + info->numnote++,
1889                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1890 #ifdef ELF_CORE_COPY_XFPREGS
1891         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1892                 fill_note(info->notes + info->numnote++,
1893                           "LINUX", ELF_CORE_XFPREG_TYPE,
1894                           sizeof(*info->xfpu), info->xfpu);
1895 #endif
1896
1897         return 1;
1898 }
1899
1900 static size_t get_note_info_size(struct elf_note_info *info)
1901 {
1902         int sz = 0;
1903         int i;
1904
1905         for (i = 0; i < info->numnote; i++)
1906                 sz += notesize(info->notes + i);
1907
1908         sz += info->thread_status_size;
1909
1910         return sz;
1911 }
1912
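/*
 * Write the process-wide notes first, then the per-thread status notes
 * collected by elf_dump_thread_status().
 */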
1913 static int write_note_info(struct elf_note_info *info,
1914                            struct coredump_params *cprm)
1915 {
1916         int i;
1917         struct list_head *t;
1918
1919         for (i = 0; i < info->numnote; i++)
1920                 if (!writenote(info->notes + i, cprm))
1921                         return 0;
1922
1923         /* write out the thread status notes section */
1924         list_for_each(t, &info->thread_list) {
1925                 struct elf_thread_status *tmp =
1926                                 list_entry(t, struct elf_thread_status, list);
1927
1928                 for (i = 0; i < tmp->num_notes; i++)
1929                         if (!writenote(&tmp->notes[i], cprm))
1930                                 return 0;
1931         }
1932
1933         return 1;
1934 }
1935
1936 static void free_note_info(struct elf_note_info *info)
1937 {
1938         while (!list_empty(&info->thread_list)) {
1939                 struct list_head *tmp = info->thread_list.next;
1940                 list_del(tmp);
1941                 kfree(list_entry(tmp, struct elf_thread_status, list));
1942         }
1943
1944         /* Free data possibly allocated by fill_files_note(): */
1945         if (info->notes_files)
1946                 vfree(info->notes_files->data);
1947
1948         kfree(info->prstatus);
1949         kfree(info->psinfo);
1950         kfree(info->notes);
1951         kfree(info->fpu);
1952 #ifdef ELF_CORE_COPY_XFPREGS
1953         kfree(info->xfpu);
1954 #endif
1955 }
1956
1957 #endif
1958
1959 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1960                                         struct vm_area_struct *gate_vma)
1961 {
1962         struct vm_area_struct *ret = tsk->mm->mmap;
1963
1964         if (ret)
1965                 return ret;
1966         return gate_vma;
1967 }
1968 /*
1969  * Helper function for iterating across a vma list.  It ensures that the caller
1970  * will visit `gate_vma' prior to terminating the search.
1971  */
1972 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1973                                         struct vm_area_struct *gate_vma)
1974 {
1975         struct vm_area_struct *ret;
1976
1977         ret = this_vma->vm_next;
1978         if (ret)
1979                 return ret;
1980         if (this_vma == gate_vma)
1981                 return NULL;
1982         return gate_vma;
1983 }
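
/*
 * Together these two helpers give the traversal used by the dumper below:
 *
 *   for (vma = first_vma(current, gate_vma); vma != NULL;
 *        vma = next_vma(vma, gate_vma))
 *           ...
 */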
1984
1985 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1986                              elf_addr_t e_shoff, int segs)
1987 {
1988         elf->e_shoff = e_shoff;
1989         elf->e_shentsize = sizeof(*shdr4extnum);
1990         elf->e_shnum = 1;
1991         elf->e_shstrndx = SHN_UNDEF;
1992
1993         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1994
1995         shdr4extnum->sh_type = SHT_NULL;
1996         shdr4extnum->sh_size = elf->e_shnum;
1997         shdr4extnum->sh_link = elf->e_shstrndx;
1998         shdr4extnum->sh_info = segs;
1999 }
2000
2001 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2002                                      unsigned long mm_flags)
2003 {
2004         struct vm_area_struct *vma;
2005         size_t size = 0;
2006
2007         for (vma = first_vma(current, gate_vma); vma != NULL;
2008              vma = next_vma(vma, gate_vma))
2009                 size += vma_dump_size(vma, mm_flags);
2010         return size;
2011 }
2012
2013 /*
2014  * Actual dumper
2015  *
2016  * This is a two-pass process; first we find the offsets of the bits,
2017  * and then they are actually written out.  If we run out of core limit
2018  * we just truncate.
2019  */
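/*
 * Resulting file layout (all offsets are computed in the first pass):
 *
 *   ELF header
 *   program headers (including the one describing the notes)
 *   note data
 *   padding up to ELF_EXEC_PAGESIZE
 *   per-vma segment data, possibly truncated by vma_dump_size()
 *   arch-specific extra data, if any
 *   extended-numbering section header, when e_phnum == PN_XNUM
 */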
2020 static int elf_core_dump(struct coredump_params *cprm)
2021 {
2022         int has_dumped = 0;
2023         mm_segment_t fs;
2024         int segs;
2025         struct vm_area_struct *vma, *gate_vma;
2026         struct elfhdr *elf = NULL;
2027         loff_t offset = 0, dataoff;
2028         struct elf_note_info info = { };
2029         struct elf_phdr *phdr4note = NULL;
2030         struct elf_shdr *shdr4extnum = NULL;
2031         Elf_Half e_phnum;
2032         elf_addr_t e_shoff;
2033
2034         /*
2035          * We no longer stop all VM operations.
2036          * 
2037          * This is because those processes that could possibly change map_count
2038          * or the mmap / vma pages are now blocked in do_exit on current
2039          * finishing this core dump.
2040          *
2041          * Only ptrace can touch these memory addresses, but it doesn't change
2042          * the map_count or the pages allocated. So there is no possibility of
2043          * crashing while dumping the mm->vm_next areas to the core file.
2044          */
2045   
2046         /* alloc memory for large data structures: too large to be on stack */
2047         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2048         if (!elf)
2049                 goto out;
2050         /*
2051          * The number of segs is recorded in the ELF header as a 16-bit value.
2052          * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2053          */
2054         segs = current->mm->map_count;
2055         segs += elf_core_extra_phdrs();
2056
2057         gate_vma = get_gate_vma(current->mm);
2058         if (gate_vma != NULL)
2059                 segs++;
2060
2061         /* for notes section */
2062         segs++;
2063
2064         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2065          * this, the kernel supports extended numbering. Have a look at
2066          * include/linux/elf.h for further information. */
2067         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2068
2069         /*
2070          * Collect all the non-memory information about the process for the
2071          * notes.  This also sets up the file header.
2072          */
2073         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2074                 goto cleanup;
2075
2076         has_dumped = 1;
2077
2078         fs = get_fs();
2079         set_fs(KERNEL_DS);
2080
2081         offset += sizeof(*elf);                         /* Elf header */
2082         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2083
2084         /* Write notes phdr entry */
2085         {
2086                 size_t sz = get_note_info_size(&info);
2087
2088                 sz += elf_coredump_extra_notes_size();
2089
2090                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2091                 if (!phdr4note)
2092                         goto end_coredump;
2093
2094                 fill_elf_note_phdr(phdr4note, sz, offset);
2095                 offset += sz;
2096         }
2097
2098         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2099
2100         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2101         offset += elf_core_extra_data_size();
2102         e_shoff = offset;
2103
2104         if (e_phnum == PN_XNUM) {
2105                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2106                 if (!shdr4extnum)
2107                         goto end_coredump;
2108                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2109         }
2110
2111         offset = dataoff;
2112
2113         if (!dump_emit(cprm, elf, sizeof(*elf)))
2114                 goto end_coredump;
2115
2116         if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2117                 goto end_coredump;
2118
2119         /* Write program headers for segments dump */
2120         for (vma = first_vma(current, gate_vma); vma != NULL;
2121                         vma = next_vma(vma, gate_vma)) {
2122                 struct elf_phdr phdr;
2123
2124                 phdr.p_type = PT_LOAD;
2125                 phdr.p_offset = offset;
2126                 phdr.p_vaddr = vma->vm_start;
2127                 phdr.p_paddr = 0;
2128                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2129                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2130                 offset += phdr.p_filesz;
2131                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2132                 if (vma->vm_flags & VM_WRITE)
2133                         phdr.p_flags |= PF_W;
2134                 if (vma->vm_flags & VM_EXEC)
2135                         phdr.p_flags |= PF_X;
2136                 phdr.p_align = ELF_EXEC_PAGESIZE;
2137
2138                 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2139                         goto end_coredump;
2140         }
2141
2142         if (!elf_core_write_extra_phdrs(cprm, offset))
2143                 goto end_coredump;
2144
2145         /* write out the notes section */
2146         if (!write_note_info(&info, cprm))
2147                 goto end_coredump;
2148
2149         if (elf_coredump_extra_notes_write(cprm))
2150                 goto end_coredump;
2151
2152         /* Align to page */
2153         if (!dump_skip(cprm, dataoff - cprm->written))
2154                 goto end_coredump;
2155
2156         for (vma = first_vma(current, gate_vma); vma != NULL;
2157                         vma = next_vma(vma, gate_vma)) {
2158                 unsigned long addr;
2159                 unsigned long end;
2160
2161                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2162
2163                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2164                         struct page *page;
2165                         int stop;
2166
2167                         page = get_dump_page(addr);
2168                         if (page) {
2169                                 void *kaddr = kmap(page);
2170                                 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2171                                 kunmap(page);
2172                                 page_cache_release(page);
2173                         } else
2174                                 stop = !dump_skip(cprm, PAGE_SIZE);
2175                         if (stop)
2176                                 goto end_coredump;
2177                 }
2178         }
2179
2180         if (!elf_core_write_extra_data(cprm))
2181                 goto end_coredump;
2182
2183         if (e_phnum == PN_XNUM) {
2184                 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2185                         goto end_coredump;
2186         }
2187
2188 end_coredump:
2189         set_fs(fs);
2190
2191 cleanup:
2192         free_note_info(&info);
2193         kfree(shdr4extnum);
2194         kfree(phdr4note);
2195         kfree(elf);
2196 out:
2197         return has_dumped;
2198 }
2199
2200 #endif          /* CONFIG_ELF_CORE */
2201
2202 static int __init init_elf_binfmt(void)
2203 {
2204         register_binfmt(&elf_format);
2205         return 0;
2206 }
2207
2208 static void __exit exit_elf_binfmt(void)
2209 {
2210         /* Remove the ELF loader. */
2211         unregister_binfmt(&elf_format);
2212 }
2213
2214 core_initcall(init_elf_binfmt);
2215 module_exit(exit_elf_binfmt);
2216 MODULE_LICENSE("GPL");