]> Pileus Git - ~andy/linux/blob - fs/binfmt_elf.c
xfrm: allow to avoid copying DSCP during encapsulation
[~andy/linux] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40
/*
 * Fallbacks for user_long_t and user_siginfo_t: unless a macro of that
 * name is already defined by the time we get here, they are plain
 * long and siginfo_t respectively.
 */
#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif
47
48 static int load_elf_binary(struct linux_binprm *bprm);
49 static int load_elf_library(struct file *);
50 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
51                                 int, int, unsigned long);
52
53 /*
54  * If we don't support core dumping, then supply a NULL so we
55  * don't even try.
56  */
57 #ifdef CONFIG_ELF_CORE
58 static int elf_core_dump(struct coredump_params *cprm);
59 #else
60 #define elf_core_dump   NULL
61 #endif
62
/*
 * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; the
 * ELF_PAGE*() helpers below round to this granularity.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif
68
69 #ifndef ELF_CORE_EFLAGS
70 #define ELF_CORE_EFLAGS 0
71 #endif
72
/*
 * Address helpers working at ELF_MIN_ALIGN granularity:
 *   ELF_PAGESTART(v)  - v rounded down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET(v) - offset of v within its ELF_MIN_ALIGN unit
 *   ELF_PAGEALIGN(v)  - v rounded up to an ELF_MIN_ALIGN boundary
 *
 * The rounding masks are built in unsigned long.  If the mask were
 * computed in the type of ELF_MIN_ALIGN and that type were a 32-bit
 * unsigned one, the complement would zero-extend and wipe the upper
 * half of a 64-bit address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
76
/*
 * Binary-format descriptor for ELF.  It points the generic binfmt hooks
 * at this file's handlers: load_elf_binary() for executables,
 * load_elf_library() for shared libraries and elf_core_dump() for core
 * dumps (NULL when CONFIG_ELF_CORE is not set, see above).
 * min_coredump is set to ELF_EXEC_PAGESIZE.
 */
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};
84
/*
 * True when x, viewed as an unsigned long, is at or above TASK_SIZE.
 * This file applies it to the results of vm_brk(), vm_mmap() and
 * elf_map() to detect failure.
 * NOTE(review): presumably negative errno values cast to unsigned long
 * always land above TASK_SIZE - confirm for every architecture.
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
86
87 static int set_brk(unsigned long start, unsigned long end)
88 {
89         start = ELF_PAGEALIGN(start);
90         end = ELF_PAGEALIGN(end);
91         if (end > start) {
92                 unsigned long addr;
93                 addr = vm_brk(start, end - start);
94                 if (BAD_ADDR(addr))
95                         return addr;
96         }
97         current->mm->start_brk = current->mm->brk = end;
98         return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  This would
103    contain the junk from the file that should not
104    be in memory
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108         unsigned long nbyte;
109
110         nbyte = ELF_PAGEOFFSET(elf_bss);
111         if (nbyte) {
112                 nbyte = ELF_MIN_ALIGN - nbyte;
113                 if (clear_user((void __user *) elf_bss, nbyte))
114                         return -EFAULT;
115         }
116         return 0;
117 }
118
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - address 'items' elements beyond sp in the
 *                            growth direction, rounded to a 16-byte
 *                            boundary (up when the stack grows up, down
 *                            otherwise)
 *   STACK_ALLOC(sp, len)   - advance sp by 'len' and yield the start of
 *                            the reserved area (the old sp when the
 *                            stack grows up, the new sp otherwise)
 *
 * Every macro argument is parenthesized so that an expression argument
 * such as "a + b" is applied as a whole.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
134 #ifndef ELF_BASE_PLATFORM
135 /*
136  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138  * will be copied to the user stack in the same manner as AT_PLATFORM.
139  */
140 #define ELF_BASE_PLATFORM NULL
141 #endif
142
/*
 * Build the initial user stack contents for a freshly loaded ELF image:
 * the platform strings and random seed bytes, followed by argc, the argv
 * and envp pointer arrays and the auxiliary vector.
 *
 * @bprm:             exec state; supplies the current stack pointer
 *                    (bprm->p), argc/envc and the AT_EXECFN/AT_EXECFD
 *                    values.  bprm->p is updated to the final stack
 *                    position.
 * @exec:             ELF header of the executable; e_phoff, e_phnum and
 *                    e_entry are published through the aux vector.
 * @load_addr:        added to e_phoff to form AT_PHDR.
 * @interp_load_addr: published as AT_BASE.
 *
 * The aux vector is assembled in current->mm->saved_auxv and then copied
 * to the stack.  mm->arg_start is read and mm->arg_end, env_start and
 * env_end are written while walking the argument and environment strings.
 *
 * Returns 0 on success, -EFAULT if a user-space store fails or the stack
 * VMA cannot be found/extended, and -EINVAL if an argument or environment
 * string has an unacceptable length.
 */
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                /* Length includes the terminating NUL. */
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                /* Length includes the terminating NUL. */
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         * Their user address is published below as AT_RANDOM.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
        /*
         * NEW_AUX_ENT appends one (id, value) pair to elf_info.
         * NOTE(review): nothing here checks ei_index against the size of
         * saved_auxv; the AT_VECTOR_SIZE_* constants mentioned in the
         * comments are presumably what keeps it in bounds - confirm.
         */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /* 
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        /* Reserve room for the aux vector (ei_index slots). */
        sp = STACK_ADD(p, ei_index);

        /*
         * Slots still needed: argv pointers plus terminator, envp
         * pointers plus terminator, and one slot for argc.
         */
        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        /* envp starts right after argv's terminating slot. */
        envp = argv + argc + 1;

        /*
         * Populate argv and envp.  The strings themselves start at
         * mm->arg_start; p walks over them one string at a time and each
         * string's user address is stored in the next pointer slot.
         */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                /* len is used as the full stride to the next string. */
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        /* Terminate argv. */
        if (__put_user(0, argv))
                return -EFAULT;
        /* The environment strings follow the argument strings. */
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        /* Terminate envp. */
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /*
         * Put the elf_info on the stack in the right place: directly
         * after envp's terminating slot.
         */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
324
325 #ifndef elf_map
326
327 static unsigned long elf_map(struct file *filep, unsigned long addr,
328                 struct elf_phdr *eppnt, int prot, int type,
329                 unsigned long total_size)
330 {
331         unsigned long map_addr;
332         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
333         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
334         addr = ELF_PAGESTART(addr);
335         size = ELF_PAGEALIGN(size);
336
337         /* mmap() will return -EINVAL if given a zero size, but a
338          * segment with zero filesize is perfectly valid */
339         if (!size)
340                 return addr;
341
342         /*
343         * total_size is the size of the ELF (interpreter) image.
344         * The _first_ mmap needs to know the full size, otherwise
345         * randomization might put this image into an overlapping
346         * position with the ELF binary image. (since size < total_size)
347         * So we first map the 'big' image - and unmap the remainder at
348         * the end. (which unmap is needed for ELF images with holes.)
349         */
350         if (total_size) {
351                 total_size = ELF_PAGEALIGN(total_size);
352                 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
353                 if (!BAD_ADDR(map_addr))
354                         vm_munmap(map_addr+size, total_size-size);
355         } else
356                 map_addr = vm_mmap(filep, addr, size, prot, type, off);
357
358         return(map_addr);
359 }
360
361 #endif /* !elf_map */
362
363 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
364 {
365         int i, first_idx = -1, last_idx = -1;
366
367         for (i = 0; i < nr; i++) {
368                 if (cmds[i].p_type == PT_LOAD) {
369                         last_idx = i;
370                         if (first_idx == -1)
371                                 first_idx = i;
372                 }
373         }
374         if (first_idx == -1)
375                 return 0;
376
377         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
378                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
379 }
380
381
382 /* This is much more generalized than the library routine read function,
383    so we keep this separate.  Technically the library read function
384    is only provided so that we can read a.out libraries that have
385    an ELF header */
386
387 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
388                 struct file *interpreter, unsigned long *interp_map_addr,
389                 unsigned long no_base)
390 {
391         struct elf_phdr *elf_phdata;
392         struct elf_phdr *eppnt;
393         unsigned long load_addr = 0;
394         int load_addr_set = 0;
395         unsigned long last_bss = 0, elf_bss = 0;
396         unsigned long error = ~0UL;
397         unsigned long total_size;
398         int retval, i, size;
399
400         /* First of all, some simple consistency checks */
401         if (interp_elf_ex->e_type != ET_EXEC &&
402             interp_elf_ex->e_type != ET_DYN)
403                 goto out;
404         if (!elf_check_arch(interp_elf_ex))
405                 goto out;
406         if (!interpreter->f_op || !interpreter->f_op->mmap)
407                 goto out;
408
409         /*
410          * If the size of this structure has changed, then punt, since
411          * we will be doing the wrong thing.
412          */
413         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
414                 goto out;
415         if (interp_elf_ex->e_phnum < 1 ||
416                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
417                 goto out;
418
419         /* Now read in all of the header information */
420         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
421         if (size > ELF_MIN_ALIGN)
422                 goto out;
423         elf_phdata = kmalloc(size, GFP_KERNEL);
424         if (!elf_phdata)
425                 goto out;
426
427         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
428                              (char *)elf_phdata, size);
429         error = -EIO;
430         if (retval != size) {
431                 if (retval < 0)
432                         error = retval; 
433                 goto out_close;
434         }
435
436         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
437         if (!total_size) {
438                 error = -EINVAL;
439                 goto out_close;
440         }
441
442         eppnt = elf_phdata;
443         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
444                 if (eppnt->p_type == PT_LOAD) {
445                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
446                         int elf_prot = 0;
447                         unsigned long vaddr = 0;
448                         unsigned long k, map_addr;
449
450                         if (eppnt->p_flags & PF_R)
451                                 elf_prot = PROT_READ;
452                         if (eppnt->p_flags & PF_W)
453                                 elf_prot |= PROT_WRITE;
454                         if (eppnt->p_flags & PF_X)
455                                 elf_prot |= PROT_EXEC;
456                         vaddr = eppnt->p_vaddr;
457                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
458                                 elf_type |= MAP_FIXED;
459                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
460                                 load_addr = -vaddr;
461
462                         map_addr = elf_map(interpreter, load_addr + vaddr,
463                                         eppnt, elf_prot, elf_type, total_size);
464                         total_size = 0;
465                         if (!*interp_map_addr)
466                                 *interp_map_addr = map_addr;
467                         error = map_addr;
468                         if (BAD_ADDR(map_addr))
469                                 goto out_close;
470
471                         if (!load_addr_set &&
472                             interp_elf_ex->e_type == ET_DYN) {
473                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
474                                 load_addr_set = 1;
475                         }
476
477                         /*
478                          * Check to see if the section's size will overflow the
479                          * allowed task size. Note that p_filesz must always be
480                          * <= p_memsize so it's only necessary to check p_memsz.
481                          */
482                         k = load_addr + eppnt->p_vaddr;
483                         if (BAD_ADDR(k) ||
484                             eppnt->p_filesz > eppnt->p_memsz ||
485                             eppnt->p_memsz > TASK_SIZE ||
486                             TASK_SIZE - eppnt->p_memsz < k) {
487                                 error = -ENOMEM;
488                                 goto out_close;
489                         }
490
491                         /*
492                          * Find the end of the file mapping for this phdr, and
493                          * keep track of the largest address we see for this.
494                          */
495                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
496                         if (k > elf_bss)
497                                 elf_bss = k;
498
499                         /*
500                          * Do the same thing for the memory mapping - between
501                          * elf_bss and last_bss is the bss section.
502                          */
503                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
504                         if (k > last_bss)
505                                 last_bss = k;
506                 }
507         }
508
509         if (last_bss > elf_bss) {
510                 /*
511                  * Now fill out the bss section.  First pad the last page up
512                  * to the page boundary, and then perform a mmap to make sure
513                  * that there are zero-mapped pages up to and including the
514                  * last bss page.
515                  */
516                 if (padzero(elf_bss)) {
517                         error = -EFAULT;
518                         goto out_close;
519                 }
520
521                 /* What we have mapped so far */
522                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
523
524                 /* Map the last of the bss segment */
525                 error = vm_brk(elf_bss, last_bss - elf_bss);
526                 if (BAD_ADDR(error))
527                         goto out_close;
528         }
529
530         error = load_addr;
531
532 out_close:
533         kfree(elf_phdata);
534 out:
535         return error;
536 }
537
538 /*
539  * These are the functions used to load ELF style executables and shared
540  * libraries.  There is no binary dependent code anywhere else.
541  */
542
543 #define INTERPRETER_NONE 0
544 #define INTERPRETER_ELF 2
545
/*
 * Default mask for the random stack offset, used unless STACK_RND_MASK is
 * already defined.  randomize_stack_top() applies it to a random value
 * and then shifts the result left by PAGE_SHIFT, so the mask is expressed
 * in pages.
 */
#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
549
550 static unsigned long randomize_stack_top(unsigned long stack_top)
551 {
552         unsigned int random_variable = 0;
553
554         if ((current->flags & PF_RANDOMIZE) &&
555                 !(current->personality & ADDR_NO_RANDOMIZE)) {
556                 random_variable = get_random_int() & STACK_RND_MASK;
557                 random_variable <<= PAGE_SHIFT;
558         }
559 #ifdef CONFIG_STACK_GROWSUP
560         return PAGE_ALIGN(stack_top) + random_variable;
561 #else
562         return PAGE_ALIGN(stack_top) - random_variable;
563 #endif
564 }
565
566 static int load_elf_binary(struct linux_binprm *bprm)
567 {
568         struct file *interpreter = NULL; /* to shut gcc up */
569         unsigned long load_addr = 0, load_bias = 0;
570         int load_addr_set = 0;
571         char * elf_interpreter = NULL;
572         unsigned long error;
573         struct elf_phdr *elf_ppnt, *elf_phdata;
574         unsigned long elf_bss, elf_brk;
575         int retval, i;
576         unsigned int size;
577         unsigned long elf_entry;
578         unsigned long interp_load_addr = 0;
579         unsigned long start_code, end_code, start_data, end_data;
580         unsigned long reloc_func_desc __maybe_unused = 0;
581         int executable_stack = EXSTACK_DEFAULT;
582         unsigned long def_flags = 0;
583         struct pt_regs *regs = current_pt_regs();
584         struct {
585                 struct elfhdr elf_ex;
586                 struct elfhdr interp_elf_ex;
587         } *loc;
588
589         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
590         if (!loc) {
591                 retval = -ENOMEM;
592                 goto out_ret;
593         }
594         
595         /* Get the exec-header */
596         loc->elf_ex = *((struct elfhdr *)bprm->buf);
597
598         retval = -ENOEXEC;
599         /* First of all, some simple consistency checks */
600         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
601                 goto out;
602
603         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
604                 goto out;
605         if (!elf_check_arch(&loc->elf_ex))
606                 goto out;
607         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
608                 goto out;
609
610         /* Now read in all of the header information */
611         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
612                 goto out;
613         if (loc->elf_ex.e_phnum < 1 ||
614                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
615                 goto out;
616         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
617         retval = -ENOMEM;
618         elf_phdata = kmalloc(size, GFP_KERNEL);
619         if (!elf_phdata)
620                 goto out;
621
622         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
623                              (char *)elf_phdata, size);
624         if (retval != size) {
625                 if (retval >= 0)
626                         retval = -EIO;
627                 goto out_free_ph;
628         }
629
630         elf_ppnt = elf_phdata;
631         elf_bss = 0;
632         elf_brk = 0;
633
634         start_code = ~0UL;
635         end_code = 0;
636         start_data = 0;
637         end_data = 0;
638
639         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
640                 if (elf_ppnt->p_type == PT_INTERP) {
641                         /* This is the program interpreter used for
642                          * shared libraries - for now assume that this
643                          * is an a.out format binary
644                          */
645                         retval = -ENOEXEC;
646                         if (elf_ppnt->p_filesz > PATH_MAX || 
647                             elf_ppnt->p_filesz < 2)
648                                 goto out_free_ph;
649
650                         retval = -ENOMEM;
651                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
652                                                   GFP_KERNEL);
653                         if (!elf_interpreter)
654                                 goto out_free_ph;
655
656                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
657                                              elf_interpreter,
658                                              elf_ppnt->p_filesz);
659                         if (retval != elf_ppnt->p_filesz) {
660                                 if (retval >= 0)
661                                         retval = -EIO;
662                                 goto out_free_interp;
663                         }
664                         /* make sure path is NULL terminated */
665                         retval = -ENOEXEC;
666                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
667                                 goto out_free_interp;
668
669                         interpreter = open_exec(elf_interpreter);
670                         retval = PTR_ERR(interpreter);
671                         if (IS_ERR(interpreter))
672                                 goto out_free_interp;
673
674                         /*
675                          * If the binary is not readable then enforce
676                          * mm->dumpable = 0 regardless of the interpreter's
677                          * permissions.
678                          */
679                         would_dump(bprm, interpreter);
680
681                         retval = kernel_read(interpreter, 0, bprm->buf,
682                                              BINPRM_BUF_SIZE);
683                         if (retval != BINPRM_BUF_SIZE) {
684                                 if (retval >= 0)
685                                         retval = -EIO;
686                                 goto out_free_dentry;
687                         }
688
689                         /* Get the exec headers */
690                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
691                         break;
692                 }
693                 elf_ppnt++;
694         }
695
696         elf_ppnt = elf_phdata;
697         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
698                 if (elf_ppnt->p_type == PT_GNU_STACK) {
699                         if (elf_ppnt->p_flags & PF_X)
700                                 executable_stack = EXSTACK_ENABLE_X;
701                         else
702                                 executable_stack = EXSTACK_DISABLE_X;
703                         break;
704                 }
705
706         /* Some simple consistency checks for the interpreter */
707         if (elf_interpreter) {
708                 retval = -ELIBBAD;
709                 /* Not an ELF interpreter */
710                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
711                         goto out_free_dentry;
712                 /* Verify the interpreter has a valid arch */
713                 if (!elf_check_arch(&loc->interp_elf_ex))
714                         goto out_free_dentry;
715         }
716
717         /* Flush all traces of the currently running executable */
718         retval = flush_old_exec(bprm);
719         if (retval)
720                 goto out_free_dentry;
721
722         /* OK, This is the point of no return */
723         current->mm->def_flags = def_flags;
724
725         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
726            may depend on the personality.  */
727         SET_PERSONALITY(loc->elf_ex);
728         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
729                 current->personality |= READ_IMPLIES_EXEC;
730
731         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
732                 current->flags |= PF_RANDOMIZE;
733
734         setup_new_exec(bprm);
735
736         /* Do this so that we can load the interpreter, if need be.  We will
737            change some of these later */
738         current->mm->free_area_cache = current->mm->mmap_base;
739         current->mm->cached_hole_size = 0;
740         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
741                                  executable_stack);
742         if (retval < 0) {
743                 send_sig(SIGKILL, current, 0);
744                 goto out_free_dentry;
745         }
746         
747         current->mm->start_stack = bprm->p;
748
749         /* Now we do a little grungy work by mmapping the ELF image into
750            the correct location in memory. */
751         for(i = 0, elf_ppnt = elf_phdata;
752             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
753                 int elf_prot = 0, elf_flags;
754                 unsigned long k, vaddr;
755
756                 if (elf_ppnt->p_type != PT_LOAD)
757                         continue;
758
759                 if (unlikely (elf_brk > elf_bss)) {
760                         unsigned long nbyte;
761                     
762                         /* There was a PT_LOAD segment with p_memsz > p_filesz
763                            before this one. Map anonymous pages, if needed,
764                            and clear the area.  */
765                         retval = set_brk(elf_bss + load_bias,
766                                          elf_brk + load_bias);
767                         if (retval) {
768                                 send_sig(SIGKILL, current, 0);
769                                 goto out_free_dentry;
770                         }
771                         nbyte = ELF_PAGEOFFSET(elf_bss);
772                         if (nbyte) {
773                                 nbyte = ELF_MIN_ALIGN - nbyte;
774                                 if (nbyte > elf_brk - elf_bss)
775                                         nbyte = elf_brk - elf_bss;
776                                 if (clear_user((void __user *)elf_bss +
777                                                         load_bias, nbyte)) {
778                                         /*
779                                          * This bss-zeroing can fail if the ELF
780                                          * file specifies odd protections. So
781                                          * we don't check the return value
782                                          */
783                                 }
784                         }
785                 }
786
787                 if (elf_ppnt->p_flags & PF_R)
788                         elf_prot |= PROT_READ;
789                 if (elf_ppnt->p_flags & PF_W)
790                         elf_prot |= PROT_WRITE;
791                 if (elf_ppnt->p_flags & PF_X)
792                         elf_prot |= PROT_EXEC;
793
794                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
795
796                 vaddr = elf_ppnt->p_vaddr;
797                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
798                         elf_flags |= MAP_FIXED;
799                 } else if (loc->elf_ex.e_type == ET_DYN) {
800                         /* Try and get dynamic programs out of the way of the
801                          * default mmap base, as well as whatever program they
802                          * might try to exec.  This is because the brk will
803                          * follow the loader, and is not movable.  */
804 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
805                         /* Memory randomization might have been switched off
806                          * in runtime via sysctl.
807                          * If that is the case, retain the original non-zero
808                          * load_bias value in order to establish proper
809                          * non-randomized mappings.
810                          */
811                         if (current->flags & PF_RANDOMIZE)
812                                 load_bias = 0;
813                         else
814                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
815 #else
816                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
817 #endif
818                 }
819
820                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
821                                 elf_prot, elf_flags, 0);
822                 if (BAD_ADDR(error)) {
823                         send_sig(SIGKILL, current, 0);
824                         retval = IS_ERR((void *)error) ?
825                                 PTR_ERR((void*)error) : -EINVAL;
826                         goto out_free_dentry;
827                 }
828
829                 if (!load_addr_set) {
830                         load_addr_set = 1;
831                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
832                         if (loc->elf_ex.e_type == ET_DYN) {
833                                 load_bias += error -
834                                              ELF_PAGESTART(load_bias + vaddr);
835                                 load_addr += load_bias;
836                                 reloc_func_desc = load_bias;
837                         }
838                 }
839                 k = elf_ppnt->p_vaddr;
840                 if (k < start_code)
841                         start_code = k;
842                 if (start_data < k)
843                         start_data = k;
844
845                 /*
846                  * Check to see if the section's size will overflow the
847                  * allowed task size. Note that p_filesz must always be
848                  * <= p_memsz so it is only necessary to check p_memsz.
849                  */
850                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
851                     elf_ppnt->p_memsz > TASK_SIZE ||
852                     TASK_SIZE - elf_ppnt->p_memsz < k) {
853                         /* set_brk can never work. Avoid overflows. */
854                         send_sig(SIGKILL, current, 0);
855                         retval = -EINVAL;
856                         goto out_free_dentry;
857                 }
858
859                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
860
861                 if (k > elf_bss)
862                         elf_bss = k;
863                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
864                         end_code = k;
865                 if (end_data < k)
866                         end_data = k;
867                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
868                 if (k > elf_brk)
869                         elf_brk = k;
870         }
871
872         loc->elf_ex.e_entry += load_bias;
873         elf_bss += load_bias;
874         elf_brk += load_bias;
875         start_code += load_bias;
876         end_code += load_bias;
877         start_data += load_bias;
878         end_data += load_bias;
879
880         /* Calling set_brk effectively mmaps the pages that we need
881          * for the bss and break sections.  We must do this before
882          * mapping in the interpreter, to make sure it doesn't wind
883          * up getting placed where the bss needs to go.
884          */
885         retval = set_brk(elf_bss, elf_brk);
886         if (retval) {
887                 send_sig(SIGKILL, current, 0);
888                 goto out_free_dentry;
889         }
890         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
891                 send_sig(SIGSEGV, current, 0);
892                 retval = -EFAULT; /* Nobody gets to see this, but.. */
893                 goto out_free_dentry;
894         }
895
896         if (elf_interpreter) {
897                 unsigned long interp_map_addr = 0;
898
899                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
900                                             interpreter,
901                                             &interp_map_addr,
902                                             load_bias);
903                 if (!IS_ERR((void *)elf_entry)) {
904                         /*
905                          * load_elf_interp() returns relocation
906                          * adjustment
907                          */
908                         interp_load_addr = elf_entry;
909                         elf_entry += loc->interp_elf_ex.e_entry;
910                 }
911                 if (BAD_ADDR(elf_entry)) {
912                         force_sig(SIGSEGV, current);
913                         retval = IS_ERR((void *)elf_entry) ?
914                                         (int)elf_entry : -EINVAL;
915                         goto out_free_dentry;
916                 }
917                 reloc_func_desc = interp_load_addr;
918
919                 allow_write_access(interpreter);
920                 fput(interpreter);
921                 kfree(elf_interpreter);
922         } else {
923                 elf_entry = loc->elf_ex.e_entry;
924                 if (BAD_ADDR(elf_entry)) {
925                         force_sig(SIGSEGV, current);
926                         retval = -EINVAL;
927                         goto out_free_dentry;
928                 }
929         }
930
931         kfree(elf_phdata);
932
933         set_binfmt(&elf_format);
934
935 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
936         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
937         if (retval < 0) {
938                 send_sig(SIGKILL, current, 0);
939                 goto out;
940         }
941 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
942
943         install_exec_creds(bprm);
944         retval = create_elf_tables(bprm, &loc->elf_ex,
945                           load_addr, interp_load_addr);
946         if (retval < 0) {
947                 send_sig(SIGKILL, current, 0);
948                 goto out;
949         }
950         /* N.B. passed_fileno might not be initialized? */
951         current->mm->end_code = end_code;
952         current->mm->start_code = start_code;
953         current->mm->start_data = start_data;
954         current->mm->end_data = end_data;
955         current->mm->start_stack = bprm->p;
956
957 #ifdef arch_randomize_brk
958         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
959                 current->mm->brk = current->mm->start_brk =
960                         arch_randomize_brk(current->mm);
961 #ifdef CONFIG_COMPAT_BRK
962                 current->brk_randomized = 1;
963 #endif
964         }
965 #endif
966
967         if (current->personality & MMAP_PAGE_ZERO) {
968                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
969                    and some applications "depend" upon this behavior.
970                    Since we do not have the power to recompile these, we
971                    emulate the SVr4 behavior. Sigh. */
972                 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
973                                 MAP_FIXED | MAP_PRIVATE, 0);
974         }
975
976 #ifdef ELF_PLAT_INIT
977         /*
978          * The ABI may specify that certain registers be set up in special
979          * ways (on i386 %edx is the address of a DT_FINI function, for
980          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
981          * that the e_entry field is the address of the function descriptor
982          * for the startup routine, rather than the address of the startup
983          * routine itself.  This macro performs whatever initialization to
984          * the regs structure is required as well as any relocations to the
985          * function descriptor entries when executing dynamically links apps.
986          */
987         ELF_PLAT_INIT(regs, reloc_func_desc);
988 #endif
989
990         start_thread(regs, elf_entry, bprm->p);
991         retval = 0;
992 out:
993         kfree(loc);
994 out_ret:
995         return retval;
996
997         /* error cleanup */
998 out_free_dentry:
999         allow_write_access(interpreter);
1000         if (interpreter)
1001                 fput(interpreter);
1002 out_free_interp:
1003         kfree(elf_interpreter);
1004 out_free_ph:
1005         kfree(elf_phdata);
1006         goto out;
1007 }
1008
1009 /* This is really simpleminded and specialized - we are loading an
1010    a.out library that is given an ELF header. */
1011 static int load_elf_library(struct file *file)
1012 {
1013         struct elf_phdr *elf_phdata;
1014         struct elf_phdr *eppnt;
1015         unsigned long elf_bss, bss, len;
1016         int retval, error, i, j;
1017         struct elfhdr elf_ex;
1018
1019         error = -ENOEXEC;
1020         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1021         if (retval != sizeof(elf_ex))
1022                 goto out;
1023
1024         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1025                 goto out;
1026
1027         /* First of all, some simple consistency checks */
1028         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1029             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1030                 goto out;
1031
1032         /* Now read in all of the header information */
1033
1034         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1035         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1036
1037         error = -ENOMEM;
1038         elf_phdata = kmalloc(j, GFP_KERNEL);
1039         if (!elf_phdata)
1040                 goto out;
1041
1042         eppnt = elf_phdata;
1043         error = -ENOEXEC;
1044         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1045         if (retval != j)
1046                 goto out_free_ph;
1047
1048         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1049                 if ((eppnt + i)->p_type == PT_LOAD)
1050                         j++;
1051         if (j != 1)
1052                 goto out_free_ph;
1053
1054         while (eppnt->p_type != PT_LOAD)
1055                 eppnt++;
1056
1057         /* Now use mmap to map the library into memory. */
1058         error = vm_mmap(file,
1059                         ELF_PAGESTART(eppnt->p_vaddr),
1060                         (eppnt->p_filesz +
1061                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1062                         PROT_READ | PROT_WRITE | PROT_EXEC,
1063                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1064                         (eppnt->p_offset -
1065                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1066         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1067                 goto out_free_ph;
1068
1069         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1070         if (padzero(elf_bss)) {
1071                 error = -EFAULT;
1072                 goto out_free_ph;
1073         }
1074
1075         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1076                             ELF_MIN_ALIGN - 1);
1077         bss = eppnt->p_memsz + eppnt->p_vaddr;
1078         if (bss > len)
1079                 vm_brk(len, bss - len);
1080         error = 0;
1081
1082 out_free_ph:
1083         kfree(elf_phdata);
1084 out:
1085         return error;
1086 }
1087
1088 #ifdef CONFIG_ELF_CORE
1089 /*
1090  * ELF core dumper
1091  *
1092  * Modelled on fs/exec.c:aout_core_dump()
1093  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1094  */
1095
1096 /*
1097  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1098  * that are useful for post-mortem analysis are included in every core dump.
1099  * In that way we ensure that the core dump is fully interpretable later
1100  * without matching up the same kernel and hardware config to see what PC values
1101  * meant. These special mappings include - vDSO, vsyscall, and other
1102  * architecture specific mappings
1103  */
1104 static bool always_dump_vma(struct vm_area_struct *vma)
1105 {
1106         /* Any vsyscall mappings? */
1107         if (vma == get_gate_vma(vma->vm_mm))
1108                 return true;
1109         /*
1110          * arch_vma_name() returns non-NULL for special architecture mappings,
1111          * such as vDSO sections.
1112          */
1113         if (arch_vma_name(vma))
1114                 return true;
1115
1116         return false;
1117 }
1118
1119 /*
1120  * Decide what to dump of a segment, part, all or none.
1121  */
1122 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1123                                    unsigned long mm_flags)
1124 {
1125 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1126
1127         /* always dump the vdso and vsyscall sections */
1128         if (always_dump_vma(vma))
1129                 goto whole;
1130
1131         if (vma->vm_flags & VM_DONTDUMP)
1132                 return 0;
1133
1134         /* Hugetlb memory check */
1135         if (vma->vm_flags & VM_HUGETLB) {
1136                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1137                         goto whole;
1138                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1139                         goto whole;
1140         }
1141
1142         /* Do not dump I/O mapped devices or special mappings */
1143         if (vma->vm_flags & VM_IO)
1144                 return 0;
1145
1146         /* By default, dump shared memory if mapped from an anonymous file. */
1147         if (vma->vm_flags & VM_SHARED) {
1148                 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1149                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1150                         goto whole;
1151                 return 0;
1152         }
1153
1154         /* Dump segments that have been written to.  */
1155         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1156                 goto whole;
1157         if (vma->vm_file == NULL)
1158                 return 0;
1159
1160         if (FILTER(MAPPED_PRIVATE))
1161                 goto whole;
1162
1163         /*
1164          * If this looks like the beginning of a DSO or executable mapping,
1165          * check for an ELF header.  If we find one, dump the first page to
1166          * aid in determining what was mapped here.
1167          */
1168         if (FILTER(ELF_HEADERS) &&
1169             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1170                 u32 __user *header = (u32 __user *) vma->vm_start;
1171                 u32 word;
1172                 mm_segment_t fs = get_fs();
1173                 /*
1174                  * Doing it this way gets the constant folded by GCC.
1175                  */
1176                 union {
1177                         u32 cmp;
1178                         char elfmag[SELFMAG];
1179                 } magic;
1180                 BUILD_BUG_ON(SELFMAG != sizeof word);
1181                 magic.elfmag[EI_MAG0] = ELFMAG0;
1182                 magic.elfmag[EI_MAG1] = ELFMAG1;
1183                 magic.elfmag[EI_MAG2] = ELFMAG2;
1184                 magic.elfmag[EI_MAG3] = ELFMAG3;
1185                 /*
1186                  * Switch to the user "segment" for get_user(),
1187                  * then put back what elf_core_dump() had in place.
1188                  */
1189                 set_fs(USER_DS);
1190                 if (unlikely(get_user(word, header)))
1191                         word = 0;
1192                 set_fs(fs);
1193                 if (word == magic.cmp)
1194                         return PAGE_SIZE;
1195         }
1196
1197 #undef  FILTER
1198
1199         return 0;
1200
1201 whole:
1202         return vma->vm_end - vma->vm_start;
1203 }
1204
/*
 * In-memory description of one ELF note, as consumed by notesize() and
 * writenote().
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type, stored as n_type on output */
	unsigned int datasz;	/* bytes of payload at @data */
	void *data;		/* note payload */
};
1213
1214 static int notesize(struct memelfnote *en)
1215 {
1216         int sz;
1217
1218         sz = sizeof(struct elf_note);
1219         sz += roundup(strlen(en->name) + 1, 4);
1220         sz += roundup(en->datasz, 4);
1221
1222         return sz;
1223 }
1224
1225 #define DUMP_WRITE(addr, nr, foffset)   \
1226         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1227
1228 static int alignfile(struct file *file, loff_t *foffset)
1229 {
1230         static const char buf[4] = { 0, };
1231         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1232         return 1;
1233 }
1234
1235 static int writenote(struct memelfnote *men, struct file *file,
1236                         loff_t *foffset)
1237 {
1238         struct elf_note en;
1239         en.n_namesz = strlen(men->name) + 1;
1240         en.n_descsz = men->datasz;
1241         en.n_type = men->type;
1242
1243         DUMP_WRITE(&en, sizeof(en), foffset);
1244         DUMP_WRITE(men->name, en.n_namesz, foffset);
1245         if (!alignfile(file, foffset))
1246                 return 0;
1247         DUMP_WRITE(men->data, men->datasz, foffset);
1248         if (!alignfile(file, foffset))
1249                 return 0;
1250
1251         return 1;
1252 }
1253 #undef DUMP_WRITE
1254
1255 static void fill_elf_header(struct elfhdr *elf, int segs,
1256                             u16 machine, u32 flags)
1257 {
1258         memset(elf, 0, sizeof(*elf));
1259
1260         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1261         elf->e_ident[EI_CLASS] = ELF_CLASS;
1262         elf->e_ident[EI_DATA] = ELF_DATA;
1263         elf->e_ident[EI_VERSION] = EV_CURRENT;
1264         elf->e_ident[EI_OSABI] = ELF_OSABI;
1265
1266         elf->e_type = ET_CORE;
1267         elf->e_machine = machine;
1268         elf->e_version = EV_CURRENT;
1269         elf->e_phoff = sizeof(struct elfhdr);
1270         elf->e_flags = flags;
1271         elf->e_ehsize = sizeof(struct elfhdr);
1272         elf->e_phentsize = sizeof(struct elf_phdr);
1273         elf->e_phnum = segs;
1274
1275         return;
1276 }
1277
1278 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1279 {
1280         phdr->p_type = PT_NOTE;
1281         phdr->p_offset = offset;
1282         phdr->p_vaddr = 0;
1283         phdr->p_paddr = 0;
1284         phdr->p_filesz = sz;
1285         phdr->p_memsz = 0;
1286         phdr->p_flags = 0;
1287         phdr->p_align = 0;
1288         return;
1289 }
1290
1291 static void fill_note(struct memelfnote *note, const char *name, int type, 
1292                 unsigned int sz, void *data)
1293 {
1294         note->name = name;
1295         note->type = type;
1296         note->datasz = sz;
1297         note->data = data;
1298         return;
1299 }
1300
1301 /*
1302  * fill up all the fields in prstatus from the given task struct, except
1303  * registers which need to be filled up separately.
1304  */
1305 static void fill_prstatus(struct elf_prstatus *prstatus,
1306                 struct task_struct *p, long signr)
1307 {
1308         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1309         prstatus->pr_sigpend = p->pending.signal.sig[0];
1310         prstatus->pr_sighold = p->blocked.sig[0];
1311         rcu_read_lock();
1312         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1313         rcu_read_unlock();
1314         prstatus->pr_pid = task_pid_vnr(p);
1315         prstatus->pr_pgrp = task_pgrp_vnr(p);
1316         prstatus->pr_sid = task_session_vnr(p);
1317         if (thread_group_leader(p)) {
1318                 struct task_cputime cputime;
1319
1320                 /*
1321                  * This is the record for the group leader.  It shows the
1322                  * group-wide total, not its individual thread total.
1323                  */
1324                 thread_group_cputime(p, &cputime);
1325                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1326                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1327         } else {
1328                 cputime_t utime, stime;
1329
1330                 task_cputime(p, &utime, &stime);
1331                 cputime_to_timeval(utime, &prstatus->pr_utime);
1332                 cputime_to_timeval(stime, &prstatus->pr_stime);
1333         }
1334         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1335         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1336 }
1337
1338 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1339                        struct mm_struct *mm)
1340 {
1341         const struct cred *cred;
1342         unsigned int i, len;
1343         
1344         /* first copy the parameters from user space */
1345         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1346
1347         len = mm->arg_end - mm->arg_start;
1348         if (len >= ELF_PRARGSZ)
1349                 len = ELF_PRARGSZ-1;
1350         if (copy_from_user(&psinfo->pr_psargs,
1351                            (const char __user *)mm->arg_start, len))
1352                 return -EFAULT;
1353         for(i = 0; i < len; i++)
1354                 if (psinfo->pr_psargs[i] == 0)
1355                         psinfo->pr_psargs[i] = ' ';
1356         psinfo->pr_psargs[len] = 0;
1357
1358         rcu_read_lock();
1359         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1360         rcu_read_unlock();
1361         psinfo->pr_pid = task_pid_vnr(p);
1362         psinfo->pr_pgrp = task_pgrp_vnr(p);
1363         psinfo->pr_sid = task_session_vnr(p);
1364
1365         i = p->state ? ffz(~p->state) + 1 : 0;
1366         psinfo->pr_state = i;
1367         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1368         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1369         psinfo->pr_nice = task_nice(p);
1370         psinfo->pr_flag = p->flags;
1371         rcu_read_lock();
1372         cred = __task_cred(p);
1373         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1374         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1375         rcu_read_unlock();
1376         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1377         
1378         return 0;
1379 }
1380
1381 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1382 {
1383         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1384         int i = 0;
1385         do
1386                 i += 2;
1387         while (auxv[i - 2] != AT_NULL);
1388         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1389 }
1390
1391 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1392                 siginfo_t *siginfo)
1393 {
1394         mm_segment_t old_fs = get_fs();
1395         set_fs(KERNEL_DS);
1396         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1397         set_fs(old_fs);
1398         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1399 }
1400
1401 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1402 /*
1403  * Format of NT_FILE note:
1404  *
1405  * long count     -- how many files are mapped
1406  * long page_size -- units for file_ofs
1407  * array of [COUNT] elements of
1408  *   long start
1409  *   long end
1410  *   long file_ofs
1411  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1412  */
1413 static void fill_files_note(struct memelfnote *note)
1414 {
1415         struct vm_area_struct *vma;
1416         unsigned count, size, names_ofs, remaining, n;
1417         user_long_t *data;
1418         user_long_t *start_end_ofs;
1419         char *name_base, *name_curpos;
1420
1421         /* *Estimated* file count and total data size needed */
1422         count = current->mm->map_count;
1423         size = count * 64;
1424
1425         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1426  alloc:
1427         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1428                 goto err;
1429         size = round_up(size, PAGE_SIZE);
1430         data = vmalloc(size);
1431         if (!data)
1432                 goto err;
1433
1434         start_end_ofs = data + 2;
1435         name_base = name_curpos = ((char *)data) + names_ofs;
1436         remaining = size - names_ofs;
1437         count = 0;
1438         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1439                 struct file *file;
1440                 const char *filename;
1441
1442                 file = vma->vm_file;
1443                 if (!file)
1444                         continue;
1445                 filename = d_path(&file->f_path, name_curpos, remaining);
1446                 if (IS_ERR(filename)) {
1447                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1448                                 vfree(data);
1449                                 size = size * 5 / 4;
1450                                 goto alloc;
1451                         }
1452                         continue;
1453                 }
1454
1455                 /* d_path() fills at the end, move name down */
1456                 /* n = strlen(filename) + 1: */
1457                 n = (name_curpos + remaining) - filename;
1458                 remaining = filename - name_curpos;
1459                 memmove(name_curpos, filename, n);
1460                 name_curpos += n;
1461
1462                 *start_end_ofs++ = vma->vm_start;
1463                 *start_end_ofs++ = vma->vm_end;
1464                 *start_end_ofs++ = vma->vm_pgoff;
1465                 count++;
1466         }
1467
1468         /* Now we know exact count of files, can store it */
1469         data[0] = count;
1470         data[1] = PAGE_SIZE;
1471         /*
1472          * Count usually is less than current->mm->map_count,
1473          * we need to move filenames down.
1474          */
1475         n = current->mm->map_count - count;
1476         if (n != 0) {
1477                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1478                 memmove(name_base - shift_bytes, name_base,
1479                         name_curpos - name_base);
1480                 name_curpos -= shift_bytes;
1481         }
1482
1483         size = name_curpos - (char *)data;
1484         fill_note(note, "CORE", NT_FILE, size, data);
1485  err: ;
1486 }
1487
1488 #ifdef CORE_DUMP_USE_REGSET
1489 #include <linux/regset.h>
1490
1491 struct elf_thread_core_info {
1492         struct elf_thread_core_info *next;
1493         struct task_struct *task;
1494         struct elf_prstatus prstatus;
1495         struct memelfnote notes[0];
1496 };
1497
1498 struct elf_note_info {
1499         struct elf_thread_core_info *thread;
1500         struct memelfnote psinfo;
1501         struct memelfnote signote;
1502         struct memelfnote auxv;
1503         struct memelfnote files;
1504         user_siginfo_t csigdata;
1505         size_t size;
1506         int thread_notes;
1507 };
1508
1509 /*
1510  * When a regset has a writeback hook, we call it on each thread before
1511  * dumping user memory.  On register window machines, this makes sure the
1512  * user memory backing the register data is up to date before we read it.
1513  */
1514 static void do_thread_regset_writeback(struct task_struct *task,
1515                                        const struct user_regset *regset)
1516 {
1517         if (regset->writeback)
1518                 regset->writeback(task, regset, 1);
1519 }
1520
/*
 * Defaults for the prstatus accessors.  Each one is only defined here when
 * nothing earlier in the translation unit has already supplied its own
 * version.
 */

/* Size of the register area inside a prstatus. */
#if !defined(PR_REG_SIZE)
# define PR_REG_SIZE(S) sizeof(S)
#endif

/* Size of a whole prstatus. */
#if !defined(PRSTATUS_SIZE)
# define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the register area, given a pointer to a prstatus. */
#if !defined(PR_REG_PTR)
# define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Store V in the pr_fpvalid field of the prstatus S points to. */
#if !defined(SET_PR_FPVALID)
# define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1536
1537 static int fill_thread_core_info(struct elf_thread_core_info *t,
1538                                  const struct user_regset_view *view,
1539                                  long signr, size_t *total)
1540 {
1541         unsigned int i;
1542
1543         /*
1544          * NT_PRSTATUS is the one special case, because the regset data
1545          * goes into the pr_reg field inside the note contents, rather
1546          * than being the whole note contents.  We fill the reset in here.
1547          * We assume that regset 0 is NT_PRSTATUS.
1548          */
1549         fill_prstatus(&t->prstatus, t->task, signr);
1550         (void) view->regsets[0].get(t->task, &view->regsets[0],
1551                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1552                                     PR_REG_PTR(&t->prstatus), NULL);
1553
1554         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1555                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1556         *total += notesize(&t->notes[0]);
1557
1558         do_thread_regset_writeback(t->task, &view->regsets[0]);
1559
1560         /*
1561          * Each other regset might generate a note too.  For each regset
1562          * that has no core_note_type or is inactive, we leave t->notes[i]
1563          * all zero and we'll know to skip writing it later.
1564          */
1565         for (i = 1; i < view->n; ++i) {
1566                 const struct user_regset *regset = &view->regsets[i];
1567                 do_thread_regset_writeback(t->task, regset);
1568                 if (regset->core_note_type && regset->get &&
1569                     (!regset->active || regset->active(t->task, regset))) {
1570                         int ret;
1571                         size_t size = regset->n * regset->size;
1572                         void *data = kmalloc(size, GFP_KERNEL);
1573                         if (unlikely(!data))
1574                                 return 0;
1575                         ret = regset->get(t->task, regset,
1576                                           0, size, data, NULL);
1577                         if (unlikely(ret))
1578                                 kfree(data);
1579                         else {
1580                                 if (regset->core_note_type != NT_PRFPREG)
1581                                         fill_note(&t->notes[i], "LINUX",
1582                                                   regset->core_note_type,
1583                                                   size, data);
1584                                 else {
1585                                         SET_PR_FPVALID(&t->prstatus, 1);
1586                                         fill_note(&t->notes[i], "CORE",
1587                                                   NT_PRFPREG, size, data);
1588                                 }
1589                                 *total += notesize(&t->notes[i]);
1590                         }
1591                 }
1592         }
1593
1594         return 1;
1595 }
1596
1597 static int fill_note_info(struct elfhdr *elf, int phdrs,
1598                           struct elf_note_info *info,
1599                           siginfo_t *siginfo, struct pt_regs *regs)
1600 {
1601         struct task_struct *dump_task = current;
1602         const struct user_regset_view *view = task_user_regset_view(dump_task);
1603         struct elf_thread_core_info *t;
1604         struct elf_prpsinfo *psinfo;
1605         struct core_thread *ct;
1606         unsigned int i;
1607
1608         info->size = 0;
1609         info->thread = NULL;
1610
1611         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1612         if (psinfo == NULL) {
1613                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1614                 return 0;
1615         }
1616
1617         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1618
1619         /*
1620          * Figure out how many notes we're going to need for each thread.
1621          */
1622         info->thread_notes = 0;
1623         for (i = 0; i < view->n; ++i)
1624                 if (view->regsets[i].core_note_type != 0)
1625                         ++info->thread_notes;
1626
1627         /*
1628          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1629          * since it is our one special case.
1630          */
1631         if (unlikely(info->thread_notes == 0) ||
1632             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1633                 WARN_ON(1);
1634                 return 0;
1635         }
1636
1637         /*
1638          * Initialize the ELF file header.
1639          */
1640         fill_elf_header(elf, phdrs,
1641                         view->e_machine, view->e_flags);
1642
1643         /*
1644          * Allocate a structure for each thread.
1645          */
1646         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1647                 t = kzalloc(offsetof(struct elf_thread_core_info,
1648                                      notes[info->thread_notes]),
1649                             GFP_KERNEL);
1650                 if (unlikely(!t))
1651                         return 0;
1652
1653                 t->task = ct->task;
1654                 if (ct->task == dump_task || !info->thread) {
1655                         t->next = info->thread;
1656                         info->thread = t;
1657                 } else {
1658                         /*
1659                          * Make sure to keep the original task at
1660                          * the head of the list.
1661                          */
1662                         t->next = info->thread->next;
1663                         info->thread->next = t;
1664                 }
1665         }
1666
1667         /*
1668          * Now fill in each thread's information.
1669          */
1670         for (t = info->thread; t != NULL; t = t->next)
1671                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1672                         return 0;
1673
1674         /*
1675          * Fill in the two process-wide notes.
1676          */
1677         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1678         info->size += notesize(&info->psinfo);
1679
1680         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1681         info->size += notesize(&info->signote);
1682
1683         fill_auxv_note(&info->auxv, current->mm);
1684         info->size += notesize(&info->auxv);
1685
1686         fill_files_note(&info->files);
1687         info->size += notesize(&info->files);
1688
1689         return 1;
1690 }
1691
1692 static size_t get_note_info_size(struct elf_note_info *info)
1693 {
1694         return info->size;
1695 }
1696
1697 /*
1698  * Write all the notes for each thread.  When writing the first thread, the
1699  * process-wide notes are interleaved after the first thread-specific note.
1700  */
1701 static int write_note_info(struct elf_note_info *info,
1702                            struct file *file, loff_t *foffset)
1703 {
1704         bool first = 1;
1705         struct elf_thread_core_info *t = info->thread;
1706
1707         do {
1708                 int i;
1709
1710                 if (!writenote(&t->notes[0], file, foffset))
1711                         return 0;
1712
1713                 if (first && !writenote(&info->psinfo, file, foffset))
1714                         return 0;
1715                 if (first && !writenote(&info->signote, file, foffset))
1716                         return 0;
1717                 if (first && !writenote(&info->auxv, file, foffset))
1718                         return 0;
1719                 if (first && !writenote(&info->files, file, foffset))
1720                         return 0;
1721
1722                 for (i = 1; i < info->thread_notes; ++i)
1723                         if (t->notes[i].data &&
1724                             !writenote(&t->notes[i], file, foffset))
1725                                 return 0;
1726
1727                 first = 0;
1728                 t = t->next;
1729         } while (t);
1730
1731         return 1;
1732 }
1733
1734 static void free_note_info(struct elf_note_info *info)
1735 {
1736         struct elf_thread_core_info *threads = info->thread;
1737         while (threads) {
1738                 unsigned int i;
1739                 struct elf_thread_core_info *t = threads;
1740                 threads = t->next;
1741                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1742                 for (i = 1; i < info->thread_notes; ++i)
1743                         kfree(t->notes[i].data);
1744                 kfree(t);
1745         }
1746         kfree(info->psinfo.data);
1747         vfree(info->files.data);
1748 }
1749
1750 #else
1751
1752 /* Here is the structure in which status of each thread is captured. */
1753 struct elf_thread_status
1754 {
1755         struct list_head list;
1756         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1757         elf_fpregset_t fpu;             /* NT_PRFPREG */
1758         struct task_struct *thread;
1759 #ifdef ELF_CORE_COPY_XFPREGS
1760         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1761 #endif
1762         struct memelfnote notes[3];
1763         int num_notes;
1764 };
1765
1766 /*
1767  * In order to add the specific thread information for the elf file format,
1768  * we need to keep a linked list of every threads pr_status and then create
1769  * a single section for them in the final core file.
1770  */
1771 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1772 {
1773         int sz = 0;
1774         struct task_struct *p = t->thread;
1775         t->num_notes = 0;
1776
1777         fill_prstatus(&t->prstatus, p, signr);
1778         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1779         
1780         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1781                   &(t->prstatus));
1782         t->num_notes++;
1783         sz += notesize(&t->notes[0]);
1784
1785         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1786                                                                 &t->fpu))) {
1787                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1788                           &(t->fpu));
1789                 t->num_notes++;
1790                 sz += notesize(&t->notes[1]);
1791         }
1792
1793 #ifdef ELF_CORE_COPY_XFPREGS
1794         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1795                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1796                           sizeof(t->xfpu), &t->xfpu);
1797                 t->num_notes++;
1798                 sz += notesize(&t->notes[2]);
1799         }
1800 #endif  
1801         return sz;
1802 }
1803
1804 struct elf_note_info {
1805         struct memelfnote *notes;
1806         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1807         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1808         struct list_head thread_list;
1809         elf_fpregset_t *fpu;
1810 #ifdef ELF_CORE_COPY_XFPREGS
1811         elf_fpxregset_t *xfpu;
1812 #endif
1813         user_siginfo_t csigdata;
1814         int thread_status_size;
1815         int numnote;
1816 };
1817
1818 static int elf_note_info_init(struct elf_note_info *info)
1819 {
1820         memset(info, 0, sizeof(*info));
1821         INIT_LIST_HEAD(&info->thread_list);
1822
1823         /* Allocate space for ELF notes */
1824         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1825         if (!info->notes)
1826                 return 0;
1827         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1828         if (!info->psinfo)
1829                 return 0;
1830         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1831         if (!info->prstatus)
1832                 return 0;
1833         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1834         if (!info->fpu)
1835                 return 0;
1836 #ifdef ELF_CORE_COPY_XFPREGS
1837         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1838         if (!info->xfpu)
1839                 return 0;
1840 #endif
1841         return 1;
1842 }
1843
1844 static int fill_note_info(struct elfhdr *elf, int phdrs,
1845                           struct elf_note_info *info,
1846                           siginfo_t *siginfo, struct pt_regs *regs)
1847 {
1848         struct list_head *t;
1849
1850         if (!elf_note_info_init(info))
1851                 return 0;
1852
1853         if (siginfo->si_signo) {
1854                 struct core_thread *ct;
1855                 struct elf_thread_status *ets;
1856
1857                 for (ct = current->mm->core_state->dumper.next;
1858                                                 ct; ct = ct->next) {
1859                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1860                         if (!ets)
1861                                 return 0;
1862
1863                         ets->thread = ct->task;
1864                         list_add(&ets->list, &info->thread_list);
1865                 }
1866
1867                 list_for_each(t, &info->thread_list) {
1868                         int sz;
1869
1870                         ets = list_entry(t, struct elf_thread_status, list);
1871                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1872                         info->thread_status_size += sz;
1873                 }
1874         }
1875         /* now collect the dump for the current */
1876         memset(info->prstatus, 0, sizeof(*info->prstatus));
1877         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1878         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1879
1880         /* Set up header */
1881         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1882
1883         /*
1884          * Set up the notes in similar form to SVR4 core dumps made
1885          * with info from their /proc.
1886          */
1887
1888         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1889                   sizeof(*info->prstatus), info->prstatus);
1890         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1891         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1892                   sizeof(*info->psinfo), info->psinfo);
1893
1894         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1895         fill_auxv_note(info->notes + 3, current->mm);
1896         fill_files_note(info->notes + 4);
1897
1898         info->numnote = 5;
1899
1900         /* Try to dump the FPU. */
1901         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1902                                                                info->fpu);
1903         if (info->prstatus->pr_fpvalid)
1904                 fill_note(info->notes + info->numnote++,
1905                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1906 #ifdef ELF_CORE_COPY_XFPREGS
1907         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1908                 fill_note(info->notes + info->numnote++,
1909                           "LINUX", ELF_CORE_XFPREG_TYPE,
1910                           sizeof(*info->xfpu), info->xfpu);
1911 #endif
1912
1913         return 1;
1914 }
1915
1916 static size_t get_note_info_size(struct elf_note_info *info)
1917 {
1918         int sz = 0;
1919         int i;
1920
1921         for (i = 0; i < info->numnote; i++)
1922                 sz += notesize(info->notes + i);
1923
1924         sz += info->thread_status_size;
1925
1926         return sz;
1927 }
1928
1929 static int write_note_info(struct elf_note_info *info,
1930                            struct file *file, loff_t *foffset)
1931 {
1932         int i;
1933         struct list_head *t;
1934
1935         for (i = 0; i < info->numnote; i++)
1936                 if (!writenote(info->notes + i, file, foffset))
1937                         return 0;
1938
1939         /* write out the thread status notes section */
1940         list_for_each(t, &info->thread_list) {
1941                 struct elf_thread_status *tmp =
1942                                 list_entry(t, struct elf_thread_status, list);
1943
1944                 for (i = 0; i < tmp->num_notes; i++)
1945                         if (!writenote(&tmp->notes[i], file, foffset))
1946                                 return 0;
1947         }
1948
1949         return 1;
1950 }
1951
1952 static void free_note_info(struct elf_note_info *info)
1953 {
1954         while (!list_empty(&info->thread_list)) {
1955                 struct list_head *tmp = info->thread_list.next;
1956                 list_del(tmp);
1957                 kfree(list_entry(tmp, struct elf_thread_status, list));
1958         }
1959
1960         /* Free data allocated by fill_files_note(): */
1961         vfree(info->notes[4].data);
1962
1963         kfree(info->prstatus);
1964         kfree(info->psinfo);
1965         kfree(info->notes);
1966         kfree(info->fpu);
1967 #ifdef ELF_CORE_COPY_XFPREGS
1968         kfree(info->xfpu);
1969 #endif
1970 }
1971
1972 #endif
1973
1974 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1975                                         struct vm_area_struct *gate_vma)
1976 {
1977         struct vm_area_struct *ret = tsk->mm->mmap;
1978
1979         if (ret)
1980                 return ret;
1981         return gate_vma;
1982 }
1983 /*
1984  * Helper function for iterating across a vma list.  It ensures that the caller
1985  * will visit `gate_vma' prior to terminating the search.
1986  */
1987 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1988                                         struct vm_area_struct *gate_vma)
1989 {
1990         struct vm_area_struct *ret;
1991
1992         ret = this_vma->vm_next;
1993         if (ret)
1994                 return ret;
1995         if (this_vma == gate_vma)
1996                 return NULL;
1997         return gate_vma;
1998 }
1999
2000 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2001                              elf_addr_t e_shoff, int segs)
2002 {
2003         elf->e_shoff = e_shoff;
2004         elf->e_shentsize = sizeof(*shdr4extnum);
2005         elf->e_shnum = 1;
2006         elf->e_shstrndx = SHN_UNDEF;
2007
2008         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2009
2010         shdr4extnum->sh_type = SHT_NULL;
2011         shdr4extnum->sh_size = elf->e_shnum;
2012         shdr4extnum->sh_link = elf->e_shstrndx;
2013         shdr4extnum->sh_info = segs;
2014 }
2015
2016 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2017                                      unsigned long mm_flags)
2018 {
2019         struct vm_area_struct *vma;
2020         size_t size = 0;
2021
2022         for (vma = first_vma(current, gate_vma); vma != NULL;
2023              vma = next_vma(vma, gate_vma))
2024                 size += vma_dump_size(vma, mm_flags);
2025         return size;
2026 }
2027
2028 /*
2029  * Actual dumper
2030  *
2031  * This is a two-pass process; first we find the offsets of the bits,
2032  * and then they are actually written out.  If we run out of core limit
2033  * we just truncate.
2034  */
2035 static int elf_core_dump(struct coredump_params *cprm)
2036 {
2037         int has_dumped = 0;
2038         mm_segment_t fs;
2039         int segs;
2040         size_t size = 0;
2041         struct vm_area_struct *vma, *gate_vma;
2042         struct elfhdr *elf = NULL;
2043         loff_t offset = 0, dataoff, foffset;
2044         struct elf_note_info info;
2045         struct elf_phdr *phdr4note = NULL;
2046         struct elf_shdr *shdr4extnum = NULL;
2047         Elf_Half e_phnum;
2048         elf_addr_t e_shoff;
2049
2050         /*
2051          * We no longer stop all VM operations.
2052          * 
2053          * This is because those proceses that could possibly change map_count
2054          * or the mmap / vma pages are now blocked in do_exit on current
2055          * finishing this core dump.
2056          *
2057          * Only ptrace can touch these memory addresses, but it doesn't change
2058          * the map_count or the pages allocated. So no possibility of crashing
2059          * exists while dumping the mm->vm_next areas to the core file.
2060          */
2061   
2062         /* alloc memory for large data structures: too large to be on stack */
2063         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2064         if (!elf)
2065                 goto out;
2066         /*
2067          * The number of segs are recored into ELF header as 16bit value.
2068          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2069          */
2070         segs = current->mm->map_count;
2071         segs += elf_core_extra_phdrs();
2072
2073         gate_vma = get_gate_vma(current->mm);
2074         if (gate_vma != NULL)
2075                 segs++;
2076
2077         /* for notes section */
2078         segs++;
2079
2080         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2081          * this, kernel supports extended numbering. Have a look at
2082          * include/linux/elf.h for further information. */
2083         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2084
2085         /*
2086          * Collect all the non-memory information about the process for the
2087          * notes.  This also sets up the file header.
2088          */
2089         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2090                 goto cleanup;
2091
2092         has_dumped = 1;
2093         current->flags |= PF_DUMPCORE;
2094   
2095         fs = get_fs();
2096         set_fs(KERNEL_DS);
2097
2098         offset += sizeof(*elf);                         /* Elf header */
2099         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2100         foffset = offset;
2101
2102         /* Write notes phdr entry */
2103         {
2104                 size_t sz = get_note_info_size(&info);
2105
2106                 sz += elf_coredump_extra_notes_size();
2107
2108                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2109                 if (!phdr4note)
2110                         goto end_coredump;
2111
2112                 fill_elf_note_phdr(phdr4note, sz, offset);
2113                 offset += sz;
2114         }
2115
2116         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2117
2118         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2119         offset += elf_core_extra_data_size();
2120         e_shoff = offset;
2121
2122         if (e_phnum == PN_XNUM) {
2123                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2124                 if (!shdr4extnum)
2125                         goto end_coredump;
2126                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2127         }
2128
2129         offset = dataoff;
2130
2131         size += sizeof(*elf);
2132         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2133                 goto end_coredump;
2134
2135         size += sizeof(*phdr4note);
2136         if (size > cprm->limit
2137             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2138                 goto end_coredump;
2139
2140         /* Write program headers for segments dump */
2141         for (vma = first_vma(current, gate_vma); vma != NULL;
2142                         vma = next_vma(vma, gate_vma)) {
2143                 struct elf_phdr phdr;
2144
2145                 phdr.p_type = PT_LOAD;
2146                 phdr.p_offset = offset;
2147                 phdr.p_vaddr = vma->vm_start;
2148                 phdr.p_paddr = 0;
2149                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2150                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2151                 offset += phdr.p_filesz;
2152                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2153                 if (vma->vm_flags & VM_WRITE)
2154                         phdr.p_flags |= PF_W;
2155                 if (vma->vm_flags & VM_EXEC)
2156                         phdr.p_flags |= PF_X;
2157                 phdr.p_align = ELF_EXEC_PAGESIZE;
2158
2159                 size += sizeof(phdr);
2160                 if (size > cprm->limit
2161                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2162                         goto end_coredump;
2163         }
2164
2165         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2166                 goto end_coredump;
2167
2168         /* write out the notes section */
2169         if (!write_note_info(&info, cprm->file, &foffset))
2170                 goto end_coredump;
2171
2172         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2173                 goto end_coredump;
2174
2175         /* Align to page */
2176         if (!dump_seek(cprm->file, dataoff - foffset))
2177                 goto end_coredump;
2178
2179         for (vma = first_vma(current, gate_vma); vma != NULL;
2180                         vma = next_vma(vma, gate_vma)) {
2181                 unsigned long addr;
2182                 unsigned long end;
2183
2184                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2185
2186                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2187                         struct page *page;
2188                         int stop;
2189
2190                         page = get_dump_page(addr);
2191                         if (page) {
2192                                 void *kaddr = kmap(page);
2193                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2194                                         !dump_write(cprm->file, kaddr,
2195                                                     PAGE_SIZE);
2196                                 kunmap(page);
2197                                 page_cache_release(page);
2198                         } else
2199                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2200                         if (stop)
2201                                 goto end_coredump;
2202                 }
2203         }
2204
2205         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2206                 goto end_coredump;
2207
2208         if (e_phnum == PN_XNUM) {
2209                 size += sizeof(*shdr4extnum);
2210                 if (size > cprm->limit
2211                     || !dump_write(cprm->file, shdr4extnum,
2212                                    sizeof(*shdr4extnum)))
2213                         goto end_coredump;
2214         }
2215
2216 end_coredump:
2217         set_fs(fs);
2218
2219 cleanup:
2220         free_note_info(&info);
2221         kfree(shdr4extnum);
2222         kfree(phdr4note);
2223         kfree(elf);
2224 out:
2225         return has_dumped;
2226 }
2227
2228 #endif          /* CONFIG_ELF_CORE */
2229
2230 static int __init init_elf_binfmt(void)
2231 {
2232         register_binfmt(&elf_format);
2233         return 0;
2234 }
2235
2236 static void __exit exit_elf_binfmt(void)
2237 {
2238         /* Remove the COFF and ELF loaders. */
2239         unregister_binfmt(&elf_format);
2240 }
2241
/*
 * Hook the init and exit routines defined above into the kernel's
 * initcall / module-exit mechanisms and declare the module license.
 */
2242 core_initcall(init_elf_binfmt);
2243 module_exit(exit_elf_binfmt);
2244 MODULE_LICENSE("GPL");