/*
 * linux/kernel/power/snapshot.c
 *
 * This file provides system snapshot/restore functionality.
 *
 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
 *
 * This file is released under the GPLv2, and is based on swsusp.c.
 *
 */


#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#include "power.h"

struct pbe *pagedir_nosave;
static unsigned int nr_copy_pages;
static unsigned int nr_meta_pages;
static unsigned long *buffer;

struct arch_saveable_page {
	unsigned long start;
	unsigned long end;
	char *data;
	struct arch_saveable_page *next;
};
static struct arch_saveable_page *arch_pages;

int swsusp_add_arch_pages(unsigned long start, unsigned long end)
{
	struct arch_saveable_page *tmp;

	while (start < end) {
		tmp = kzalloc(sizeof(struct arch_saveable_page), GFP_KERNEL);
		if (!tmp)
			return -ENOMEM;
		tmp->start = start;
		tmp->end = ((start >> PAGE_SHIFT) + 1) << PAGE_SHIFT;
		if (tmp->end > end)
			tmp->end = end;
		tmp->next = arch_pages;
		start = tmp->end;
		arch_pages = tmp;
	}
	return 0;
}
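
/*
 * Illustrative sketch (not compiled): a hypothetical architecture could
 * register a range of physical memory for saving from its own suspend
 * setup code.  The address and the init hook below are assumptions made
 * purely for illustration.
 */
#if 0
static int __init example_register_arch_mem(void)
{
	/* Ask swsusp to save one page of arch-specific data at 0xa0000. */
	return swsusp_add_arch_pages(0xa0000, 0xa0000 + PAGE_SIZE);
}
#endif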

static unsigned int count_arch_pages(void)
{
	unsigned int count = 0;
	struct arch_saveable_page *tmp = arch_pages;
	while (tmp) {
		count++;
		tmp = tmp->next;
	}
	return count;
}

static int save_arch_mem(void)
{
	char *kaddr;
	struct arch_saveable_page *tmp = arch_pages;
	int offset;

	pr_debug("swsusp: Saving arch specific memory\n");
	while (tmp) {
		tmp->data = (char *)__get_free_page(GFP_ATOMIC);
		if (!tmp->data)
			return -ENOMEM;
		offset = tmp->start - (tmp->start & PAGE_MASK);
		/* arch pages might not have a 'struct page' */
		kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
		memcpy(tmp->data + offset, kaddr + offset,
			tmp->end - tmp->start);
		kunmap_atomic(kaddr, KM_USER0);

		tmp = tmp->next;
	}
	return 0;
}

static int restore_arch_mem(void)
{
	char *kaddr;
	struct arch_saveable_page *tmp = arch_pages;
	int offset;

	while (tmp) {
		if (!tmp->data) {
			/* Advance, or the loop would spin forever here. */
			tmp = tmp->next;
			continue;
		}
		offset = tmp->start - (tmp->start & PAGE_MASK);
		kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
		memcpy(kaddr + offset, tmp->data + offset,
			tmp->end - tmp->start);
		kunmap_atomic(kaddr, KM_USER0);
		free_page((long)tmp->data);
		tmp->data = NULL;
		tmp = tmp->next;
	}
	return 0;
}

#ifdef CONFIG_HIGHMEM
static unsigned int count_highmem_pages(void)
{
	struct zone *zone;
	unsigned long zone_pfn;
	unsigned int n = 0;

	for_each_zone (zone)
		if (is_highmem(zone)) {
			mark_free_pages(zone);
			for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
				struct page *page;
				unsigned long pfn = zone_pfn + zone->zone_start_pfn;
				if (!pfn_valid(pfn))
					continue;
				page = pfn_to_page(pfn);
				if (PageReserved(page))
					continue;
				if (PageNosaveFree(page))
					continue;
				n++;
			}
		}
	return n;
}

struct highmem_page {
	char *data;
	struct page *page;
	struct highmem_page *next;
};

static struct highmem_page *highmem_copy;

static int save_highmem_zone(struct zone *zone)
{
	unsigned long zone_pfn;
	mark_free_pages(zone);
	for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
		struct page *page;
		struct highmem_page *save;
		void *kaddr;
		unsigned long pfn = zone_pfn + zone->zone_start_pfn;

		if (!(pfn%10000))
			printk(".");
		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		/*
		 * This condition results from rvmalloc() sans vmalloc_32()
		 * and architectural memory reservations. This should be
		 * corrected eventually when the cases giving rise to this
		 * are better understood.
		 */
		if (PageReserved(page))
			continue;
		BUG_ON(PageNosave(page));
		if (PageNosaveFree(page))
			continue;
		save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
		if (!save)
			return -ENOMEM;
		save->next = highmem_copy;
		save->page = page;
		save->data = (void *) get_zeroed_page(GFP_ATOMIC);
		if (!save->data) {
			kfree(save);
			return -ENOMEM;
		}
		kaddr = kmap_atomic(page, KM_USER0);
		memcpy(save->data, kaddr, PAGE_SIZE);
		kunmap_atomic(kaddr, KM_USER0);
		highmem_copy = save;
	}
	return 0;
}

static int save_highmem(void)
{
	struct zone *zone;
	int res = 0;

	pr_debug("swsusp: Saving Highmem");
	drain_local_pages();
	for_each_zone (zone) {
		if (is_highmem(zone))
			res = save_highmem_zone(zone);
		if (res)
			return res;
	}
	printk("\n");
	return 0;
}

static int restore_highmem(void)
{
	printk("swsusp: Restoring Highmem\n");
	while (highmem_copy) {
		struct highmem_page *save = highmem_copy;
		void *kaddr;
		highmem_copy = save->next;

		kaddr = kmap_atomic(save->page, KM_USER0);
		memcpy(kaddr, save->data, PAGE_SIZE);
		kunmap_atomic(kaddr, KM_USER0);
		free_page((long) save->data);
		kfree(save);
	}
	return 0;
}
#else
static inline unsigned int count_highmem_pages(void) {return 0;}
static inline int save_highmem(void) {return 0;}
static inline int restore_highmem(void) {return 0;}
#endif

unsigned int count_special_pages(void)
{
	return count_arch_pages() + count_highmem_pages();
}

int save_special_mem(void)
{
	int ret;
	ret = save_arch_mem();
	if (!ret)
		ret = save_highmem();
	return ret;
}

int restore_special_mem(void)
{
	int ret;
	ret = restore_arch_mem();
	if (!ret)
		ret = restore_highmem();
	return ret;
}

static int pfn_is_nosave(unsigned long pfn)
{
	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}

/**
 *	saveable - Determine whether a page should be cloned or not.
 *	@zone:		Memory zone the page belongs to.
 *	@zone_pfn:	Offset of the page's pfn within @zone.
 *
 *	We save a page if it's Reserved, and not in the range of pages
 *	statically defined as 'unsaveable', or if it isn't reserved, and
 *	isn't part of a free chunk of pages.
 */

static int saveable(struct zone *zone, unsigned long *zone_pfn)
{
	unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
	struct page *page;

	if (!pfn_valid(pfn))
		return 0;

	page = pfn_to_page(pfn);
	if (PageNosave(page))
		return 0;
	if (PageReserved(page) && pfn_is_nosave(pfn))
		return 0;
	if (PageNosaveFree(page))
		return 0;

	return 1;
}

unsigned int count_data_pages(void)
{
	struct zone *zone;
	unsigned long zone_pfn;
	unsigned int n = 0;

	for_each_zone (zone) {
		if (is_highmem(zone))
			continue;
		mark_free_pages(zone);
		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
			n += saveable(zone, &zone_pfn);
	}
	return n;
}

static void copy_data_pages(struct pbe *pblist)
{
	struct zone *zone;
	unsigned long zone_pfn;
	struct pbe *pbe, *p;

	pbe = pblist;
	for_each_zone (zone) {
		if (is_highmem(zone))
			continue;
		mark_free_pages(zone);
		/* This is necessary for swsusp_free() */
		for_each_pb_page (p, pblist)
			SetPageNosaveFree(virt_to_page(p));
		for_each_pbe (p, pblist)
			SetPageNosaveFree(virt_to_page(p->address));
		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
			if (saveable(zone, &zone_pfn)) {
				struct page *page;
				page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
				BUG_ON(!pbe);
				pbe->orig_address = (unsigned long)page_address(page);
				/* copy_page is not usable for copying task structs. */
				memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
				pbe = pbe->next;
			}
		}
	}
	BUG_ON(pbe);
}


/**
 *	free_pagedir - free pages allocated with alloc_pagedir()
 */

static void free_pagedir(struct pbe *pblist, int clear_nosave_free)
{
	struct pbe *pbe;

	while (pblist) {
		pbe = (pblist + PB_PAGE_SKIP)->next;
		ClearPageNosave(virt_to_page(pblist));
		if (clear_nosave_free)
			ClearPageNosaveFree(virt_to_page(pblist));
		free_page((unsigned long)pblist);
		pblist = pbe;
	}
}

/**
 *	fill_pb_page - Create a list of PBEs on a given memory page
 */

static inline void fill_pb_page(struct pbe *pbpage)
{
	struct pbe *p;

	p = pbpage;
	pbpage += PB_PAGE_SKIP;
	do
		p->next = p + 1;
	while (++p < pbpage);
}
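
/*
 * Chain layout sketch (descriptive only): each page holds PBES_PER_PAGE
 * struct pbe slots.  fill_pb_page() chains slots 0 .. PB_PAGE_SKIP within
 * one page; the ->next pointer of the slot at index PB_PAGE_SKIP (the last
 * one on the page) is left to the caller and ends up pointing at the first
 * slot of the next page in the chain:
 *
 *	page A: pbe[0] -> pbe[1] -> ... -> pbe[PB_PAGE_SKIP] ---+
 *	page B: pbe[0] -> pbe[1] -> ...   <---------------------+
 */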

/**
 *	create_pbe_list - Create a list of PBEs on top of a given chain
 *	of memory pages allocated with alloc_pagedir()
 */

static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
{
	struct pbe *pbpage, *p;
	unsigned int num = PBES_PER_PAGE;

	for_each_pb_page (pbpage, pblist) {
		if (num >= nr_pages)
			break;

		fill_pb_page(pbpage);
		num += PBES_PER_PAGE;
	}
	if (pbpage) {
		for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
			p->next = p + 1;
		p->next = NULL;
	}
}

static unsigned int unsafe_pages;

/**
 *	@safe_needed - on resume, for storing the PBE list and the image,
 *	we can only use memory pages that do not conflict with the pages
 *	used before suspend.
 *
 *	The unsafe pages are marked with the PG_nosave_free flag
 *	and we count them using unsafe_pages.
 */

static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
{
	void *res;

	res = (void *)get_zeroed_page(gfp_mask);
	if (safe_needed)
		while (res && PageNosaveFree(virt_to_page(res))) {
			/* The page is unsafe, mark it for swsusp_free() */
			SetPageNosave(virt_to_page(res));
			unsafe_pages++;
			res = (void *)get_zeroed_page(gfp_mask);
		}
	if (res) {
		SetPageNosave(virt_to_page(res));
		SetPageNosaveFree(virt_to_page(res));
	}
	return res;
}

unsigned long get_safe_page(gfp_t gfp_mask)
{
	return (unsigned long)alloc_image_page(gfp_mask, 1);
}

/**
 *	alloc_pagedir - Allocate the page directory.
 *
 *	First, determine exactly how many pages we need and
 *	allocate them.
 *
 *	We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
 *	struct pbe elements (pbes) and the last element in the page points
 *	to the next page.
 *
 *	On each page we set up a list of struct pbe elements.
 */

static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
				 int safe_needed)
{
	unsigned int num;
	struct pbe *pblist, *pbe;

	if (!nr_pages)
		return NULL;

	pblist = alloc_image_page(gfp_mask, safe_needed);
	/* FIXME: rewrite this ugly loop */
	for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
			pbe = pbe->next, num += PBES_PER_PAGE) {
		pbe += PB_PAGE_SKIP;
		pbe->next = alloc_image_page(gfp_mask, safe_needed);
	}
	if (!pbe) { /* get_zeroed_page() failed */
		free_pagedir(pblist, 1);
		pblist = NULL;
	} else
		create_pbe_list(pblist, nr_pages);
	return pblist;
}

/**
 * Free pages we allocated for suspend. Suspend pages are allocated
 * before the atomic copy, so we need to free them after resume.
 */

void swsusp_free(void)
{
	struct zone *zone;
	unsigned long zone_pfn;

	for_each_zone(zone) {
		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
			if (pfn_valid(zone_pfn + zone->zone_start_pfn)) {
				struct page *page;
				page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
				if (PageNosave(page) && PageNosaveFree(page)) {
					ClearPageNosave(page);
					ClearPageNosaveFree(page);
					free_page((long) page_address(page));
				}
			}
	}
	nr_copy_pages = 0;
	nr_meta_pages = 0;
	pagedir_nosave = NULL;
	buffer = NULL;
}


/**
 *	enough_free_mem - Make sure we have enough free memory to snapshot.
 *
 *	Returns TRUE or FALSE after checking the number of available
 *	free pages.
 */

static int enough_free_mem(unsigned int nr_pages)
{
	struct zone *zone;
	unsigned int n = 0;

	for_each_zone (zone)
		if (!is_highmem(zone))
			n += zone->free_pages;
	pr_debug("swsusp: available memory: %u pages\n", n);
	return n > (nr_pages + PAGES_FOR_IO +
		(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
}
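
/*
 * Worked example of the check above (illustrative assumption: 32-bit with
 * 4 KB pages and sizeof(struct pbe) == 12, giving PBES_PER_PAGE == 341):
 * copying nr_pages == 10000 needs 10000 image pages plus 30 pagedir pages
 * (10000/341 rounded up), so more than 10030 + PAGES_FOR_IO free lowmem
 * pages must be available.
 */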

static int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed)
{
	struct pbe *p;

	for_each_pbe (p, pblist) {
		p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed);
		if (!p->address)
			return -ENOMEM;
	}
	return 0;
}

static struct pbe *swsusp_alloc(unsigned int nr_pages)
{
	struct pbe *pblist;

	if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) {
		printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
		return NULL;
	}

	if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) {
		printk(KERN_ERR "suspend: Allocating image pages failed.\n");
		swsusp_free();
		return NULL;
	}

	return pblist;
}

asmlinkage int swsusp_save(void)
{
	unsigned int nr_pages;

	pr_debug("swsusp: critical section:\n");

	drain_local_pages();
	nr_pages = count_data_pages();
	printk("swsusp: Need to copy %u pages\n", nr_pages);

	pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
		 nr_pages,
		 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
		 PAGES_FOR_IO, nr_free_pages());

	if (!enough_free_mem(nr_pages)) {
		printk(KERN_ERR "swsusp: Not enough free memory\n");
		return -ENOMEM;
	}

	pagedir_nosave = swsusp_alloc(nr_pages);
	if (!pagedir_nosave)
		return -ENOMEM;

	/* While allocating the suspend pagedir, new cold pages may appear.
	 * Kill them.
	 */
	drain_local_pages();
	copy_data_pages(pagedir_nosave);

	/*
	 * End of critical section. From now on, we can write to memory,
	 * but we should not touch disk. This especially means we must _not_
	 * touch swap space! Except we must write out our image of course.
	 */

	nr_copy_pages = nr_pages;
	nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT;

	printk("swsusp: critical section: done (%d pages copied)\n", nr_pages);
	return 0;
}

static void init_header(struct swsusp_info *info)
{
	memset(info, 0, sizeof(struct swsusp_info));
	info->version_code = LINUX_VERSION_CODE;
	info->num_physpages = num_physpages;
	memcpy(&info->uts, &system_utsname, sizeof(system_utsname));
	info->cpus = num_online_cpus();
	info->image_pages = nr_copy_pages;
	info->pages = nr_copy_pages + nr_meta_pages + 1;
	info->size = info->pages;
	info->size <<= PAGE_SHIFT;
}

/**
 *	pack_orig_addresses - the .orig_address fields of the PBEs from the
 *	list starting at @pbe are stored in the array @buf[] (1 page)
 */

static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pbe)
{
	int j;

	for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
		buf[j] = pbe->orig_address;
		pbe = pbe->next;
	}
	if (!pbe)
		for (; j < PAGE_SIZE / sizeof(long); j++)
			buf[j] = 0;
	return pbe;
}

/**
 *	snapshot_read_next - used for reading the system memory snapshot.
 *
 *	On the first call to it @handle should point to a zeroed
 *	snapshot_handle structure.  The structure gets updated and a pointer
 *	to it should be passed to this function on every subsequent call.
 *
 *	The @count parameter should contain the number of bytes the caller
 *	wants to read from the snapshot.  It must not be zero.
 *
 *	On success the function returns a positive number.  Then, the caller
 *	is allowed to read up to the returned number of bytes from the memory
 *	location computed by the data_of() macro.  The number returned
 *	may be smaller than @count, but this only happens if the read would
 *	cross a page boundary otherwise.
 *
 *	The function returns 0 to indicate the end of the data stream,
 *	and a negative number is returned on error.  In such cases the
 *	structure pointed to by @handle is not updated and should not be used
 *	any more.
 */

int snapshot_read_next(struct snapshot_handle *handle, size_t count)
{
	if (handle->page > nr_meta_pages + nr_copy_pages)
		return 0;
	if (!buffer) {
		/* This makes the buffer be freed by swsusp_free() */
		buffer = alloc_image_page(GFP_ATOMIC, 0);
		if (!buffer)
			return -ENOMEM;
	}
	if (!handle->offset) {
		init_header((struct swsusp_info *)buffer);
		handle->buffer = buffer;
		handle->pbe = pagedir_nosave;
	}
	if (handle->prev < handle->page) {
		if (handle->page <= nr_meta_pages) {
			handle->pbe = pack_orig_addresses(buffer, handle->pbe);
			if (!handle->pbe)
				handle->pbe = pagedir_nosave;
		} else {
			handle->buffer = (void *)handle->pbe->address;
			handle->pbe = handle->pbe->next;
		}
		handle->prev = handle->page;
	}
	handle->buf_offset = handle->page_offset;
	if (handle->page_offset + count >= PAGE_SIZE) {
		count = PAGE_SIZE - handle->page_offset;
		handle->page_offset = 0;
		handle->page++;
	} else {
		handle->page_offset += count;
	}
	handle->offset += count;
	return count;
}
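
/*
 * Illustrative sketch (not compiled): a hypothetical consumer draining the
 * snapshot one chunk at a time, following the contract documented above.
 * write_chunk() is an assumed helper standing in for whatever actually
 * stores the data.
 */
#if 0
static int example_read_image(struct snapshot_handle *handle)
{
	int ret;

	memset(handle, 0, sizeof(*handle));
	while ((ret = snapshot_read_next(handle, PAGE_SIZE)) > 0) {
		/* Up to 'ret' bytes are readable at data_of(*handle). */
		write_chunk(data_of(*handle), ret);
	}
	return ret;	/* 0 on end of data, negative on error */
}
#endif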

/**
 *	mark_unsafe_pages - mark the pages that cannot be used for storing
 *	the image during resume, because they conflict with the pages that
 *	had been used before suspend
 */

static int mark_unsafe_pages(struct pbe *pblist)
{
	struct zone *zone;
	unsigned long zone_pfn;
	struct pbe *p;

	if (!pblist) /* a sanity check */
		return -EINVAL;

	/* Clear page flags */
	for_each_zone (zone) {
		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
			if (pfn_valid(zone_pfn + zone->zone_start_pfn))
				ClearPageNosaveFree(pfn_to_page(zone_pfn +
					zone->zone_start_pfn));
	}

	/* Mark orig addresses */
	for_each_pbe (p, pblist) {
		if (virt_addr_valid(p->orig_address))
			SetPageNosaveFree(virt_to_page(p->orig_address));
		else
			return -EFAULT;
	}

	unsafe_pages = 0;

	return 0;
}

static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
{
	/* We assume both lists contain the same number of elements */
	while (src) {
		dst->orig_address = src->orig_address;
		dst = dst->next;
		src = src->next;
	}
}

static int check_header(struct swsusp_info *info)
{
	char *reason = NULL;

	if (info->version_code != LINUX_VERSION_CODE)
		reason = "kernel version";
	if (info->num_physpages != num_physpages)
		reason = "memory size";
	if (strcmp(info->uts.sysname, system_utsname.sysname))
		reason = "system type";
	if (strcmp(info->uts.release, system_utsname.release))
		reason = "kernel release";
	if (strcmp(info->uts.version, system_utsname.version))
		reason = "version";
	if (strcmp(info->uts.machine, system_utsname.machine))
		reason = "machine";
	if (reason) {
		printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
		return -EPERM;
	}
	return 0;
}

/**
 *	load_header - check the image header and copy the data from it
 */

static int load_header(struct snapshot_handle *handle,
                              struct swsusp_info *info)
{
	int error;
	struct pbe *pblist;

	error = check_header(info);
	if (!error) {
		pblist = alloc_pagedir(info->image_pages, GFP_ATOMIC, 0);
		if (!pblist)
			return -ENOMEM;
		pagedir_nosave = pblist;
		handle->pbe = pblist;
		nr_copy_pages = info->image_pages;
		nr_meta_pages = info->pages - info->image_pages - 1;
	}
	return error;
}

/**
 *	unpack_orig_addresses - copy the elements of @buf[] (1 page) to
 *	the PBEs in the list starting at @pbe
 */

static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
                                                struct pbe *pbe)
{
	int j;

	for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
		pbe->orig_address = buf[j];
		pbe = pbe->next;
	}
	return pbe;
}

/**
 *	prepare_image - use the metadata contained in the PBE list
 *	pointed to by pagedir_nosave to mark the pages that will
 *	be overwritten in the process of restoring the system
 *	memory state from the image ("unsafe" pages) and allocate
 *	memory for the image.
 *
 *	The idea is to allocate the PBE list first and then
 *	allocate as many pages as are needed for the image data,
 *	but not to assign these pages to the PBEs initially.
 *	Instead, we just mark them as allocated and create a list
 *	of "safe" pages to be used later.
 */

struct safe_page {
	struct safe_page *next;
	char padding[PAGE_SIZE - sizeof(void *)];
};

static struct safe_page *safe_pages;

static int prepare_image(struct snapshot_handle *handle)
{
	int error = 0;
	unsigned int nr_pages = nr_copy_pages;
	struct pbe *p, *pblist = NULL;

	p = pagedir_nosave;
	error = mark_unsafe_pages(p);
	if (!error) {
		pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
		if (pblist)
			copy_page_backup_list(pblist, p);
		free_pagedir(p, 0);
		if (!pblist)
			error = -ENOMEM;
	}
	safe_pages = NULL;
	if (!error && nr_pages > unsafe_pages) {
		nr_pages -= unsafe_pages;
		while (nr_pages--) {
			struct safe_page *ptr;

			ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC);
			if (!ptr) {
				error = -ENOMEM;
				break;
			}
			if (!PageNosaveFree(virt_to_page(ptr))) {
				/* The page is "safe", add it to the list */
				ptr->next = safe_pages;
				safe_pages = ptr;
			}
			/* Mark the page as allocated */
			SetPageNosave(virt_to_page(ptr));
			SetPageNosaveFree(virt_to_page(ptr));
		}
	}
	if (!error) {
		pagedir_nosave = pblist;
	} else {
		handle->pbe = NULL;
		swsusp_free();
	}
	return error;
}

static void *get_buffer(struct snapshot_handle *handle)
{
	struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
	struct page *page = virt_to_page(pbe->orig_address);

	if (PageNosave(page) && PageNosaveFree(page)) {
		/*
		 * We have allocated the "original" page frame and we can
		 * use it directly to store the read page
		 */
		pbe->address = 0;
		if (last && last->next)
			last->next = NULL;
		return (void *)pbe->orig_address;
	}
	/*
	 * The "original" page frame has not been allocated and we have to
	 * use a "safe" page frame to store the read page
	 */
	pbe->address = (unsigned long)safe_pages;
	safe_pages = safe_pages->next;
	if (last)
		last->next = pbe;
	handle->last_pbe = pbe;
	return (void *)pbe->address;
}

/**
 *	snapshot_write_next - used for writing the system memory snapshot.
 *
 *	On the first call to it @handle should point to a zeroed
 *	snapshot_handle structure.  The structure gets updated and a pointer
 *	to it should be passed to this function on every subsequent call.
 *
 *	The @count parameter should contain the number of bytes the caller
 *	wants to write to the image.  It must not be zero.
 *
 *	On success the function returns a positive number.  Then, the caller
 *	is allowed to write up to the returned number of bytes to the memory
 *	location computed by the data_of() macro.  The number returned
 *	may be smaller than @count, but this only happens if the write would
 *	cross a page boundary otherwise.
 *
 *	The function returns 0 to indicate the "end of file" condition,
 *	and a negative number is returned on error.  In such cases the
 *	structure pointed to by @handle is not updated and should not be used
 *	any more.
 */

int snapshot_write_next(struct snapshot_handle *handle, size_t count)
{
	int error = 0;

	if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages)
		return 0;
	if (!buffer) {
		/* This makes the buffer be freed by swsusp_free() */
		buffer = alloc_image_page(GFP_ATOMIC, 0);
		if (!buffer)
			return -ENOMEM;
	}
	if (!handle->offset)
		handle->buffer = buffer;
	if (handle->prev < handle->page) {
		if (!handle->prev) {
			error = load_header(handle, (struct swsusp_info *)buffer);
			if (error)
				return error;
		} else if (handle->prev <= nr_meta_pages) {
			handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
			if (!handle->pbe) {
				error = prepare_image(handle);
				if (error)
					return error;
				handle->pbe = pagedir_nosave;
				handle->last_pbe = NULL;
				handle->buffer = get_buffer(handle);
			}
		} else {
			handle->pbe = handle->pbe->next;
			handle->buffer = get_buffer(handle);
		}
		handle->prev = handle->page;
	}
	handle->buf_offset = handle->page_offset;
	if (handle->page_offset + count >= PAGE_SIZE) {
		count = PAGE_SIZE - handle->page_offset;
		handle->page_offset = 0;
		handle->page++;
	} else {
		handle->page_offset += count;
	}
	handle->offset += count;
	return count;
}
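
/*
 * Illustrative sketch (not compiled): a hypothetical producer feeding image
 * data back into memory on resume, following the contract documented above.
 * read_chunk() is an assumed helper standing in for the actual data source
 * and is taken to return the number of bytes read.
 */
#if 0
static int example_load_image(struct snapshot_handle *handle)
{
	int ret;

	memset(handle, 0, sizeof(*handle));
	while ((ret = snapshot_write_next(handle, PAGE_SIZE)) > 0) {
		/* Up to 'ret' bytes may be written at data_of(*handle). */
		if (read_chunk(data_of(*handle), ret) < ret)
			return -EIO;
	}
	if (!ret && !snapshot_image_loaded(handle))
		ret = -ENODATA;
	return ret;
}
#endif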

int snapshot_image_loaded(struct snapshot_handle *handle)
{
	return !(!handle->pbe || handle->pbe->next || !nr_copy_pages ||
		handle->page <= nr_meta_pages + nr_copy_pages);
}