/*
 * linux/kernel/power/swap.c
 *
 * This file provides functions for reading the suspend image from
 * and writing it to a swap partition.
 *
 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com>
 *
 * This file is released under the GPLv2.
 *
 */

#include <linux/module.h>
#include <linux/file.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/genhd.h>
#include <linux/device.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pm.h>
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
#include <linux/cpumask.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/crc32.h>

#include "power.h"

#define HIBERNATE_SIG	"S1SUSPEND"

/*
 *	The swap map is a data structure used for keeping track of each page
 *	written to a swap partition.  It consists of many swap_map_page
 *	structures, each of which contains an array of MAP_PAGE_ENTRIES swap
 *	entries.  These structures are stored in swap and linked together
 *	with the help of the .next_swap member.
 *
 *	The swap map is created during suspend.  The swap map pages are
 *	allocated and populated one at a time, so we only need one memory
 *	page to set up the entire structure.
 *
 *	During resume we read all swap_map_page structures into a list.
 */

#define MAP_PAGE_ENTRIES	(PAGE_SIZE / sizeof(sector_t) - 1)

/*
 * Number of free pages that are not in high memory.
 */
static inline unsigned long low_free_pages(void)
{
	return nr_free_pages() - nr_free_highpages();
}

/*
 * Number of pages required to be kept free while writing the image.  This
 * is always half of the low pages available when the writing starts.
 */
static inline unsigned long reqd_free_pages(void)
{
	return low_free_pages() / 2;
}

struct swap_map_page {
	sector_t entries[MAP_PAGE_ENTRIES];	/* swap locations of data pages */
	sector_t next_swap;			/* swap location of the next map page */
};
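
/*
 * Illustrative on-swap layout (assuming 4 KiB pages and an 8-byte
 * sector_t, so MAP_PAGE_ENTRIES == 511):
 *
 *	map page 0                        map page 1
 *	+---------------------+           +---------------------+
 *	| entries[0..510]     |           | entries[0..510]     |
 *	| next_swap ----------+---------->| next_swap ---> ...  |
 *	+---------------------+           +---------------------+
 */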

struct swap_map_page_list {
	struct swap_map_page *map;
	struct swap_map_page_list *next;
};

/**
 *	The swap_map_handle structure is used for handling swap in
 *	a file-like way
 */

struct swap_map_handle {
	struct swap_map_page *cur;		/* map page being filled/consumed */
	struct swap_map_page_list *maps;	/* list of map pages (resume only) */
	sector_t cur_swap;			/* swap location of 'cur' */
	sector_t first_sector;			/* start of the swap map chain */
	unsigned int k;				/* index into cur->entries[] */
	unsigned long reqd_free_pages;
	u32 crc32;
};

struct swsusp_header {
	char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
	              sizeof(u32)];
	u32	crc32;
	sector_t image;
	unsigned int flags;	/* Flags to pass to the "boot" kernel */
	char	orig_sig[10];
	char	sig[10];
} __packed;
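
/*
 * The amount of padding in ->reserved is chosen so that ->orig_sig and
 * ->sig land in the last 20 bytes of the page, where the swap signature
 * ("SWAP-SPACE" or "SWAPSPACE2") normally lives.  mark_swapfiles() saves
 * the original signature in ->orig_sig and overwrites ->sig with
 * HIBERNATE_SIG; swsusp_check() and swsusp_unmark() restore it.
 */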

static struct swsusp_header *swsusp_header;

/**
 *	The following functions are used for tracking the allocated
 *	swap pages, so that they can be freed in case of an error.
 */

struct swsusp_extent {
	struct rb_node node;
	unsigned long start;	/* first swap offset in the extent */
	unsigned long end;	/* last swap offset in the extent (inclusive) */
};

static struct rb_root swsusp_extents = RB_ROOT;

static int swsusp_extents_insert(unsigned long swap_offset)
{
	struct rb_node **new = &(swsusp_extents.rb_node);
	struct rb_node *parent = NULL;
	struct swsusp_extent *ext;

	/* Figure out where to put the new node */
	while (*new) {
		ext = rb_entry(*new, struct swsusp_extent, node);
		parent = *new;
		if (swap_offset < ext->start) {
			/* Try to merge */
			if (swap_offset == ext->start - 1) {
				ext->start--;
				return 0;
			}
			new = &((*new)->rb_left);
		} else if (swap_offset > ext->end) {
			/* Try to merge */
			if (swap_offset == ext->end + 1) {
				ext->end++;
				return 0;
			}
			new = &((*new)->rb_right);
		} else {
			/* It already is in the tree */
			return -EINVAL;
		}
	}
	/* Add the new node and rebalance the tree. */
	ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
	if (!ext)
		return -ENOMEM;

	ext->start = swap_offset;
	ext->end = swap_offset;
	rb_link_node(&ext->node, parent, new);
	rb_insert_color(&ext->node, &swsusp_extents);
	return 0;
}
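
/*
 * Example: inserting offsets 10, 11 and 13 produces the extents [10,11]
 * and [13,13].  Inserting 12 afterwards extends whichever neighbour is
 * found first on the way down the tree; adjacent extents are not
 * coalesced with each other, which is fine for the sole purpose of
 * freeing everything later.
 */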

/**
 *	alloc_swapdev_block - allocate a swap page and register that it has
 *	been allocated, so that it can be freed in case of an error.
 */

sector_t alloc_swapdev_block(int swap)
{
	unsigned long offset;

	offset = swp_offset(get_swap_page_of_type(swap));
	if (offset) {
		if (swsusp_extents_insert(offset))
			swap_free(swp_entry(swap, offset));
		else
			return swapdev_block(swap, offset);
	}
	return 0;
}

/**
 *	free_all_swap_pages - free swap pages allocated for saving image data.
 *	It also frees the extents used to register which swap entries had been
 *	allocated.
 */

void free_all_swap_pages(int swap)
{
	struct rb_node *node;

	while ((node = swsusp_extents.rb_node)) {
		struct swsusp_extent *ext;
		unsigned long offset;

		ext = container_of(node, struct swsusp_extent, node);
		rb_erase(node, &swsusp_extents);
		for (offset = ext->start; offset <= ext->end; offset++)
			swap_free(swp_entry(swap, offset));

		kfree(ext);
	}
}

int swsusp_swap_in_use(void)
{
	return (swsusp_extents.rb_node != NULL);
}

/*
 * General things
 */

static unsigned short root_swap = 0xffff;
struct block_device *hib_resume_bdev;

/*
 * Saving part
 */

static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
{
	int error;

	hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
	if (!memcmp("SWAP-SPACE", swsusp_header->sig, 10) ||
	    !memcmp("SWAPSPACE2", swsusp_header->sig, 10)) {
		memcpy(swsusp_header->orig_sig, swsusp_header->sig, 10);
		memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
		swsusp_header->image = handle->first_sector;
		swsusp_header->flags = flags;
		if (flags & SF_CRC32_MODE)
			swsusp_header->crc32 = handle->crc32;
		error = hib_bio_write_page(swsusp_resume_block,
					swsusp_header, NULL);
	} else {
		printk(KERN_ERR "PM: Swap header not found!\n");
		error = -ENODEV;
	}
	return error;
}

/**
 *	swsusp_swap_check - check if the resume device is a swap device
 *	and get its index (if so)
 *
 *	This is called before saving the image.
 */
static int swsusp_swap_check(void)
{
	int res;

	res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
			&hib_resume_bdev);
	if (res < 0)
		return res;

	root_swap = res;
	res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
	if (res)
		return res;

	res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
	if (res < 0)
		blkdev_put(hib_resume_bdev, FMODE_WRITE);

	return res;
}

/**
 *	write_page - Write one page to given swap location.
 *	@buf:		Address we're writing.
 *	@offset:	Offset of the swap page we're writing to.
 *	@bio_chain:	Link the next write BIO here
 */

static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
	void *src;
	int ret;

	if (!offset)
		return -ENOSPC;

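	/*
	 * For chained (asynchronous) writes the caller may reuse @buf as
	 * soon as we return, so write out a private copy of the data; the
	 * copies are freed when the BIO chain is waited on.
	 */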
	if (bio_chain) {
		src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
		                              __GFP_NORETRY);
		if (src) {
			copy_page(src, buf);
		} else {
			/* Wait for the chain; this also frees the copies. */
			ret = hib_wait_on_bio_chain(bio_chain);
			if (ret)
				return ret;
			src = (void *)__get_free_page(__GFP_WAIT |
			                              __GFP_NOWARN |
			                              __GFP_NORETRY);
			if (src) {
				copy_page(src, buf);
			} else {
				WARN_ON_ONCE(1);
				bio_chain = NULL;	/* Go synchronous */
				src = buf;
			}
		}
	} else {
		src = buf;
	}
	return hib_bio_write_page(offset, src, bio_chain);
}

static void release_swap_writer(struct swap_map_handle *handle)
{
	if (handle->cur)
		free_page((unsigned long)handle->cur);
	handle->cur = NULL;
}

static int get_swap_writer(struct swap_map_handle *handle)
{
	int ret;

	ret = swsusp_swap_check();
	if (ret) {
		if (ret != -ENOSPC)
			printk(KERN_ERR "PM: Cannot find swap device, try "
					"swapon -a.\n");
		return ret;
	}
	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
	if (!handle->cur) {
		ret = -ENOMEM;
		goto err_close;
	}
	handle->cur_swap = alloc_swapdev_block(root_swap);
	if (!handle->cur_swap) {
		ret = -ENOSPC;
		goto err_rel;
	}
	handle->k = 0;
	handle->reqd_free_pages = reqd_free_pages();
	handle->first_sector = handle->cur_swap;
	return 0;
err_rel:
	release_swap_writer(handle);
err_close:
	swsusp_close(FMODE_WRITE);
	return ret;
}

static int swap_write_page(struct swap_map_handle *handle, void *buf,
				struct bio **bio_chain)
{
	int error = 0;
	sector_t offset;

	if (!handle->cur)
		return -EINVAL;
	offset = alloc_swapdev_block(root_swap);
	error = write_page(buf, offset, bio_chain);
	if (error)
		return error;
	handle->cur->entries[handle->k++] = offset;
	if (handle->k >= MAP_PAGE_ENTRIES) {
		offset = alloc_swapdev_block(root_swap);
		if (!offset)
			return -ENOSPC;
		handle->cur->next_swap = offset;
		error = write_page(handle->cur, handle->cur_swap, bio_chain);
		if (error)
			goto out;
		clear_page(handle->cur);
		handle->cur_swap = offset;
		handle->k = 0;

		if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
			error = hib_wait_on_bio_chain(bio_chain);
			if (error)
				goto out;
			/*
			 * Recalculate the number of required free pages, to
			 * make sure we never take more than half.
			 */
			handle->reqd_free_pages = reqd_free_pages();
		}
	}
 out:
	return error;
}

static int flush_swap_writer(struct swap_map_handle *handle)
{
	if (handle->cur && handle->cur_swap)
		return write_page(handle->cur, handle->cur_swap, NULL);
	else
		return -EINVAL;
}

static int swap_writer_finish(struct swap_map_handle *handle,
		unsigned int flags, int error)
{
	if (!error) {
		flush_swap_writer(handle);
		printk(KERN_INFO "PM: S");
		error = mark_swapfiles(handle, flags);
		printk("|\n");
	}

	if (error)
		free_all_swap_pages(root_swap);
	release_swap_writer(handle);
	swsusp_close(FMODE_WRITE);

	return error;
}

/* We need to remember how much compressed data we need to read. */
#define LZO_HEADER	sizeof(size_t)

/* Number of pages/bytes we'll compress at one time. */
#define LZO_UNC_PAGES	32
#define LZO_UNC_SIZE	(LZO_UNC_PAGES * PAGE_SIZE)

/* Number of pages/bytes we need for compressed data (worst case). */
#define LZO_CMP_PAGES	DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
			             LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE	(LZO_CMP_PAGES * PAGE_SIZE)
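
/*
 * Worked example, assuming 4 KiB pages, an 8-byte size_t and the usual
 * LZO worst-case bound of x + x/16 + 64 + 3: LZO_UNC_SIZE is 128 KiB,
 * its worst-case compressed size is 139331 bytes, and together with
 * LZO_HEADER that rounds up to LZO_CMP_PAGES == 35 pages.
 */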

/* Maximum number of threads for compression/decompression. */
#define LZO_THREADS	3

/* Minimum/maximum number of pages for read buffering. */
#define LZO_MIN_RD_PAGES	1024
#define LZO_MAX_RD_PAGES	8192


/**
 *	save_image - save the suspend image data
 */

static int save_image(struct swap_map_handle *handle,
                      struct snapshot_handle *snapshot,
                      unsigned int nr_to_write)
{
	unsigned int m;
	int ret;
	int nr_pages;
	int err2;
	struct bio *bio;
	struct timeval start;
	struct timeval stop;

	printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n",
		nr_to_write);
	m = nr_to_write / 10;
	if (!m)
		m = 1;
	nr_pages = 0;
	bio = NULL;
	do_gettimeofday(&start);
	while (1) {
		ret = snapshot_read_next(snapshot);
		if (ret <= 0)
			break;
		ret = swap_write_page(handle, data_of(*snapshot), &bio);
		if (ret)
			break;
		if (!(nr_pages % m))
			printk(KERN_INFO "PM: Image saving progress: %3d%%\n",
			       nr_pages / m * 10);
		nr_pages++;
	}
	err2 = hib_wait_on_bio_chain(&bio);
	do_gettimeofday(&stop);
	if (!ret)
		ret = err2;
	if (!ret)
		printk(KERN_INFO "PM: Image saving done.\n");
	swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
	return ret;
}

/**
 * Structure used for CRC32.
 */
struct crc_data {
	struct task_struct *thr;                  /* thread */
	atomic_t ready;                           /* ready to start flag */
	atomic_t stop;                            /* ready to stop flag */
	unsigned run_threads;                     /* number of buffers to checksum */
	wait_queue_head_t go;                     /* start crc update */
	wait_queue_head_t done;                   /* crc update done */
	u32 *crc32;                               /* points to handle's crc32 */
	size_t *unc_len[LZO_THREADS];             /* uncompressed lengths */
	unsigned char *unc[LZO_THREADS];          /* uncompressed data */
};

/**
 * CRC32 update function that runs in its own thread.
 */
static int crc32_threadfn(void *data)
{
	struct crc_data *d = data;
	unsigned i;

	while (1) {
		wait_event(d->go, atomic_read(&d->ready) ||
		                  kthread_should_stop());
		if (kthread_should_stop()) {
			d->thr = NULL;
			atomic_set(&d->stop, 1);
			wake_up(&d->done);
			break;
		}
		atomic_set(&d->ready, 0);

		for (i = 0; i < d->run_threads; i++)
			*d->crc32 = crc32_le(*d->crc32,
			                     d->unc[i], *d->unc_len[i]);
		atomic_set(&d->stop, 1);
		wake_up(&d->done);
	}
	return 0;
}
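
/*
 * Handshake used by the callers below to drive this thread and the
 * (de)compression threads (illustrative sketch for one worker d):
 *
 *	atomic_set(&d->ready, 1);
 *	wake_up(&d->go);			kick the worker
 *	...
 *	wait_event(d->done, atomic_read(&d->stop));
 *	atomic_set(&d->stop, 0);		re-arm for the next round
 */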

/**
 * Structure used for LZO data compression.
 */
struct cmp_data {
	struct task_struct *thr;                  /* thread */
	atomic_t ready;                           /* ready to start flag */
	atomic_t stop;                            /* ready to stop flag */
	int ret;                                  /* return code */
	wait_queue_head_t go;                     /* start compression */
	wait_queue_head_t done;                   /* compression done */
	size_t unc_len;                           /* uncompressed length */
	size_t cmp_len;                           /* compressed length */
	unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
	unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
	unsigned char wrk[LZO1X_1_MEM_COMPRESS];  /* compression workspace */
};

/**
 * Compression function that runs in its own thread.
 */
static int lzo_compress_threadfn(void *data)
{
	struct cmp_data *d = data;

	while (1) {
		wait_event(d->go, atomic_read(&d->ready) ||
		                  kthread_should_stop());
		if (kthread_should_stop()) {
			d->thr = NULL;
			d->ret = -1;
			atomic_set(&d->stop, 1);
			wake_up(&d->done);
			break;
		}
		atomic_set(&d->ready, 0);

		d->ret = lzo1x_1_compress(d->unc, d->unc_len,
		                          d->cmp + LZO_HEADER, &d->cmp_len,
		                          d->wrk);
		atomic_set(&d->stop, 1);
		wake_up(&d->done);
	}
	return 0;
}

/**
 * save_image_lzo - Save the suspend image data compressed with LZO.
 * @handle: Swap map handle to use for saving the image.
 * @snapshot: Image to read data from.
 * @nr_to_write: Number of pages to save.
 */
static int save_image_lzo(struct swap_map_handle *handle,
                          struct snapshot_handle *snapshot,
                          unsigned int nr_to_write)
{
	unsigned int m;
	int ret = 0;
	int nr_pages;
	int err2;
	struct bio *bio;
	struct timeval start;
	struct timeval stop;
	size_t off;
	unsigned thr, run_threads, nr_threads;
	unsigned char *page = NULL;
	struct cmp_data *data = NULL;
	struct crc_data *crc = NULL;

	/*
	 * Limit the number of compression threads to bound the memory
	 * footprint.
	 */
	nr_threads = num_online_cpus() - 1;
	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);

	page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
	if (!page) {
		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
		ret = -ENOMEM;
		goto out_clean;
	}

	data = vmalloc(sizeof(*data) * nr_threads);
	if (!data) {
		printk(KERN_ERR "PM: Failed to allocate LZO data\n");
		ret = -ENOMEM;
		goto out_clean;
	}
	for (thr = 0; thr < nr_threads; thr++)
		memset(&data[thr], 0, offsetof(struct cmp_data, go));

	crc = kmalloc(sizeof(*crc), GFP_KERNEL);
	if (!crc) {
		printk(KERN_ERR "PM: Failed to allocate crc\n");
		ret = -ENOMEM;
		goto out_clean;
	}
	memset(crc, 0, offsetof(struct crc_data, go));

	/*
	 * Start the compression threads.
	 */
	for (thr = 0; thr < nr_threads; thr++) {
		init_waitqueue_head(&data[thr].go);
		init_waitqueue_head(&data[thr].done);

		data[thr].thr = kthread_run(lzo_compress_threadfn,
		                            &data[thr],
		                            "image_compress/%u", thr);
		if (IS_ERR(data[thr].thr)) {
			data[thr].thr = NULL;
			printk(KERN_ERR
			       "PM: Cannot start compression threads\n");
			ret = -ENOMEM;
			goto out_clean;
		}
	}

	/*
	 * Start the CRC32 thread.
	 */
	init_waitqueue_head(&crc->go);
	init_waitqueue_head(&crc->done);

	handle->crc32 = 0;
	crc->crc32 = &handle->crc32;
	for (thr = 0; thr < nr_threads; thr++) {
		crc->unc[thr] = data[thr].unc;
		crc->unc_len[thr] = &data[thr].unc_len;
	}

	crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
	if (IS_ERR(crc->thr)) {
		crc->thr = NULL;
		printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
		ret = -ENOMEM;
		goto out_clean;
	}

	/*
	 * Adjust the number of required free pages after all allocations have
	 * been done. We don't want to run out of pages when writing.
	 */
	handle->reqd_free_pages = reqd_free_pages();

	printk(KERN_INFO
		"PM: Using %u thread(s) for compression.\n"
		"PM: Compressing and saving image data (%u pages)...\n",
		nr_threads, nr_to_write);
	m = nr_to_write / 10;
	if (!m)
		m = 1;
	nr_pages = 0;
	bio = NULL;
	do_gettimeofday(&start);
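
	/*
	 * Pipeline sketch: fill each thread's uncompressed buffer from the
	 * snapshot, kick all compressors and the CRC32 thread, then collect
	 * every thread's result in submission order and write it out one
	 * page at a time.
	 */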
	for (;;) {
		for (thr = 0; thr < nr_threads; thr++) {
			for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
				ret = snapshot_read_next(snapshot);
				if (ret < 0)
					goto out_finish;

				if (!ret)
					break;

				memcpy(data[thr].unc + off,
				       data_of(*snapshot), PAGE_SIZE);

				if (!(nr_pages % m))
					printk(KERN_INFO
					       "PM: Image saving progress: "
					       "%3d%%\n",
					       nr_pages / m * 10);
				nr_pages++;
			}
			if (!off)
				break;

			data[thr].unc_len = off;

			atomic_set(&data[thr].ready, 1);
			wake_up(&data[thr].go);
		}

		if (!thr)
			break;

		crc->run_threads = thr;
		atomic_set(&crc->ready, 1);
		wake_up(&crc->go);

		for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
			wait_event(data[thr].done,
			           atomic_read(&data[thr].stop));
			atomic_set(&data[thr].stop, 0);

			ret = data[thr].ret;

			if (ret < 0) {
				printk(KERN_ERR "PM: LZO compression failed\n");
				goto out_finish;
			}

			if (unlikely(!data[thr].cmp_len ||
			             data[thr].cmp_len >
			             lzo1x_worst_compress(data[thr].unc_len))) {
				printk(KERN_ERR
				       "PM: Invalid LZO compressed length\n");
				ret = -1;
				goto out_finish;
			}

			*(size_t *)data[thr].cmp = data[thr].cmp_len;

			/*
			 * Given we are writing one page at a time to disk, we
			 * copy that much from the buffer, although the last
			 * bit will likely be smaller than full page. This is
			 * OK - we saved the length of the compressed data, so
			 * any garbage at the end will be discarded when we
			 * read it.
			 */
			for (off = 0;
			     off < LZO_HEADER + data[thr].cmp_len;
			     off += PAGE_SIZE) {
				memcpy(page, data[thr].cmp + off, PAGE_SIZE);

				ret = swap_write_page(handle, page, &bio);
				if (ret)
					goto out_finish;
			}
		}

		wait_event(crc->done, atomic_read(&crc->stop));
		atomic_set(&crc->stop, 0);
	}

out_finish:
	err2 = hib_wait_on_bio_chain(&bio);
	do_gettimeofday(&stop);
	if (!ret)
		ret = err2;
	if (!ret)
		printk(KERN_INFO "PM: Image saving done.\n");
	swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
out_clean:
	if (crc) {
		if (crc->thr)
			kthread_stop(crc->thr);
		kfree(crc);
	}
	if (data) {
		for (thr = 0; thr < nr_threads; thr++)
			if (data[thr].thr)
				kthread_stop(data[thr].thr);
		vfree(data);
	}
	if (page)
		free_page((unsigned long)page);

	return ret;
}

/**
 *	enough_swap - Make sure we have enough swap to save the image.
 *
 *	Returns TRUE or FALSE after checking the total amount of swap
 *	space available on the resume partition.
 */

static int enough_swap(unsigned int nr_pages, unsigned int flags)
{
	unsigned int free_swap = count_swap_pages(root_swap, 1);
	unsigned int required;

	pr_debug("PM: Free swap pages: %u\n", free_swap);

	required = PAGES_FOR_IO + nr_pages;
	return free_swap > required;
}

/**
 *	swsusp_write - Write entire image and metadata.
 *	@flags: flags to pass to the "boot" kernel in the image header
 *
 *	It is important _NOT_ to umount filesystems at this point. We want
 *	them synced (in case something goes wrong), but we DO NOT want to
 *	mark the filesystems clean: they are not.  (And it does not matter;
 *	if we resume correctly, we'll mark the system clean anyway.)
 */

int swsusp_write(unsigned int flags)
{
	struct swap_map_handle handle;
	struct snapshot_handle snapshot;
	struct swsusp_info *header;
	unsigned long pages;
	int error;

	pages = snapshot_get_image_size();
	error = get_swap_writer(&handle);
	if (error) {
		printk(KERN_ERR "PM: Cannot get swap writer\n");
		return error;
	}
	if (flags & SF_NOCOMPRESS_MODE) {
		if (!enough_swap(pages, flags)) {
			printk(KERN_ERR "PM: Not enough free swap\n");
			error = -ENOSPC;
			goto out_finish;
		}
	}
	memset(&snapshot, 0, sizeof(struct snapshot_handle));
	error = snapshot_read_next(&snapshot);
	if (error < PAGE_SIZE) {
		if (error >= 0)
			error = -EFAULT;

		goto out_finish;
	}
	header = (struct swsusp_info *)data_of(snapshot);
	error = swap_write_page(&handle, header, NULL);
	if (!error) {
		error = (flags & SF_NOCOMPRESS_MODE) ?
			save_image(&handle, &snapshot, pages - 1) :
			save_image_lzo(&handle, &snapshot, pages - 1);
	}
out_finish:
	error = swap_writer_finish(&handle, flags, error);
	return error;
}

/**
 *	The following functions allow us to read data using a swap map
 *	in a file-like way.
 */

static void release_swap_reader(struct swap_map_handle *handle)
{
	struct swap_map_page_list *tmp;

	while (handle->maps) {
		if (handle->maps->map)
			free_page((unsigned long)handle->maps->map);
		tmp = handle->maps;
		handle->maps = handle->maps->next;
		kfree(tmp);
	}
	handle->cur = NULL;
}

static int get_swap_reader(struct swap_map_handle *handle,
		unsigned int *flags_p)
{
	int error;
	struct swap_map_page_list *tmp, *last;
	sector_t offset;

	*flags_p = swsusp_header->flags;

	if (!swsusp_header->image) /* how can this happen? */
		return -EINVAL;

	handle->cur = NULL;
	last = handle->maps = NULL;
	offset = swsusp_header->image;
	while (offset) {
		tmp = kzalloc(sizeof(*handle->maps), GFP_KERNEL);
		if (!tmp) {
			release_swap_reader(handle);
			return -ENOMEM;
		}
		if (!handle->maps)
			handle->maps = tmp;
		if (last)
			last->next = tmp;
		last = tmp;

		tmp->map = (struct swap_map_page *)
		           __get_free_page(__GFP_WAIT | __GFP_HIGH);
		if (!tmp->map) {
			release_swap_reader(handle);
			return -ENOMEM;
		}

		error = hib_bio_read_page(offset, tmp->map, NULL);
		if (error) {
			release_swap_reader(handle);
			return error;
		}
		offset = tmp->map->next_swap;
	}
	handle->k = 0;
	handle->cur = handle->maps->map;
	return 0;
}

static int swap_read_page(struct swap_map_handle *handle, void *buf,
				struct bio **bio_chain)
{
	sector_t offset;
	int error;
	struct swap_map_page_list *tmp;

	if (!handle->cur)
		return -EINVAL;
	offset = handle->cur->entries[handle->k];
	if (!offset)
		return -EFAULT;
	error = hib_bio_read_page(offset, buf, bio_chain);
	if (error)
		return error;
	if (++handle->k >= MAP_PAGE_ENTRIES) {
		handle->k = 0;
		free_page((unsigned long)handle->maps->map);
		tmp = handle->maps;
		handle->maps = handle->maps->next;
		kfree(tmp);
		if (!handle->maps)
			release_swap_reader(handle);
		else
			handle->cur = handle->maps->map;
	}
	return error;
}

static int swap_reader_finish(struct swap_map_handle *handle)
{
	release_swap_reader(handle);

	return 0;
}

/**
 *	load_image - load the image using the swap map handle
 *	@handle and the snapshot handle @snapshot
 *	(assume there are @nr_to_read pages to load)
 */

static int load_image(struct swap_map_handle *handle,
                      struct snapshot_handle *snapshot,
                      unsigned int nr_to_read)
{
	unsigned int m;
	int ret = 0;
	struct timeval start;
	struct timeval stop;
	struct bio *bio;
	int err2;
	unsigned nr_pages;

	printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
		nr_to_read);
	m = nr_to_read / 10;
	if (!m)
		m = 1;
	nr_pages = 0;
	bio = NULL;
	do_gettimeofday(&start);
	for (;;) {
		ret = snapshot_write_next(snapshot);
		if (ret <= 0)
			break;
		ret = swap_read_page(handle, data_of(*snapshot), &bio);
		if (ret)
			break;
		if (snapshot->sync_read)
			ret = hib_wait_on_bio_chain(&bio);
		if (ret)
			break;
		if (!(nr_pages % m))
			printk(KERN_INFO "PM: Image loading progress: %3d%%\n",
			       nr_pages / m * 10);
		nr_pages++;
	}
	err2 = hib_wait_on_bio_chain(&bio);
	do_gettimeofday(&stop);
	if (!ret)
		ret = err2;
	if (!ret) {
		printk(KERN_INFO "PM: Image loading done.\n");
		snapshot_write_finalize(snapshot);
		if (!snapshot_image_loaded(snapshot))
			ret = -ENODATA;
	}
	swsusp_show_speed(&start, &stop, nr_to_read, "Read");
	return ret;
}

/**
 * Structure used for LZO data decompression.
 */
struct dec_data {
	struct task_struct *thr;                  /* thread */
	atomic_t ready;                           /* ready to start flag */
	atomic_t stop;                            /* ready to stop flag */
	int ret;                                  /* return code */
	wait_queue_head_t go;                     /* start decompression */
	wait_queue_head_t done;                   /* decompression done */
	size_t unc_len;                           /* uncompressed length */
	size_t cmp_len;                           /* compressed length */
	unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
	unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
};

/**
 * Decompression function that runs in its own thread.
 */
static int lzo_decompress_threadfn(void *data)
{
	struct dec_data *d = data;

	while (1) {
		wait_event(d->go, atomic_read(&d->ready) ||
		                  kthread_should_stop());
		if (kthread_should_stop()) {
			d->thr = NULL;
			d->ret = -1;
			atomic_set(&d->stop, 1);
			wake_up(&d->done);
			break;
		}
		atomic_set(&d->ready, 0);

		d->unc_len = LZO_UNC_SIZE;
		d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
		                               d->unc, &d->unc_len);
		atomic_set(&d->stop, 1);
		wake_up(&d->done);
	}
	return 0;
}

/**
 * load_image_lzo - Load compressed image data and decompress it with LZO.
 * @handle: Swap map handle to use for loading data.
 * @snapshot: Image to copy uncompressed data into.
 * @nr_to_read: Number of pages to load.
 */
static int load_image_lzo(struct swap_map_handle *handle,
                          struct snapshot_handle *snapshot,
                          unsigned int nr_to_read)
{
	unsigned int m;
	int ret = 0;
	int eof = 0;
	struct bio *bio;
	struct timeval start;
	struct timeval stop;
	unsigned nr_pages;
	size_t off;
	unsigned i, thr, run_threads, nr_threads;
	unsigned ring = 0, pg = 0, ring_size = 0,
	         have = 0, want, need, asked = 0;
	unsigned long read_pages = 0;
	unsigned char **page = NULL;
	struct dec_data *data = NULL;
	struct crc_data *crc = NULL;

	/*
	 * Limit the number of decompression threads to bound the memory
	 * footprint.
	 */
	nr_threads = num_online_cpus() - 1;
	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);

	page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES);
	if (!page) {
		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
		ret = -ENOMEM;
		goto out_clean;
	}

	data = vmalloc(sizeof(*data) * nr_threads);
	if (!data) {
		printk(KERN_ERR "PM: Failed to allocate LZO data\n");
		ret = -ENOMEM;
		goto out_clean;
	}
	for (thr = 0; thr < nr_threads; thr++)
		memset(&data[thr], 0, offsetof(struct dec_data, go));

	crc = kmalloc(sizeof(*crc), GFP_KERNEL);
	if (!crc) {
		printk(KERN_ERR "PM: Failed to allocate crc\n");
		ret = -ENOMEM;
		goto out_clean;
	}
	memset(crc, 0, offsetof(struct crc_data, go));

	/*
	 * Start the decompression threads.
	 */
	for (thr = 0; thr < nr_threads; thr++) {
		init_waitqueue_head(&data[thr].go);
		init_waitqueue_head(&data[thr].done);

		data[thr].thr = kthread_run(lzo_decompress_threadfn,
		                            &data[thr],
		                            "image_decompress/%u", thr);
		if (IS_ERR(data[thr].thr)) {
			data[thr].thr = NULL;
			printk(KERN_ERR
			       "PM: Cannot start decompression threads\n");
			ret = -ENOMEM;
			goto out_clean;
		}
	}

	/*
	 * Start the CRC32 thread.
	 */
	init_waitqueue_head(&crc->go);
	init_waitqueue_head(&crc->done);

	handle->crc32 = 0;
	crc->crc32 = &handle->crc32;
	for (thr = 0; thr < nr_threads; thr++) {
		crc->unc[thr] = data[thr].unc;
		crc->unc_len[thr] = &data[thr].unc_len;
	}

	crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
	if (IS_ERR(crc->thr)) {
		crc->thr = NULL;
		printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
		ret = -ENOMEM;
		goto out_clean;
	}

	/*
	 * Set the number of pages for read buffering.
	 * This is complete guesswork, because we'll only know the real
	 * picture once prepare_image() is called, which is much later on
	 * during the image load phase. We'll assume the worst case and
	 * say that none of the image pages are from high memory.
	 */
	if (low_free_pages() > snapshot_get_image_size())
		read_pages = (low_free_pages() - snapshot_get_image_size()) / 2;
	read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES);
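	/* With 4 KiB pages this clamps the read buffer to 4..32 MiB. */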

	for (i = 0; i < read_pages; i++) {
		page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
		                                  __GFP_WAIT | __GFP_HIGH :
		                                  __GFP_WAIT | __GFP_NOWARN |
		                                  __GFP_NORETRY);

		if (!page[i]) {
			if (i < LZO_CMP_PAGES) {
				ring_size = i;
				printk(KERN_ERR
				       "PM: Failed to allocate LZO pages\n");
				ret = -ENOMEM;
				goto out_clean;
			} else {
				break;
			}
		}
	}
	want = ring_size = i;

	printk(KERN_INFO
		"PM: Using %u thread(s) for decompression.\n"
		"PM: Loading and decompressing image data (%u pages)...\n",
		nr_threads, nr_to_read);
	m = nr_to_read / 10;
	if (!m)
		m = 1;
	nr_pages = 0;
	bio = NULL;
	do_gettimeofday(&start);

	ret = snapshot_write_next(snapshot);
	if (ret <= 0)
		goto out_finish;

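	/*
	 * Ring buffer bookkeeping (sketch): page[] is a ring of ring_size
	 * pages.  "ring" is the producer index (the next page to read
	 * into), "pg" the consumer index (the next page handed to a
	 * thread), "have" counts pages whose reads have completed, "asked"
	 * pages with reads still in flight, and "want" how many more reads
	 * to submit to keep the ring full.
	 */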
	for (;;) {
		for (i = 0; !eof && i < want; i++) {
			ret = swap_read_page(handle, page[ring], &bio);
			if (ret) {
				/*
				 * On real read error, finish. On end of data,
				 * set EOF flag and just exit the read loop.
				 */
				if (handle->cur &&
				    handle->cur->entries[handle->k]) {
					goto out_finish;
				} else {
					eof = 1;
					break;
				}
			}
			if (++ring >= ring_size)
				ring = 0;
		}
		asked += i;
		want -= i;

		/*
		 * We are out of data, wait for some more.
		 */
		if (!have) {
			if (!asked)
				break;

			ret = hib_wait_on_bio_chain(&bio);
			if (ret)
				goto out_finish;
			have += asked;
			asked = 0;
			if (eof)
				eof = 2;
		}

		if (crc->run_threads) {
			wait_event(crc->done, atomic_read(&crc->stop));
			atomic_set(&crc->stop, 0);
			crc->run_threads = 0;
		}

		for (thr = 0; have && thr < nr_threads; thr++) {
			data[thr].cmp_len = *(size_t *)page[pg];
			if (unlikely(!data[thr].cmp_len ||
			             data[thr].cmp_len >
			             lzo1x_worst_compress(LZO_UNC_SIZE))) {
				printk(KERN_ERR
				       "PM: Invalid LZO compressed length\n");
				ret = -1;
				goto out_finish;
			}

			need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
			                    PAGE_SIZE);
			if (need > have) {
				if (eof > 1) {
					/* no more data coming: truncated image */
					ret = -1;
					goto out_finish;
				}
				break;
			}

			for (off = 0;
			     off < LZO_HEADER + data[thr].cmp_len;
			     off += PAGE_SIZE) {
				memcpy(data[thr].cmp + off,
				       page[pg], PAGE_SIZE);
				have--;
				want++;
				if (++pg >= ring_size)
					pg = 0;
			}

			atomic_set(&data[thr].ready, 1);
			wake_up(&data[thr].go);
		}

		/*
		 * Wait for more data while we are decompressing.
		 */
		if (have < LZO_CMP_PAGES && asked) {
			ret = hib_wait_on_bio_chain(&bio);
			if (ret)
				goto out_finish;
			have += asked;
			asked = 0;
			if (eof)
				eof = 2;
		}

		for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
			wait_event(data[thr].done,
			           atomic_read(&data[thr].stop));
			atomic_set(&data[thr].stop, 0);

			ret = data[thr].ret;

			if (ret < 0) {
				printk(KERN_ERR
				       "PM: LZO decompression failed\n");
				goto out_finish;
			}

			if (unlikely(!data[thr].unc_len ||
			             data[thr].unc_len > LZO_UNC_SIZE ||
			             data[thr].unc_len & (PAGE_SIZE - 1))) {
				printk(KERN_ERR
				       "PM: Invalid LZO uncompressed length\n");
				ret = -1;
				goto out_finish;
			}

			for (off = 0;
			     off < data[thr].unc_len; off += PAGE_SIZE) {
				memcpy(data_of(*snapshot),
				       data[thr].unc + off, PAGE_SIZE);

				if (!(nr_pages % m))
					printk(KERN_INFO
					       "PM: Image loading progress: "
					       "%3d%%\n",
					       nr_pages / m * 10);
				nr_pages++;

				ret = snapshot_write_next(snapshot);
				if (ret <= 0) {
					crc->run_threads = thr + 1;
					atomic_set(&crc->ready, 1);
					wake_up(&crc->go);
					goto out_finish;
				}
			}
		}

		crc->run_threads = thr;
		atomic_set(&crc->ready, 1);
		wake_up(&crc->go);
	}

out_finish:
	if (crc->run_threads) {
		wait_event(crc->done, atomic_read(&crc->stop));
		atomic_set(&crc->stop, 0);
	}
	do_gettimeofday(&stop);
	if (!ret) {
		printk(KERN_INFO "PM: Image loading done.\n");
		snapshot_write_finalize(snapshot);
		if (!snapshot_image_loaded(snapshot))
			ret = -ENODATA;
		if (!ret) {
			if (swsusp_header->flags & SF_CRC32_MODE) {
				if (handle->crc32 != swsusp_header->crc32) {
					printk(KERN_ERR
					       "PM: Invalid image CRC32!\n");
					ret = -ENODATA;
				}
			}
		}
	}
	swsusp_show_speed(&start, &stop, nr_to_read, "Read");
out_clean:
	for (i = 0; i < ring_size; i++)
		free_page((unsigned long)page[i]);
	if (crc) {
		if (crc->thr)
			kthread_stop(crc->thr);
		kfree(crc);
	}
	if (data) {
		for (thr = 0; thr < nr_threads; thr++)
			if (data[thr].thr)
				kthread_stop(data[thr].thr);
		vfree(data);
	}
	if (page)
		vfree(page);

	return ret;
}

/**
 *	swsusp_read - read the hibernation image.
 *	@flags_p: flags passed by the "frozen" kernel in the image header
 *		  should be written into this memory location
 */

int swsusp_read(unsigned int *flags_p)
{
	int error;
	struct swap_map_handle handle;
	struct snapshot_handle snapshot;
	struct swsusp_info *header;

	memset(&snapshot, 0, sizeof(struct snapshot_handle));
	error = snapshot_write_next(&snapshot);
	if (error < PAGE_SIZE)
		return error < 0 ? error : -EFAULT;
	header = (struct swsusp_info *)data_of(snapshot);
	error = get_swap_reader(&handle, flags_p);
	if (error)
		goto end;
	error = swap_read_page(&handle, header, NULL);
	if (!error) {
		error = (*flags_p & SF_NOCOMPRESS_MODE) ?
			load_image(&handle, &snapshot, header->pages - 1) :
			load_image_lzo(&handle, &snapshot, header->pages - 1);
	}
	swap_reader_finish(&handle);
end:
	if (!error)
		pr_debug("PM: Image successfully loaded\n");
	else
		pr_debug("PM: Error %d resuming\n", error);
	return error;
}

/**
 *      swsusp_check - Check for swsusp signature in the resume device
 */

int swsusp_check(void)
{
	int error;

	hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
					    FMODE_READ, NULL);
	if (!IS_ERR(hib_resume_bdev)) {
		set_blocksize(hib_resume_bdev, PAGE_SIZE);
		clear_page(swsusp_header);
		error = hib_bio_read_page(swsusp_resume_block,
					swsusp_header, NULL);
		if (error)
			goto put;

		if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
			memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
			/* Reset swap signature now */
			error = hib_bio_write_page(swsusp_resume_block,
						swsusp_header, NULL);
		} else {
			error = -EINVAL;
		}

put:
		if (error)
			blkdev_put(hib_resume_bdev, FMODE_READ);
		else
			pr_debug("PM: Image signature found, resuming\n");
	} else {
		error = PTR_ERR(hib_resume_bdev);
	}

	if (error)
		pr_debug("PM: Image not found (code %d)\n", error);

	return error;
}

/**
 *	swsusp_close - close swap device.
 */

void swsusp_close(fmode_t mode)
{
	if (IS_ERR(hib_resume_bdev)) {
		pr_debug("PM: Image device not initialised\n");
		return;
	}

	blkdev_put(hib_resume_bdev, mode);
}

/**
 *      swsusp_unmark - Unmark swsusp signature in the resume device
 */

#ifdef CONFIG_SUSPEND
int swsusp_unmark(void)
{
	int error;

	hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
	if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
		memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
		error = hib_bio_write_page(swsusp_resume_block,
					swsusp_header, NULL);
	} else {
		printk(KERN_ERR "PM: Cannot find swsusp signature!\n");
		error = -ENODEV;
	}

	/*
	 * We just returned from suspend, we don't need the image any more.
	 */
	free_all_swap_pages(root_swap);

	return error;
}
#endif

static int swsusp_header_init(void)
{
	swsusp_header = (struct swsusp_header *) __get_free_page(GFP_KERNEL);
	if (!swsusp_header)
		panic("Could not allocate memory for swsusp_header\n");
	return 0;
}

core_initcall(swsusp_header_init);