]> Pileus Git - ~andy/linux/blob - fs/btrfs/zlib.c
btrfs: Fix bugs in zlib workspace
[~andy/linux] / fs / btrfs / zlib.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  *
18  * Based on jffs2 zlib code:
19  * Copyright © 2001-2007 Red Hat, Inc.
20  * Created by David Woodhouse <dwmw2@infradead.org>
21  */
22
23 #include <linux/kernel.h>
24 #include <linux/slab.h>
25 #include <linux/zlib.h>
26 #include <linux/zutil.h>
27 #include <linux/vmalloc.h>
28 #include <linux/init.h>
29 #include <linux/err.h>
30 #include <linux/sched.h>
31 #include <linux/pagemap.h>
32 #include <linux/bio.h>
33 #include "compression.h"
34
35 /* Plan: call deflate() with avail_in == *sourcelen,
36         avail_out = *dstlen - 12 and flush == Z_FINISH.
37         If it doesn't manage to finish, call it again with
38         avail_in == 0 and avail_out set to the remaining 12
39         bytes for it to clean up.
40    Q: Is 12 bytes sufficient?
41 */
42 #define STREAM_END_SPACE 12
43
44 struct workspace {
45         z_stream inf_strm;
46         z_stream def_strm;
47         char *buf;
48         struct list_head list;
49 };
50
51 static LIST_HEAD(idle_workspace);
52 static DEFINE_SPINLOCK(workspace_lock);
53 static unsigned long num_workspace;
54 static atomic_t alloc_workspace = ATOMIC_INIT(0);
55 static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
56
57 /*
58  * this finds an available zlib workspace or allocates a new one
59  * NULL or an ERR_PTR is returned if things go bad.
60  */
61 static struct workspace *find_zlib_workspace(void)
62 {
63         struct workspace *workspace;
64         int ret;
65         int cpus = num_online_cpus();
66
67 again:
68         spin_lock(&workspace_lock);
69         if (!list_empty(&idle_workspace)) {
70                 workspace = list_entry(idle_workspace.next, struct workspace,
71                                        list);
72                 list_del(&workspace->list);
73                 num_workspace--;
74                 spin_unlock(&workspace_lock);
75                 return workspace;
76
77         }
78         if (atomic_read(&alloc_workspace) > cpus) {
79                 DEFINE_WAIT(wait);
80
81                 spin_unlock(&workspace_lock);
82                 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
83                 if (atomic_read(&alloc_workspace) > cpus && !num_workspace)
84                         schedule();
85                 finish_wait(&workspace_wait, &wait);
86                 goto again;
87         }
88         atomic_inc(&alloc_workspace);
89         spin_unlock(&workspace_lock);
90
91         workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
92         if (!workspace) {
93                 ret = -ENOMEM;
94                 goto fail;
95         }
96
97         workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
98         if (!workspace->def_strm.workspace) {
99                 ret = -ENOMEM;
100                 goto fail;
101         }
102         workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
103         if (!workspace->inf_strm.workspace) {
104                 ret = -ENOMEM;
105                 goto fail_inflate;
106         }
107         workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
108         if (!workspace->buf) {
109                 ret = -ENOMEM;
110                 goto fail_kmalloc;
111         }
112         return workspace;
113
114 fail_kmalloc:
115         vfree(workspace->inf_strm.workspace);
116 fail_inflate:
117         vfree(workspace->def_strm.workspace);
118 fail:
119         kfree(workspace);
120         atomic_dec(&alloc_workspace);
121         wake_up(&workspace_wait);
122         return ERR_PTR(ret);
123 }
124
125 /*
126  * put a workspace struct back on the list or free it if we have enough
127  * idle ones sitting around
128  */
129 static int free_workspace(struct workspace *workspace)
130 {
131         spin_lock(&workspace_lock);
132         if (num_workspace < num_online_cpus()) {
133                 list_add_tail(&workspace->list, &idle_workspace);
134                 num_workspace++;
135                 spin_unlock(&workspace_lock);
136                 if (waitqueue_active(&workspace_wait))
137                         wake_up(&workspace_wait);
138                 return 0;
139         }
140         spin_unlock(&workspace_lock);
141         vfree(workspace->def_strm.workspace);
142         vfree(workspace->inf_strm.workspace);
143         kfree(workspace->buf);
144         kfree(workspace);
145
146         atomic_dec(&alloc_workspace);
147         if (waitqueue_active(&workspace_wait))
148                 wake_up(&workspace_wait);
149         return 0;
150 }
151
152 /*
153  * cleanup function for module exit
154  */
155 static void free_workspaces(void)
156 {
157         struct workspace *workspace;
158         while (!list_empty(&idle_workspace)) {
159                 workspace = list_entry(idle_workspace.next, struct workspace,
160                                        list);
161                 list_del(&workspace->list);
162                 vfree(workspace->def_strm.workspace);
163                 vfree(workspace->inf_strm.workspace);
164                 kfree(workspace->buf);
165                 kfree(workspace);
166                 atomic_dec(&alloc_workspace);
167         }
168 }
169
170 /*
171  * given an address space and start/len, compress the bytes.
172  *
173  * pages are allocated to hold the compressed result and stored
174  * in 'pages'
175  *
176  * out_pages is used to return the number of pages allocated.  There
177  * may be pages allocated even if we return an error
178  *
179  * total_in is used to return the number of bytes actually read.  It
180  * may be smaller then len if we had to exit early because we
181  * ran out of room in the pages array or because we cross the
182  * max_out threshold.
183  *
184  * total_out is used to return the total number of compressed bytes
185  *
186  * max_out tells us the max number of bytes that we're allowed to
187  * stuff into pages
188  */
189 int btrfs_zlib_compress_pages(struct address_space *mapping,
190                               u64 start, unsigned long len,
191                               struct page **pages,
192                               unsigned long nr_dest_pages,
193                               unsigned long *out_pages,
194                               unsigned long *total_in,
195                               unsigned long *total_out,
196                               unsigned long max_out)
197 {
198         int ret;
199         struct workspace *workspace;
200         char *data_in;
201         char *cpage_out;
202         int nr_pages = 0;
203         struct page *in_page = NULL;
204         struct page *out_page = NULL;
205         unsigned long bytes_left;
206
207         *out_pages = 0;
208         *total_out = 0;
209         *total_in = 0;
210
211         workspace = find_zlib_workspace();
212         if (IS_ERR(workspace))
213                 return -1;
214
215         if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
216                 printk(KERN_WARNING "deflateInit failed\n");
217                 ret = -1;
218                 goto out;
219         }
220
221         workspace->def_strm.total_in = 0;
222         workspace->def_strm.total_out = 0;
223
224         in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
225         data_in = kmap(in_page);
226
227         out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
228         cpage_out = kmap(out_page);
229         pages[0] = out_page;
230         nr_pages = 1;
231
232         workspace->def_strm.next_in = data_in;
233         workspace->def_strm.next_out = cpage_out;
234         workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
235         workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
236
237         while (workspace->def_strm.total_in < len) {
238                 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
239                 if (ret != Z_OK) {
240                         printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
241                                ret);
242                         zlib_deflateEnd(&workspace->def_strm);
243                         ret = -1;
244                         goto out;
245                 }
246
247                 /* we're making it bigger, give up */
248                 if (workspace->def_strm.total_in > 8192 &&
249                     workspace->def_strm.total_in <
250                     workspace->def_strm.total_out) {
251                         ret = -1;
252                         goto out;
253                 }
254                 /* we need another page for writing out.  Test this
255                  * before the total_in so we will pull in a new page for
256                  * the stream end if required
257                  */
258                 if (workspace->def_strm.avail_out == 0) {
259                         kunmap(out_page);
260                         if (nr_pages == nr_dest_pages) {
261                                 out_page = NULL;
262                                 ret = -1;
263                                 goto out;
264                         }
265                         out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
266                         cpage_out = kmap(out_page);
267                         pages[nr_pages] = out_page;
268                         nr_pages++;
269                         workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
270                         workspace->def_strm.next_out = cpage_out;
271                 }
272                 /* we're all done */
273                 if (workspace->def_strm.total_in >= len)
274                         break;
275
276                 /* we've read in a full page, get a new one */
277                 if (workspace->def_strm.avail_in == 0) {
278                         if (workspace->def_strm.total_out > max_out)
279                                 break;
280
281                         bytes_left = len - workspace->def_strm.total_in;
282                         kunmap(in_page);
283                         page_cache_release(in_page);
284
285                         start += PAGE_CACHE_SIZE;
286                         in_page = find_get_page(mapping,
287                                                 start >> PAGE_CACHE_SHIFT);
288                         data_in = kmap(in_page);
289                         workspace->def_strm.avail_in = min(bytes_left,
290                                                            PAGE_CACHE_SIZE);
291                         workspace->def_strm.next_in = data_in;
292                 }
293         }
294         workspace->def_strm.avail_in = 0;
295         ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
296         zlib_deflateEnd(&workspace->def_strm);
297
298         if (ret != Z_STREAM_END) {
299                 ret = -1;
300                 goto out;
301         }
302
303         if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
304                 ret = -1;
305                 goto out;
306         }
307
308         ret = 0;
309         *total_out = workspace->def_strm.total_out;
310         *total_in = workspace->def_strm.total_in;
311 out:
312         *out_pages = nr_pages;
313         if (out_page)
314                 kunmap(out_page);
315
316         if (in_page) {
317                 kunmap(in_page);
318                 page_cache_release(in_page);
319         }
320         free_workspace(workspace);
321         return ret;
322 }
323
324 /*
325  * pages_in is an array of pages with compressed data.
326  *
327  * disk_start is the starting logical offset of this array in the file
328  *
329  * bvec is a bio_vec of pages from the file that we want to decompress into
330  *
331  * vcnt is the count of pages in the biovec
332  *
333  * srclen is the number of bytes in pages_in
334  *
335  * The basic idea is that we have a bio that was created by readpages.
336  * The pages in the bio are for the uncompressed data, and they may not
337  * be contiguous.  They all correspond to the range of bytes covered by
338  * the compressed extent.
339  */
340 int btrfs_zlib_decompress_biovec(struct page **pages_in,
341                               u64 disk_start,
342                               struct bio_vec *bvec,
343                               int vcnt,
344                               size_t srclen)
345 {
346         int ret = 0;
347         int wbits = MAX_WBITS;
348         struct workspace *workspace;
349         char *data_in;
350         size_t total_out = 0;
351         unsigned long page_bytes_left;
352         unsigned long page_in_index = 0;
353         unsigned long page_out_index = 0;
354         struct page *page_out;
355         unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
356                                         PAGE_CACHE_SIZE;
357         unsigned long buf_start;
358         unsigned long buf_offset;
359         unsigned long bytes;
360         unsigned long working_bytes;
361         unsigned long pg_offset;
362         unsigned long start_byte;
363         unsigned long current_buf_start;
364         char *kaddr;
365
366         workspace = find_zlib_workspace();
367         if (IS_ERR(workspace))
368                 return -ENOMEM;
369
370         data_in = kmap(pages_in[page_in_index]);
371         workspace->inf_strm.next_in = data_in;
372         workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
373         workspace->inf_strm.total_in = 0;
374
375         workspace->inf_strm.total_out = 0;
376         workspace->inf_strm.next_out = workspace->buf;
377         workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
378         page_out = bvec[page_out_index].bv_page;
379         page_bytes_left = PAGE_CACHE_SIZE;
380         pg_offset = 0;
381
382         /* If it's deflate, and it's got no preset dictionary, then
383            we can tell zlib to skip the adler32 check. */
384         if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
385             ((data_in[0] & 0x0f) == Z_DEFLATED) &&
386             !(((data_in[0]<<8) + data_in[1]) % 31)) {
387
388                 wbits = -((data_in[0] >> 4) + 8);
389                 workspace->inf_strm.next_in += 2;
390                 workspace->inf_strm.avail_in -= 2;
391         }
392
393         if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
394                 printk(KERN_WARNING "inflateInit failed\n");
395                 ret = -1;
396                 goto out;
397         }
398         while (workspace->inf_strm.total_in < srclen) {
399                 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
400                 if (ret != Z_OK && ret != Z_STREAM_END)
401                         break;
402                 /*
403                  * buf start is the byte offset we're of the start of
404                  * our workspace buffer
405                  */
406                 buf_start = total_out;
407
408                 /* total_out is the last byte of the workspace buffer */
409                 total_out = workspace->inf_strm.total_out;
410
411                 working_bytes = total_out - buf_start;
412
413                 /*
414                  * start byte is the first byte of the page we're currently
415                  * copying into relative to the start of the compressed data.
416                  */
417                 start_byte = page_offset(page_out) - disk_start;
418
419                 if (working_bytes == 0) {
420                         /* we didn't make progress in this inflate
421                          * call, we're done
422                          */
423                         if (ret != Z_STREAM_END)
424                                 ret = -1;
425                         break;
426                 }
427
428                 /* we haven't yet hit data corresponding to this page */
429                 if (total_out <= start_byte)
430                         goto next;
431
432                 /*
433                  * the start of the data we care about is offset into
434                  * the middle of our working buffer
435                  */
436                 if (total_out > start_byte && buf_start < start_byte) {
437                         buf_offset = start_byte - buf_start;
438                         working_bytes -= buf_offset;
439                 } else {
440                         buf_offset = 0;
441                 }
442                 current_buf_start = buf_start;
443
444                 /* copy bytes from the working buffer into the pages */
445                 while (working_bytes > 0) {
446                         bytes = min(PAGE_CACHE_SIZE - pg_offset,
447                                     PAGE_CACHE_SIZE - buf_offset);
448                         bytes = min(bytes, working_bytes);
449                         kaddr = kmap_atomic(page_out, KM_USER0);
450                         memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
451                                bytes);
452                         kunmap_atomic(kaddr, KM_USER0);
453                         flush_dcache_page(page_out);
454
455                         pg_offset += bytes;
456                         page_bytes_left -= bytes;
457                         buf_offset += bytes;
458                         working_bytes -= bytes;
459                         current_buf_start += bytes;
460
461                         /* check if we need to pick another page */
462                         if (page_bytes_left == 0) {
463                                 page_out_index++;
464                                 if (page_out_index >= vcnt) {
465                                         ret = 0;
466                                         goto done;
467                                 }
468
469                                 page_out = bvec[page_out_index].bv_page;
470                                 pg_offset = 0;
471                                 page_bytes_left = PAGE_CACHE_SIZE;
472                                 start_byte = page_offset(page_out) - disk_start;
473
474                                 /*
475                                  * make sure our new page is covered by this
476                                  * working buffer
477                                  */
478                                 if (total_out <= start_byte)
479                                         goto next;
480
481                                 /* the next page in the biovec might not
482                                  * be adjacent to the last page, but it
483                                  * might still be found inside this working
484                                  * buffer.  bump our offset pointer
485                                  */
486                                 if (total_out > start_byte &&
487                                     current_buf_start < start_byte) {
488                                         buf_offset = start_byte - buf_start;
489                                         working_bytes = total_out - start_byte;
490                                         current_buf_start = buf_start +
491                                                 buf_offset;
492                                 }
493                         }
494                 }
495 next:
496                 workspace->inf_strm.next_out = workspace->buf;
497                 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
498
499                 if (workspace->inf_strm.avail_in == 0) {
500                         unsigned long tmp;
501                         kunmap(pages_in[page_in_index]);
502                         page_in_index++;
503                         if (page_in_index >= total_pages_in) {
504                                 data_in = NULL;
505                                 break;
506                         }
507                         data_in = kmap(pages_in[page_in_index]);
508                         workspace->inf_strm.next_in = data_in;
509                         tmp = srclen - workspace->inf_strm.total_in;
510                         workspace->inf_strm.avail_in = min(tmp,
511                                                            PAGE_CACHE_SIZE);
512                 }
513         }
514         if (ret != Z_STREAM_END)
515                 ret = -1;
516         else
517                 ret = 0;
518 done:
519         zlib_inflateEnd(&workspace->inf_strm);
520         if (data_in)
521                 kunmap(pages_in[page_in_index]);
522 out:
523         free_workspace(workspace);
524         return ret;
525 }
526
527 /*
528  * a less complex decompression routine.  Our compressed data fits in a
529  * single page, and we want to read a single page out of it.
530  * start_byte tells us the offset into the compressed data we're interested in
531  */
532 int btrfs_zlib_decompress(unsigned char *data_in,
533                           struct page *dest_page,
534                           unsigned long start_byte,
535                           size_t srclen, size_t destlen)
536 {
537         int ret = 0;
538         int wbits = MAX_WBITS;
539         struct workspace *workspace;
540         unsigned long bytes_left = destlen;
541         unsigned long total_out = 0;
542         char *kaddr;
543
544         if (destlen > PAGE_CACHE_SIZE)
545                 return -ENOMEM;
546
547         workspace = find_zlib_workspace();
548         if (IS_ERR(workspace))
549                 return -ENOMEM;
550
551         workspace->inf_strm.next_in = data_in;
552         workspace->inf_strm.avail_in = srclen;
553         workspace->inf_strm.total_in = 0;
554
555         workspace->inf_strm.next_out = workspace->buf;
556         workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
557         workspace->inf_strm.total_out = 0;
558         /* If it's deflate, and it's got no preset dictionary, then
559            we can tell zlib to skip the adler32 check. */
560         if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
561             ((data_in[0] & 0x0f) == Z_DEFLATED) &&
562             !(((data_in[0]<<8) + data_in[1]) % 31)) {
563
564                 wbits = -((data_in[0] >> 4) + 8);
565                 workspace->inf_strm.next_in += 2;
566                 workspace->inf_strm.avail_in -= 2;
567         }
568
569         if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
570                 printk(KERN_WARNING "inflateInit failed\n");
571                 ret = -1;
572                 goto out;
573         }
574
575         while (bytes_left > 0) {
576                 unsigned long buf_start;
577                 unsigned long buf_offset;
578                 unsigned long bytes;
579                 unsigned long pg_offset = 0;
580
581                 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
582                 if (ret != Z_OK && ret != Z_STREAM_END)
583                         break;
584
585                 buf_start = total_out;
586                 total_out = workspace->inf_strm.total_out;
587
588                 if (total_out == buf_start) {
589                         ret = -1;
590                         break;
591                 }
592
593                 if (total_out <= start_byte)
594                         goto next;
595
596                 if (total_out > start_byte && buf_start < start_byte)
597                         buf_offset = start_byte - buf_start;
598                 else
599                         buf_offset = 0;
600
601                 bytes = min(PAGE_CACHE_SIZE - pg_offset,
602                             PAGE_CACHE_SIZE - buf_offset);
603                 bytes = min(bytes, bytes_left);
604
605                 kaddr = kmap_atomic(dest_page, KM_USER0);
606                 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
607                 kunmap_atomic(kaddr, KM_USER0);
608
609                 pg_offset += bytes;
610                 bytes_left -= bytes;
611 next:
612                 workspace->inf_strm.next_out = workspace->buf;
613                 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
614         }
615
616         if (ret != Z_STREAM_END && bytes_left != 0)
617                 ret = -1;
618         else
619                 ret = 0;
620
621         zlib_inflateEnd(&workspace->inf_strm);
622 out:
623         free_workspace(workspace);
624         return ret;
625 }
626
627 void btrfs_zlib_exit(void)
628 {
629     free_workspaces();
630 }