Pileus Git - ~andy/linux/blob - fs/btrfs/zlib.c
btrfs: Allow to add new compression algorithm
[~andy/linux] / fs / btrfs / zlib.c
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  *
18  * Based on jffs2 zlib code:
19  * Copyright © 2001-2007 Red Hat, Inc.
20  * Created by David Woodhouse <dwmw2@infradead.org>
21  */
22
23 #include <linux/kernel.h>
24 #include <linux/slab.h>
25 #include <linux/zlib.h>
26 #include <linux/zutil.h>
27 #include <linux/vmalloc.h>
28 #include <linux/init.h>
29 #include <linux/err.h>
30 #include <linux/sched.h>
31 #include <linux/pagemap.h>
32 #include <linux/bio.h>
33 #include "compression.h"
34
35 struct workspace {
36         z_stream inf_strm;
37         z_stream def_strm;
38         char *buf;
39         struct list_head list;
40 };
41
42 static void zlib_free_workspace(struct list_head *ws)
43 {
44         struct workspace *workspace = list_entry(ws, struct workspace, list);
45
46         vfree(workspace->def_strm.workspace);
47         vfree(workspace->inf_strm.workspace);
48         kfree(workspace->buf);
49         kfree(workspace);
50 }
51
52 static struct list_head *zlib_alloc_workspace(void)
53 {
54         struct workspace *workspace;
55
56         workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
57         if (!workspace)
58                 return ERR_PTR(-ENOMEM);
59
60         workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
61         workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
62         workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
63         if (!workspace->def_strm.workspace ||
64             !workspace->inf_strm.workspace || !workspace->buf)
65                 goto fail;
66
67         INIT_LIST_HEAD(&workspace->list);
68
69         return &workspace->list;
70 fail:
71         zlib_free_workspace(&workspace->list);
72         return ERR_PTR(-ENOMEM);
73 }
74
75 static int zlib_compress_pages(struct list_head *ws,
76                                struct address_space *mapping,
77                                u64 start, unsigned long len,
78                                struct page **pages,
79                                unsigned long nr_dest_pages,
80                                unsigned long *out_pages,
81                                unsigned long *total_in,
82                                unsigned long *total_out,
83                                unsigned long max_out)
84 {
85         struct workspace *workspace = list_entry(ws, struct workspace, list);
86         int ret;
87         char *data_in;
88         char *cpage_out;
89         int nr_pages = 0;
90         struct page *in_page = NULL;
91         struct page *out_page = NULL;
92         unsigned long bytes_left;
93
94         *out_pages = 0;
95         *total_out = 0;
96         *total_in = 0;
97
98         if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
99                 printk(KERN_WARNING "deflateInit failed\n");
100                 ret = -1;
101                 goto out;
102         }
103
104         workspace->def_strm.total_in = 0;
105         workspace->def_strm.total_out = 0;
106
107         in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
108         data_in = kmap(in_page);
109
110         out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
111         if (out_page == NULL) {
112                 ret = -1;
113                 goto out;
114         }
115         cpage_out = kmap(out_page);
116         pages[0] = out_page;
117         nr_pages = 1;
118
119         workspace->def_strm.next_in = data_in;
120         workspace->def_strm.next_out = cpage_out;
121         workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
122         workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
123
124         while (workspace->def_strm.total_in < len) {
125                 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
126                 if (ret != Z_OK) {
127                         printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
128                                ret);
129                         zlib_deflateEnd(&workspace->def_strm);
130                         ret = -1;
131                         goto out;
132                 }
133
134                 /* we're making it bigger, give up */
135                 if (workspace->def_strm.total_in > 8192 &&
136                     workspace->def_strm.total_in <
137                     workspace->def_strm.total_out) {
138                         ret = -1;
139                         goto out;
140                 }
141                 /* we need another page for writing out.  Test this
142                  * before the total_in so we will pull in a new page for
143                  * the stream end if required
144                  */
145                 if (workspace->def_strm.avail_out == 0) {
146                         kunmap(out_page);
147                         if (nr_pages == nr_dest_pages) {
148                                 out_page = NULL;
149                                 ret = -1;
150                                 goto out;
151                         }
152                         out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
153                         if (out_page == NULL) {
154                                 ret = -1;
155                                 goto out;
156                         }
157                         cpage_out = kmap(out_page);
158                         pages[nr_pages] = out_page;
159                         nr_pages++;
160                         workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
161                         workspace->def_strm.next_out = cpage_out;
162                 }
163                 /* we're all done */
164                 if (workspace->def_strm.total_in >= len)
165                         break;
166
167                 /* we've read in a full page, get a new one */
168                 if (workspace->def_strm.avail_in == 0) {
169                         if (workspace->def_strm.total_out > max_out)
170                                 break;
171
172                         bytes_left = len - workspace->def_strm.total_in;
173                         kunmap(in_page);
174                         page_cache_release(in_page);
175
176                         start += PAGE_CACHE_SIZE;
177                         in_page = find_get_page(mapping,
178                                                 start >> PAGE_CACHE_SHIFT);
179                         data_in = kmap(in_page);
180                         workspace->def_strm.avail_in = min(bytes_left,
181                                                            PAGE_CACHE_SIZE);
182                         workspace->def_strm.next_in = data_in;
183                 }
184         }
185         workspace->def_strm.avail_in = 0;
186         ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
187         zlib_deflateEnd(&workspace->def_strm);
188
189         if (ret != Z_STREAM_END) {
190                 ret = -1;
191                 goto out;
192         }
193
194         if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
195                 ret = -1;
196                 goto out;
197         }
198
199         ret = 0;
200         *total_out = workspace->def_strm.total_out;
201         *total_in = workspace->def_strm.total_in;
202 out:
203         *out_pages = nr_pages;
204         if (out_page)
205                 kunmap(out_page);
206
207         if (in_page) {
208                 kunmap(in_page);
209                 page_cache_release(in_page);
210         }
211         return ret;
212 }
213
214 static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
215                                   u64 disk_start,
216                                   struct bio_vec *bvec,
217                                   int vcnt,
218                                   size_t srclen)
219 {
220         struct workspace *workspace = list_entry(ws, struct workspace, list);
221         int ret = 0;
222         int wbits = MAX_WBITS;
223         char *data_in;
224         size_t total_out = 0;
225         unsigned long page_bytes_left;
226         unsigned long page_in_index = 0;
227         unsigned long page_out_index = 0;
228         struct page *page_out;
229         unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
230                                         PAGE_CACHE_SIZE;
231         unsigned long buf_start;
232         unsigned long buf_offset;
233         unsigned long bytes;
234         unsigned long working_bytes;
235         unsigned long pg_offset;
236         unsigned long start_byte;
237         unsigned long current_buf_start;
238         char *kaddr;
239
240         data_in = kmap(pages_in[page_in_index]);
241         workspace->inf_strm.next_in = data_in;
242         workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
243         workspace->inf_strm.total_in = 0;
244
245         workspace->inf_strm.total_out = 0;
246         workspace->inf_strm.next_out = workspace->buf;
247         workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
248         page_out = bvec[page_out_index].bv_page;
249         page_bytes_left = PAGE_CACHE_SIZE;
250         pg_offset = 0;
251
252         /* If it's deflate, and it's got no preset dictionary, then
253            we can tell zlib to skip the adler32 check. */
254         if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
255             ((data_in[0] & 0x0f) == Z_DEFLATED) &&
256             !(((data_in[0]<<8) + data_in[1]) % 31)) {
257
258                 wbits = -((data_in[0] >> 4) + 8);
259                 workspace->inf_strm.next_in += 2;
260                 workspace->inf_strm.avail_in -= 2;
261         }
262
263         if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
264                 printk(KERN_WARNING "inflateInit failed\n");
265                 return -1;
266         }
267         while (workspace->inf_strm.total_in < srclen) {
268                 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
269                 if (ret != Z_OK && ret != Z_STREAM_END)
270                         break;
271                 /*
272                  * buf start is the byte offset we're of the start of
273                  * our workspace buffer
274                  */
275                 buf_start = total_out;
276
277                 /* total_out is the last byte of the workspace buffer */
278                 total_out = workspace->inf_strm.total_out;
279
280                 working_bytes = total_out - buf_start;
281
282                 /*
283                  * start byte is the first byte of the page we're currently
284                  * copying into relative to the start of the compressed data.
285                  */
286                 start_byte = page_offset(page_out) - disk_start;
287
288                 if (working_bytes == 0) {
289                         /* we didn't make progress in this inflate
290                          * call, we're done
291                          */
292                         if (ret != Z_STREAM_END)
293                                 ret = -1;
294                         break;
295                 }
296
297                 /* we haven't yet hit data corresponding to this page */
298                 if (total_out <= start_byte)
299                         goto next;
300
301                 /*
302                  * the start of the data we care about is offset into
303                  * the middle of our working buffer
304                  */
305                 if (total_out > start_byte && buf_start < start_byte) {
306                         buf_offset = start_byte - buf_start;
307                         working_bytes -= buf_offset;
308                 } else {
309                         buf_offset = 0;
310                 }
311                 current_buf_start = buf_start;
312
313                 /* copy bytes from the working buffer into the pages */
314                 while (working_bytes > 0) {
315                         bytes = min(PAGE_CACHE_SIZE - pg_offset,
316                                     PAGE_CACHE_SIZE - buf_offset);
317                         bytes = min(bytes, working_bytes);
318                         kaddr = kmap_atomic(page_out, KM_USER0);
319                         memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
320                                bytes);
321                         kunmap_atomic(kaddr, KM_USER0);
322                         flush_dcache_page(page_out);
323
324                         pg_offset += bytes;
325                         page_bytes_left -= bytes;
326                         buf_offset += bytes;
327                         working_bytes -= bytes;
328                         current_buf_start += bytes;
329
330                         /* check if we need to pick another page */
331                         if (page_bytes_left == 0) {
332                                 page_out_index++;
333                                 if (page_out_index >= vcnt) {
334                                         ret = 0;
335                                         goto done;
336                                 }
337
338                                 page_out = bvec[page_out_index].bv_page;
339                                 pg_offset = 0;
340                                 page_bytes_left = PAGE_CACHE_SIZE;
341                                 start_byte = page_offset(page_out) - disk_start;
342
343                                 /*
344                                  * make sure our new page is covered by this
345                                  * working buffer
346                                  */
347                                 if (total_out <= start_byte)
348                                         goto next;
349
350                                 /* the next page in the biovec might not
351                                  * be adjacent to the last page, but it
352                                  * might still be found inside this working
353                                  * buffer.  bump our offset pointer
354                                  */
355                                 if (total_out > start_byte &&
356                                     current_buf_start < start_byte) {
357                                         buf_offset = start_byte - buf_start;
358                                         working_bytes = total_out - start_byte;
359                                         current_buf_start = buf_start +
360                                                 buf_offset;
361                                 }
362                         }
363                 }
364 next:
365                 workspace->inf_strm.next_out = workspace->buf;
366                 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
367
368                 if (workspace->inf_strm.avail_in == 0) {
369                         unsigned long tmp;
370                         kunmap(pages_in[page_in_index]);
371                         page_in_index++;
372                         if (page_in_index >= total_pages_in) {
373                                 data_in = NULL;
374                                 break;
375                         }
376                         data_in = kmap(pages_in[page_in_index]);
377                         workspace->inf_strm.next_in = data_in;
378                         tmp = srclen - workspace->inf_strm.total_in;
379                         workspace->inf_strm.avail_in = min(tmp,
380                                                            PAGE_CACHE_SIZE);
381                 }
382         }
383         if (ret != Z_STREAM_END)
384                 ret = -1;
385         else
386                 ret = 0;
387 done:
388         zlib_inflateEnd(&workspace->inf_strm);
389         if (data_in)
390                 kunmap(pages_in[page_in_index]);
391         return ret;
392 }
393
394 static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
395                            struct page *dest_page,
396                            unsigned long start_byte,
397                            size_t srclen, size_t destlen)
398 {
399         struct workspace *workspace = list_entry(ws, struct workspace, list);
400         int ret = 0;
401         int wbits = MAX_WBITS;
402         unsigned long bytes_left = destlen;
403         unsigned long total_out = 0;
404         char *kaddr;
405
406         workspace->inf_strm.next_in = data_in;
407         workspace->inf_strm.avail_in = srclen;
408         workspace->inf_strm.total_in = 0;
409
410         workspace->inf_strm.next_out = workspace->buf;
411         workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
412         workspace->inf_strm.total_out = 0;
413         /* If it's deflate, and it's got no preset dictionary, then
414            we can tell zlib to skip the adler32 check. */
415         if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
416             ((data_in[0] & 0x0f) == Z_DEFLATED) &&
417             !(((data_in[0]<<8) + data_in[1]) % 31)) {
418
419                 wbits = -((data_in[0] >> 4) + 8);
420                 workspace->inf_strm.next_in += 2;
421                 workspace->inf_strm.avail_in -= 2;
422         }
423
424         if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
425                 printk(KERN_WARNING "inflateInit failed\n");
426                 return -1;
427         }
428
429         while (bytes_left > 0) {
430                 unsigned long buf_start;
431                 unsigned long buf_offset;
432                 unsigned long bytes;
433                 unsigned long pg_offset = 0;
434
435                 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
436                 if (ret != Z_OK && ret != Z_STREAM_END)
437                         break;
438
439                 buf_start = total_out;
440                 total_out = workspace->inf_strm.total_out;
441
442                 if (total_out == buf_start) {
443                         ret = -1;
444                         break;
445                 }
446
447                 if (total_out <= start_byte)
448                         goto next;
449
450                 if (total_out > start_byte && buf_start < start_byte)
451                         buf_offset = start_byte - buf_start;
452                 else
453                         buf_offset = 0;
454
455                 bytes = min(PAGE_CACHE_SIZE - pg_offset,
456                             PAGE_CACHE_SIZE - buf_offset);
457                 bytes = min(bytes, bytes_left);
458
459                 kaddr = kmap_atomic(dest_page, KM_USER0);
460                 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
461                 kunmap_atomic(kaddr, KM_USER0);
462
463                 pg_offset += bytes;
464                 bytes_left -= bytes;
465 next:
466                 workspace->inf_strm.next_out = workspace->buf;
467                 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
468         }
469
470         if (ret != Z_STREAM_END && bytes_left != 0)
471                 ret = -1;
472         else
473                 ret = 0;
474
475         zlib_inflateEnd(&workspace->inf_strm);
476         return ret;
477 }
478
479 struct btrfs_compress_op btrfs_zlib_compress = {
480         .alloc_workspace        = zlib_alloc_workspace,
481         .free_workspace         = zlib_free_workspace,
482         .compress_pages         = zlib_compress_pages,
483         .decompress_biovec      = zlib_decompress_biovec,
484         .decompress             = zlib_decompress,
485 };