X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_buf.c;h=d1edfa1a8112ec31a5d74cb679d6476f629c2660;hb=08023d6dbe840dc4271805a9ea376fcbdee9f744;hp=172d3cc8f8cb8ad6d588fea95819ecfded627167;hpb=84a442b9a16ee69243ce7fce5d6f6f9c3fbdee68;p=~andy%2Flinux diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 172d3cc8f8c..d1edfa1a811 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -164,14 +164,49 @@ xfs_buf_stale( ASSERT(atomic_read(&bp->b_hold) >= 1); } +static int +xfs_buf_get_maps( + struct xfs_buf *bp, + int map_count) +{ + ASSERT(bp->b_maps == NULL); + bp->b_map_count = map_count; + + if (map_count == 1) { + bp->b_maps = &bp->b_map; + return 0; + } + + bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map), + KM_NOFS); + if (!bp->b_maps) + return ENOMEM; + return 0; +} + +/* + * Frees b_pages if it was allocated. + */ +static void +xfs_buf_free_maps( + struct xfs_buf *bp) +{ + if (bp->b_maps != &bp->b_map) { + kmem_free(bp->b_maps); + bp->b_maps = NULL; + } +} + struct xfs_buf * -xfs_buf_alloc( +_xfs_buf_alloc( struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags) { struct xfs_buf *bp; + int error; + int i; bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); if (unlikely(!bp)) @@ -192,23 +227,28 @@ xfs_buf_alloc( sema_init(&bp->b_sema, 0); /* held, no waiters */ XB_SET_OWNER(bp); bp->b_target = target; + bp->b_flags = flags; /* * Set length and io_length to the same value initially. * I/O routines should use io_length, which will be the same in * most cases but may be reset (e.g. XFS recovery). */ - bp->b_length = numblks; - bp->b_io_length = numblks; - bp->b_flags = flags; + error = xfs_buf_get_maps(bp, nmaps); + if (error) { + kmem_zone_free(xfs_buf_zone, bp); + return NULL; + } + + bp->b_bn = map[0].bm_bn; + bp->b_length = 0; + for (i = 0; i < nmaps; i++) { + bp->b_maps[i].bm_bn = map[i].bm_bn; + bp->b_maps[i].bm_len = map[i].bm_len; + bp->b_length += map[i].bm_len; + } + bp->b_io_length = bp->b_length; - /* - * We do not set the block number here in the buffer because we have not - * finished initialising the buffer. We insert the buffer into the cache - * in this state, so this ensures that we are unable to do IO on a - * buffer that hasn't been fully initialised. - */ - bp->b_bn = XFS_BUF_DADDR_NULL; atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); @@ -287,6 +327,7 @@ xfs_buf_free( } else if (bp->b_flags & _XBF_KMEM) kmem_free(bp->b_addr); _xfs_buf_free_pages(bp); + xfs_buf_free_maps(bp); kmem_zone_free(xfs_buf_zone, bp); } @@ -334,8 +375,9 @@ xfs_buf_allocate_memory( } use_alloc_page: - start = BBTOB(bp->b_bn) >> PAGE_SHIFT; - end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT; + start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT; + end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1) + >> PAGE_SHIFT; page_count = end - start; error = _xfs_buf_get_pages(bp, page_count, flags); if (unlikely(error)) @@ -432,8 +474,8 @@ _xfs_buf_map_pages( xfs_buf_t * _xfs_buf_find( struct xfs_buftarg *btp, - xfs_daddr_t blkno, - size_t numblks, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags, xfs_buf_t *new_bp) { @@ -442,7 +484,12 @@ _xfs_buf_find( struct rb_node **rbp; struct rb_node *parent; xfs_buf_t *bp; + xfs_daddr_t blkno = map[0].bm_bn; + int numblks = 0; + int i; + for (i = 0; i < nmaps; i++) + numblks += map[i].bm_len; numbytes = BBTOB(numblks); /* Check for IOs smaller than the sector size / not sector aligned */ @@ -534,31 +581,31 @@ found: * more hits than misses. */ struct xfs_buf * -xfs_buf_get( - xfs_buftarg_t *target, - xfs_daddr_t blkno, - size_t numblks, +xfs_buf_get_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags) { struct xfs_buf *bp; struct xfs_buf *new_bp; int error = 0; - bp = _xfs_buf_find(target, blkno, numblks, flags, NULL); + bp = _xfs_buf_find(target, map, nmaps, flags, NULL); if (likely(bp)) goto found; - new_bp = xfs_buf_alloc(target, blkno, numblks, flags); + new_bp = _xfs_buf_alloc(target, map, nmaps, flags); if (unlikely(!new_bp)) return NULL; error = xfs_buf_allocate_memory(new_bp, flags); if (error) { - kmem_zone_free(xfs_buf_zone, new_bp); + xfs_buf_free(new_bp); return NULL; } - bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp); + bp = _xfs_buf_find(target, map, nmaps, flags, new_bp); if (!bp) { xfs_buf_free(new_bp); return NULL; @@ -567,13 +614,6 @@ xfs_buf_get( if (bp != new_bp) xfs_buf_free(new_bp); - /* - * Now we have a workable buffer, fill in the block number so - * that we can do IO on it. - */ - bp->b_bn = blkno; - bp->b_io_length = bp->b_length; - found: if (!bp->b_addr) { error = _xfs_buf_map_pages(bp, flags); @@ -596,7 +636,7 @@ _xfs_buf_read( xfs_buf_flags_t flags) { ASSERT(!(flags & XBF_WRITE)); - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL); bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); @@ -608,17 +648,17 @@ _xfs_buf_read( } xfs_buf_t * -xfs_buf_read( - xfs_buftarg_t *target, - xfs_daddr_t blkno, - size_t numblks, +xfs_buf_read_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, xfs_buf_flags_t flags) { - xfs_buf_t *bp; + struct xfs_buf *bp; flags |= XBF_READ; - bp = xfs_buf_get(target, blkno, numblks, flags); + bp = xfs_buf_get_map(target, map, nmaps, flags); if (bp) { trace_xfs_buf_read(bp, flags, _RET_IP_); @@ -646,15 +686,15 @@ xfs_buf_read( * safe manner. */ void -xfs_buf_readahead( - xfs_buftarg_t *target, - xfs_daddr_t blkno, - size_t numblks) +xfs_buf_readahead_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps) { if (bdi_read_congested(target->bt_bdi)) return; - xfs_buf_read(target, blkno, numblks, + xfs_buf_read_map(target, map, nmaps, XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); } @@ -677,8 +717,10 @@ xfs_buf_read_uncached( return NULL; /* set up the buffer for a read IO */ - XFS_BUF_SET_ADDR(bp, daddr); - XFS_BUF_READ(bp); + ASSERT(bp->b_map_count == 1); + bp->b_bn = daddr; + bp->b_maps[0].bm_bn = daddr; + bp->b_flags |= XBF_READ; xfsbdstrat(target->bt_mount, bp); error = xfs_buf_iowait(bp); @@ -706,7 +748,11 @@ xfs_buf_set_empty( bp->b_addr = NULL; bp->b_length = numblks; bp->b_io_length = numblks; + + ASSERT(bp->b_map_count == 1); bp->b_bn = XFS_BUF_DADDR_NULL; + bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL; + bp->b_maps[0].bm_len = bp->b_length; } static inline struct page * @@ -770,9 +816,10 @@ xfs_buf_get_uncached( { unsigned long page_count; int error, i; - xfs_buf_t *bp; + struct xfs_buf *bp; + DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); - bp = xfs_buf_alloc(target, 0, numblks, 0); + bp = _xfs_buf_alloc(target, &map, 1, 0); if (unlikely(bp == NULL)) goto fail; @@ -803,6 +850,7 @@ xfs_buf_get_uncached( __free_page(bp->b_pages[i]); _xfs_buf_free_pages(bp); fail_free_buf: + xfs_buf_free_maps(bp); kmem_zone_free(xfs_buf_zone, bp); fail: return NULL; @@ -1163,36 +1211,39 @@ xfs_buf_bio_end_io( bio_put(bio); } -STATIC void -_xfs_buf_ioapply( - xfs_buf_t *bp) +static void +xfs_buf_ioapply_map( + struct xfs_buf *bp, + int map, + int *buf_offset, + int *count, + int rw) { - int rw, map_i, total_nr_pages, nr_pages; - struct bio *bio; - int offset = bp->b_offset; - int size = BBTOB(bp->b_io_length); - sector_t sector = bp->b_bn; + int page_index; + int total_nr_pages = bp->b_page_count; + int nr_pages; + struct bio *bio; + sector_t sector = bp->b_maps[map].bm_bn; + int size; + int offset; total_nr_pages = bp->b_page_count; - map_i = 0; - if (bp->b_flags & XBF_WRITE) { - if (bp->b_flags & XBF_SYNCIO) - rw = WRITE_SYNC; - else - rw = WRITE; - if (bp->b_flags & XBF_FUA) - rw |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - rw |= REQ_FLUSH; - } else if (bp->b_flags & XBF_READ_AHEAD) { - rw = READA; - } else { - rw = READ; + /* skip the pages in the buffer before the start offset */ + page_index = 0; + offset = *buf_offset; + while (offset >= PAGE_SIZE) { + page_index++; + offset -= PAGE_SIZE; } - /* we only use the buffer cache for meta-data */ - rw |= REQ_META; + /* + * Limit the IO size to the length of the current vector, and update the + * remaining IO count for the next time around. + */ + size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count); + *count -= size; + *buf_offset += size; next_chunk: atomic_inc(&bp->b_io_remaining); @@ -1207,13 +1258,14 @@ next_chunk: bio->bi_private = bp; - for (; size && nr_pages; nr_pages--, map_i++) { + for (; size && nr_pages; nr_pages--, page_index++) { int rbytes, nbytes = PAGE_SIZE - offset; if (nbytes > size) nbytes = size; - rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); + rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes, + offset); if (rbytes < nbytes) break; @@ -1235,6 +1287,54 @@ next_chunk: xfs_buf_ioerror(bp, EIO); bio_put(bio); } + +} + +STATIC void +_xfs_buf_ioapply( + struct xfs_buf *bp) +{ + struct blk_plug plug; + int rw; + int offset; + int size; + int i; + + if (bp->b_flags & XBF_WRITE) { + if (bp->b_flags & XBF_SYNCIO) + rw = WRITE_SYNC; + else + rw = WRITE; + if (bp->b_flags & XBF_FUA) + rw |= REQ_FUA; + if (bp->b_flags & XBF_FLUSH) + rw |= REQ_FLUSH; + } else if (bp->b_flags & XBF_READ_AHEAD) { + rw = READA; + } else { + rw = READ; + } + + /* we only use the buffer cache for meta-data */ + rw |= REQ_META; + + /* + * Walk all the vectors issuing IO on them. Set up the initial offset + * into the buffer and the desired IO size before we start - + * _xfs_buf_ioapply_vec() will modify them appropriately for each + * subsequent call. + */ + offset = bp->b_offset; + size = BBTOB(bp->b_io_length); + blk_start_plug(&plug); + for (i = 0; i < bp->b_map_count; i++) { + xfs_buf_ioapply_map(bp, i, &offset, &size, rw); + if (bp->b_error) + break; + if (size <= 0) + break; /* all done */ + } + blk_finish_plug(&plug); } void @@ -1255,7 +1355,7 @@ xfs_buf_iorequest( */ atomic_set(&bp->b_io_remaining, 1); _xfs_buf_ioapply(bp); - _xfs_buf_ioend(bp, 0); + _xfs_buf_ioend(bp, 1); xfs_buf_rele(bp); } @@ -1576,7 +1676,7 @@ xfs_buf_cmp( struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); xfs_daddr_t diff; - diff = ap->b_bn - bp->b_bn; + diff = ap->b_map.bm_bn - bp->b_map.bm_bn; if (diff < 0) return -1; if (diff > 0)