/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * Copyright (c) 2010 David Chinner.
 * Copyright (c) 2011 Christoph Hellwig.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_log.h"

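/*
 * Insert a busy extent record for [bno, bno + len) in AG agno into the
 * per-AG rbtree and onto the transaction's busy list, so that the freed
 * blocks cannot be reused before the transaction that freed them has
 * committed to disk.
 */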
void
xfs_extent_busy_insert(
        struct xfs_trans        *tp,
        xfs_agnumber_t          agno,
        xfs_agblock_t           bno,
        xfs_extlen_t            len,
        unsigned int            flags)
{
        struct xfs_extent_busy  *new;
        struct xfs_extent_busy  *busyp;
        struct xfs_perag        *pag;
        struct rb_node          **rbp;
        struct rb_node          *parent = NULL;

        new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
        if (!new) {
                /*
                 * No memory!  Since it is now not possible to track the free
                 * block, make this a synchronous transaction to ensure that
                 * the block is not reused before this transaction commits.
                 */
                trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
                xfs_trans_set_sync(tp);
                return;
        }

        new->agno = agno;
        new->bno = bno;
        new->length = len;
        INIT_LIST_HEAD(&new->list);
        new->flags = flags;

        /* trace before insert to be able to see failed inserts */
        trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);

        pag = xfs_perag_get(tp->t_mountp, new->agno);
        spin_lock(&pag->pagb_lock);
        rbp = &pag->pagb_tree.rb_node;
        while (*rbp) {
                parent = *rbp;
                busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);

                if (new->bno < busyp->bno) {
                        rbp = &(*rbp)->rb_left;
                        ASSERT(new->bno + new->length <= busyp->bno);
                } else if (new->bno > busyp->bno) {
                        rbp = &(*rbp)->rb_right;
                        ASSERT(new->bno >= busyp->bno + busyp->length);
                } else {
                        ASSERT(0);
                }
        }

        rb_link_node(&new->rb_node, parent, rbp);
        rb_insert_color(&new->rb_node, &pag->pagb_tree);

        list_add(&new->list, &tp->t_busy);
        spin_unlock(&pag->pagb_lock);
        xfs_perag_put(pag);
}

/*
 * Search for a busy extent within the range of the extent we are about to
 * allocate.  The per-AG busy extent tree lock is taken and released
 * internally, so the caller must not already hold it.  This function
 * returns 0 for no overlapping busy extent, -1 for an overlapping but not
 * exact busy extent, and 1 for an exact match.  Thus any non-zero return
 * indicates an overlap that will require a synchronous transaction, while
 * the sign still distinguishes a partial from an exact match.
 */
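/*
 * Illustrative caller sketch (not from this file; tp, mp, agno, bno and
 * len are assumed to be the caller's): force the transaction synchronous
 * when any busy overlap is found:
 *
 *	if (xfs_extent_busy_search(mp, agno, bno, len))
 *		xfs_trans_set_sync(tp);
 */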
int
xfs_extent_busy_search(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno,
        xfs_agblock_t           bno,
        xfs_extlen_t            len)
{
        struct xfs_perag        *pag;
        struct rb_node          *rbp;
        struct xfs_extent_busy  *busyp;
        int                     match = 0;

        pag = xfs_perag_get(mp, agno);
        spin_lock(&pag->pagb_lock);

        rbp = pag->pagb_tree.rb_node;

        /* find closest start bno overlap */
        while (rbp) {
                busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
                if (bno < busyp->bno) {
                        /* may overlap, but exact start block is lower */
                        if (bno + len > busyp->bno)
                                match = -1;
                        rbp = rbp->rb_left;
                } else if (bno > busyp->bno) {
                        /* may overlap, but exact start block is higher */
                        if (bno < busyp->bno + busyp->length)
                                match = -1;
                        rbp = rbp->rb_right;
                } else {
                        /* bno matches busyp, length determines exact match */
                        match = (busyp->length == len) ? 1 : -1;
                        break;
                }
        }
        spin_unlock(&pag->pagb_lock);
        xfs_perag_put(pag);
        return match;
}

/*
 * The found free extent [fbno, fend) overlaps part or all of the given busy
 * extent.  If the overlap covers the beginning, the end, or all of the busy
 * extent, the overlapping portion can be made unbusy and used for the
 * allocation.  We can't split a busy extent because we can't modify a
 * transaction/CIL context busy list, but we can update an entry's block
 * number or length.
 *
 * Returns true if the extent can safely be reused, or false if the search
 * needs to be restarted.  Note that pagb_lock may be dropped and reacquired
 * internally, hence the sparse lock annotations on the declaration.
 */
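/*
 * For example (illustrative numbers, not from the source): a busy extent
 * [10, 30) overlapped at its front by a found free extent [10, 15) is
 * trimmed to [15, 30) (case 6 below); overlapped at its back by [25, 40)
 * it is trimmed to [10, 25) (cases 8/9 below).
 */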
STATIC bool
xfs_extent_busy_update_extent(
        struct xfs_mount        *mp,
        struct xfs_perag        *pag,
        struct xfs_extent_busy  *busyp,
        xfs_agblock_t           fbno,
        xfs_extlen_t            flen,
        bool                    userdata) __releases(&pag->pagb_lock)
                                          __acquires(&pag->pagb_lock)
{
        xfs_agblock_t           fend = fbno + flen;
        xfs_agblock_t           bbno = busyp->bno;
        xfs_agblock_t           bend = bbno + busyp->length;

        /*
         * This extent is currently being discarded.  Give the thread
         * performing the discard a chance to mark the extent unbusy
         * and retry.
         */
        if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
                spin_unlock(&pag->pagb_lock);
                delay(1);
                spin_lock(&pag->pagb_lock);
                return false;
        }

        /*
         * If there is a busy extent overlapping a user allocation, we have
         * no choice but to force the log and retry the search.
         *
         * Fortunately this does not happen during normal operation, but
         * only if the filesystem is very low on space and has to dip into
         * the AGFL for normal allocations.
         */
        if (userdata)
                goto out_force_log;

        if (bbno < fbno && bend > fend) {
                /*
                 * Case 1:
                 *    bbno           bend
                 *    +BBBBBBBBBBBBBBBBB+
                 *        +---------+
                 *        fbno   fend
                 */

                /*
                 * We would have to split the busy extent to be able to track
                 * it correctly, which we cannot do because we would have to
                 * modify the list of busy extents attached to the transaction
                 * or CIL context, which is immutable.
                 *
                 * Force out the log to clear the busy extent and retry the
                 * search.
                 */
                goto out_force_log;
        } else if (bbno >= fbno && bend <= fend) {
                /*
                 * Case 2:
                 *    bbno           bend
                 *    +BBBBBBBBBBBBBBBBB+
                 *    +-----------------+
                 *    fbno           fend
                 *
                 * Case 3:
                 *    bbno           bend
                 *    +BBBBBBBBBBBBBBBBB+
                 *    +--------------------------+
                 *    fbno                    fend
                 *
                 * Case 4:
                 *             bbno           bend
                 *             +BBBBBBBBBBBBBBBBB+
                 *    +--------------------------+
                 *    fbno                    fend
                 *
                 * Case 5:
                 *             bbno           bend
                 *             +BBBBBBBBBBBBBBBBB+
                 *    +-----------------------------------+
                 *    fbno                             fend
                 */

                /*
                 * The busy extent is fully covered by the extent we are
                 * allocating, and can simply be removed from the rbtree.
                 * However we cannot remove it from the immutable list
                 * tracking busy extents in the transaction or CIL context,
                 * so set the length to zero to mark it invalid.
                 *
                 * We also need to restart the busy extent search from the
                 * tree root, because erasing the node can rearrange the
                 * tree topology.
                 */
                rb_erase(&busyp->rb_node, &pag->pagb_tree);
                busyp->length = 0;
                return false;
        } else if (fend < bend) {
                /*
                 * Case 6:
                 *             bbno           bend
                 *             +BBBBBBBBBBBBBBBBB+
                 *             +---------+
                 *             fbno   fend
                 *
                 * Case 7:
                 *             bbno           bend
                 *             +BBBBBBBBBBBBBBBBB+
                 *    +------------------+
                 *    fbno            fend
                 */
                busyp->bno = fend;
        } else if (bbno < fbno) {
                /*
                 * Case 8:
                 *    bbno           bend
                 *    +BBBBBBBBBBBBBBBBB+
                 *        +-------------+
                 *        fbno       fend
                 *
                 * Case 9:
                 *    bbno           bend
                 *    +BBBBBBBBBBBBBBBBB+
                 *        +----------------------+
                 *        fbno                fend
                 */
                busyp->length = fbno - busyp->bno;
        } else {
                ASSERT(0);
        }

        trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
        return true;

out_force_log:
        spin_unlock(&pag->pagb_lock);
        xfs_log_force(mp, XFS_LOG_SYNC);
        trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
        spin_lock(&pag->pagb_lock);
        return false;
}

/*
 * For a given extent [fbno, flen], make sure we can reuse it safely.
 */
void
xfs_extent_busy_reuse(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno,
        xfs_agblock_t           fbno,
        xfs_extlen_t            flen,
        bool                    userdata)
{
        struct xfs_perag        *pag;
        struct rb_node          *rbp;

        ASSERT(flen > 0);

        pag = xfs_perag_get(mp, agno);
        spin_lock(&pag->pagb_lock);
restart:
        rbp = pag->pagb_tree.rb_node;
        while (rbp) {
                struct xfs_extent_busy *busyp =
                        rb_entry(rbp, struct xfs_extent_busy, rb_node);
                xfs_agblock_t   bbno = busyp->bno;
                xfs_agblock_t   bend = bbno + busyp->length;

                if (fbno + flen <= bbno) {
                        rbp = rbp->rb_left;
                        continue;
                } else if (fbno >= bend) {
                        rbp = rbp->rb_right;
                        continue;
                }

                if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
                                                   userdata))
                        goto restart;
        }
        spin_unlock(&pag->pagb_lock);
        xfs_perag_put(pag);
}

/*
 * For a given extent [fbno, flen], search the busy extent list to find a
 * subset of the extent that is not busy.  If *rlen is smaller than
 * args->minlen no suitable extent could be found, and the higher level
 * code needs to force out the log and retry the allocation.
 */
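/*
 * Illustrative caller sketch (not from this file; in mainline the call
 * comes from the allocator in fs/xfs/xfs_alloc.c):
 *
 *	xfs_extent_busy_trim(args, fbno, flen, &bno, &len);
 *	if (len < args->minlen)
 *		... force the log out and retry the allocation ...
 */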
void
xfs_extent_busy_trim(
        struct xfs_alloc_arg    *args,
        xfs_agblock_t           bno,
        xfs_extlen_t            len,
        xfs_agblock_t           *rbno,
        xfs_extlen_t            *rlen)
{
        xfs_agblock_t           fbno;
        xfs_extlen_t            flen;
        struct rb_node          *rbp;

        ASSERT(len > 0);

        spin_lock(&args->pag->pagb_lock);
restart:
        fbno = bno;
        flen = len;
        rbp = args->pag->pagb_tree.rb_node;
        while (rbp && flen >= args->minlen) {
                struct xfs_extent_busy *busyp =
                        rb_entry(rbp, struct xfs_extent_busy, rb_node);
                xfs_agblock_t   fend = fbno + flen;
                xfs_agblock_t   bbno = busyp->bno;
                xfs_agblock_t   bend = bbno + busyp->length;

                if (fend <= bbno) {
                        rbp = rbp->rb_left;
                        continue;
                } else if (fbno >= bend) {
                        rbp = rbp->rb_right;
                        continue;
                }

                /*
                 * If this is a metadata allocation, try to reuse the busy
                 * extent instead of trimming the allocation.
                 */
                if (!args->userdata &&
                    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
                        if (!xfs_extent_busy_update_extent(args->mp, args->pag,
                                                           busyp, fbno, flen,
                                                           false))
                                goto restart;
                        continue;
                }

                if (bbno <= fbno) {
                        /* start overlap */

                        /*
                         * Case 1:
                         *    bbno           bend
                         *    +BBBBBBBBBBBBBBBBB+
                         *        +---------+
                         *        fbno   fend
                         *
                         * Case 2:
                         *    bbno           bend
                         *    +BBBBBBBBBBBBBBBBB+
                         *    +-------------+
                         *    fbno       fend
                         *
                         * Case 3:
                         *    bbno           bend
                         *    +BBBBBBBBBBBBBBBBB+
                         *        +-------------+
                         *        fbno       fend
                         *
                         * Case 4:
                         *    bbno           bend
                         *    +BBBBBBBBBBBBBBBBB+
                         *    +-----------------+
                         *    fbno           fend
                         *
                         * No unbusy region in extent, return failure.
                         */
                        if (fend <= bend)
                                goto fail;

                        /*
                         * Case 5:
                         *    bbno           bend
                         *    +BBBBBBBBBBBBBBBBB+
                         *        +----------------------+
                         *        fbno                fend
                         *
                         * Case 6:
                         *    bbno           bend
                         *    +BBBBBBBBBBBBBBBBB+
                         *    +--------------------------+
                         *    fbno                    fend
                         *
                         * Needs to be trimmed to:
                         *                       +-------+
                         *                       fbno fend
                         */
                        fbno = bend;
                } else if (bend >= fend) {
                        /* end overlap */

                        /*
                         * Case 7:
                         *             bbno           bend
                         *             +BBBBBBBBBBBBBBBBB+
                         *    +------------------+
                         *    fbno            fend
                         *
                         * Case 8:
                         *             bbno           bend
                         *             +BBBBBBBBBBBBBBBBB+
                         *    +--------------------------+
                         *    fbno                    fend
                         *
                         * Needs to be trimmed to:
                         *    +-------+
                         *    fbno fend
                         */
                        fend = bbno;
                } else {
                        /* middle overlap */

                        /*
                         * Case 9:
                         *             bbno           bend
                         *             +BBBBBBBBBBBBBBBBB+
                         *    +-----------------------------------+
                         *    fbno                             fend
                         *
                         * Can be trimmed to:
                         *    +-------+        OR         +-------+
                         *    fbno fend                   fbno fend
                         *
                         * Backward allocation leads to significant
                         * fragmentation of directories, which degrades
                         * directory performance, therefore we always want to
                         * choose the option that produces forward allocation
                         * patterns.
                         *
                         * Preferring the lower bno extent will make the next
                         * request use "fend" as the start of the next
                         * allocation; if the segment is no longer busy at
                         * that point, we'll get a contiguous allocation, but
                         * even if it is still busy, we will get a forward
                         * allocation.
                         *
                         * We try to avoid choosing the segment at "bend",
                         * because that can lead to the next allocation
                         * taking the segment at "fbno", which would be a
                         * backward allocation.  We only use the segment at
                         * "fbno" if it is much larger than the current
                         * requested size, because in that case there's a
                         * good chance subsequent allocations will be
                         * contiguous.
                         */
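                        /*
                         * Illustrative numbers (not from the source): with
                         * args->minlen = 1 and args->maxlen = 8, fbno = 0,
                         * fend = 40, bbno = 10, bend = 20, the left segment
                         * is bbno - fbno = 10 >= maxlen, so we take it and
                         * trim to fend = bbno = 10.
                         */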
                        if (bbno - fbno >= args->maxlen) {
                                /* left candidate fits perfectly */
                                fend = bbno;
                        } else if (fend - bend >= args->maxlen * 4) {
                                /* right candidate has enough free space */
                                fbno = bend;
                        } else if (bbno - fbno >= args->minlen) {
                                /* left candidate fits minimum requirement */
                                fend = bbno;
                        } else {
                                goto fail;
                        }
                }

                flen = fend - fbno;
        }
        spin_unlock(&args->pag->pagb_lock);

        if (fbno != bno || flen != len) {
                trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
                                           fbno, flen);
        }
        *rbno = fbno;
        *rlen = flen;
        return;
fail:
        /*
         * Return a zero extent length as the failure indication.  All
         * callers re-check if the trimmed extent satisfies the minlen
         * requirement.
         */
        spin_unlock(&args->pag->pagb_lock);
        trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
        *rbno = fbno;
        *rlen = 0;
}

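/*
 * Remove a single busy extent record: erase it from the rbtree if it is
 * still linked there (i.e. it has a non-zero length), then unlink it from
 * the transaction/CIL busy list and free it.
 */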
STATIC void
xfs_extent_busy_clear_one(
        struct xfs_mount        *mp,
        struct xfs_perag        *pag,
        struct xfs_extent_busy  *busyp)
{
        if (busyp->length) {
                trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
                                            busyp->length);
                rb_erase(&busyp->rb_node, &pag->pagb_tree);
        }

        list_del_init(&busyp->list);
        kmem_free(busyp);
}

/*
 * Remove all extents on the passed in list from the busy extents tree.
 * If do_discard is set, skip extents that need to be discarded and instead
 * mark them as undergoing a discard operation.
 */
void
xfs_extent_busy_clear(
        struct xfs_mount        *mp,
        struct list_head        *list,
        bool                    do_discard)
{
        struct xfs_extent_busy  *busyp, *n;
        struct xfs_perag        *pag = NULL;
        xfs_agnumber_t          agno = NULLAGNUMBER;

        list_for_each_entry_safe(busyp, n, list, list) {
                if (busyp->agno != agno) {
                        if (pag) {
                                spin_unlock(&pag->pagb_lock);
                                xfs_perag_put(pag);
                        }
                        pag = xfs_perag_get(mp, busyp->agno);
                        spin_lock(&pag->pagb_lock);
                        agno = busyp->agno;
                }

                if (do_discard && busyp->length &&
                    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
                        busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
                else
                        xfs_extent_busy_clear_one(mp, pag, busyp);
        }

        if (pag) {
                spin_unlock(&pag->pagb_lock);
                xfs_perag_put(pag);
        }
}

/*
 * Callback for list_sort to sort busy extents by the AG they reside in.
 */
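/*
 * Meant to be passed to list_sort(); e.g. (illustrative call, which the
 * mainline header wraps as xfs_extent_busy_sort()):
 *
 *	list_sort(NULL, list, xfs_extent_busy_ag_cmp);
 */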
int
xfs_extent_busy_ag_cmp(
        void                    *priv,
        struct list_head        *a,
        struct list_head        *b)
{
        return container_of(a, struct xfs_extent_busy, list)->agno -
                container_of(b, struct xfs_extent_busy, list)->agno;
}