]> Pileus Git - ~andy/linux/blob - drivers/staging/lustre/lustre/ptlrpc/recover.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
[~andy/linux] / drivers / staging / lustre / lustre / ptlrpc / recover.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/recover.c
37  *
38  * Author: Mike Shaver <shaver@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_RPC
42 # include <linux/libcfs/libcfs.h>
43
44 #include <obd_support.h>
45 #include <lustre_ha.h>
46 #include <lustre_net.h>
47 #include <lustre_import.h>
48 #include <lustre_export.h>
49 #include <obd.h>
50 #include <obd_ost.h>
51 #include <obd_class.h>
52 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
53 #include <linux/list.h>
54
55 #include "ptlrpc_internal.h"
56
57 /**
58  * Start recovery on disconnected import.
59  * This is done by just attempting a connect
60  */
61 void ptlrpc_initiate_recovery(struct obd_import *imp)
62 {
63         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
64         ptlrpc_connect_import(imp);
65 }
66
67 /**
68  * Identify what request from replay list needs to be replayed next
69  * (based on what we have already replayed) and send it to server.
70  */
71 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
72 {
73         int rc = 0;
74         struct list_head *tmp, *pos;
75         struct ptlrpc_request *req = NULL;
76         __u64 last_transno;
77
78         *inflight = 0;
79
80         /* It might have committed some after we last spoke, so make sure we
81          * get rid of them now.
82          */
83         spin_lock(&imp->imp_lock);
84         imp->imp_last_transno_checked = 0;
85         ptlrpc_free_committed(imp);
86         last_transno = imp->imp_last_replay_transno;
87         spin_unlock(&imp->imp_lock);
88
89         CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
90                imp, obd2cli_tgt(imp->imp_obd),
91                imp->imp_peer_committed_transno, last_transno);
92
93         /* Do I need to hold a lock across this iteration?  We shouldn't be
94          * racing with any additions to the list, because we're in recovery
95          * and are therefore not processing additional requests to add.  Calls
96          * to ptlrpc_free_committed might commit requests, but nothing "newer"
97          * than the one we're replaying (it can't be committed until it's
98          * replayed, and we're doing that here).  l_f_e_safe protects against
99          * problems with the current request being committed, in the unlikely
100          * event of that race.  So, in conclusion, I think that it's safe to
101          * perform this list-walk without the imp_lock held.
102          *
103          * But, the {mdc,osc}_replay_open callbacks both iterate
104          * request lists, and have comments saying they assume the
105          * imp_lock is being held by ptlrpc_replay, but it's not. it's
106          * just a little race...
107          */
108         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
109                 req = list_entry(tmp, struct ptlrpc_request,
110                                      rq_replay_list);
111
112                 /* If need to resend the last sent transno (because a
113                    reconnect has occurred), then stop on the matching
114                    req and send it again. If, however, the last sent
115                    transno has been committed then we continue replay
116                    from the next request. */
117                 if (req->rq_transno > last_transno) {
118                         if (imp->imp_resend_replay)
119                                 lustre_msg_add_flags(req->rq_reqmsg,
120                                                      MSG_RESENT);
121                         break;
122                 }
123                 req = NULL;
124         }
125
126         spin_lock(&imp->imp_lock);
127         imp->imp_resend_replay = 0;
128         spin_unlock(&imp->imp_lock);
129
130         if (req != NULL) {
131                 rc = ptlrpc_replay_req(req);
132                 if (rc) {
133                         CERROR("recovery replay error %d for req "
134                                LPU64"\n", rc, req->rq_xid);
135                         return rc;
136                 }
137                 *inflight = 1;
138         }
139         return rc;
140 }
141
142 /**
143  * Schedule resending of request on sending_list. This is done after
144  * we completed replaying of requests and locks.
145  */
146 int ptlrpc_resend(struct obd_import *imp)
147 {
148         struct ptlrpc_request *req, *next;
149
150         /* As long as we're in recovery, nothing should be added to the sending
151          * list, so we don't need to hold the lock during this iteration and
152          * resend process.
153          */
154         /* Well... what if lctl recover is called twice at the same time?
155          */
156         spin_lock(&imp->imp_lock);
157         if (imp->imp_state != LUSTRE_IMP_RECOVER) {
158                 spin_unlock(&imp->imp_lock);
159                 return -1;
160         }
161
162         list_for_each_entry_safe(req, next, &imp->imp_sending_list,
163                                      rq_list) {
164                 LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
165                          "req %p bad\n", req);
166                 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
167                 if (!ptlrpc_no_resend(req))
168                         ptlrpc_resend_req(req);
169         }
170         spin_unlock(&imp->imp_lock);
171
172         return 0;
173 }
174 EXPORT_SYMBOL(ptlrpc_resend);
175
176 /**
177  * Go through all requests in delayed list and wake their threads
178  * for resending
179  */
180 void ptlrpc_wake_delayed(struct obd_import *imp)
181 {
182         struct list_head *tmp, *pos;
183         struct ptlrpc_request *req;
184
185         spin_lock(&imp->imp_lock);
186         list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
187                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
188
189                 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
190                 ptlrpc_client_wake_req(req);
191         }
192         spin_unlock(&imp->imp_lock);
193 }
194 EXPORT_SYMBOL(ptlrpc_wake_delayed);
195
196 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
197 {
198         struct obd_import *imp = failed_req->rq_import;
199
200         CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
201                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
202                imp->imp_connection->c_remote_uuid.uuid);
203
204         if (ptlrpc_set_import_discon(imp,
205                               lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
206                 if (!imp->imp_replayable) {
207                         CDEBUG(D_HA, "import %s@%s for %s not replayable, "
208                                "auto-deactivating\n",
209                                obd2cli_tgt(imp->imp_obd),
210                                imp->imp_connection->c_remote_uuid.uuid,
211                                imp->imp_obd->obd_name);
212                         ptlrpc_deactivate_import(imp);
213                 }
214                 /* to control recovery via lctl {disable|enable}_recovery */
215                 if (imp->imp_deactive == 0)
216                         ptlrpc_connect_import(imp);
217         }
218
219         /* Wait for recovery to complete and resend. If evicted, then
220            this request will be errored out later.*/
221         spin_lock(&failed_req->rq_lock);
222         if (!failed_req->rq_no_resend)
223                 failed_req->rq_resend = 1;
224         spin_unlock(&failed_req->rq_lock);
225 }
226
227 /**
228  * Administratively active/deactive a client.
229  * This should only be called by the ioctl interface, currently
230  *  - the lctl deactivate and activate commands
231  *  - echo 0/1 >> /proc/osc/XXX/active
232  *  - client umount -f (ll_umount_begin)
233  */
234 int ptlrpc_set_import_active(struct obd_import *imp, int active)
235 {
236         struct obd_device *obd = imp->imp_obd;
237         int rc = 0;
238
239         LASSERT(obd);
240
241         /* When deactivating, mark import invalid, and abort in-flight
242          * requests. */
243         if (!active) {
244                 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
245                               "request\n", obd2cli_tgt(imp->imp_obd));
246
247                 /* set before invalidate to avoid messages about imp_inval
248                  * set without imp_deactive in ptlrpc_import_delay_req */
249                 spin_lock(&imp->imp_lock);
250                 imp->imp_deactive = 1;
251                 spin_unlock(&imp->imp_lock);
252
253                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
254
255                 ptlrpc_invalidate_import(imp);
256         }
257
258         /* When activating, mark import valid, and attempt recovery */
259         if (active) {
260                 CDEBUG(D_HA, "setting import %s VALID\n",
261                        obd2cli_tgt(imp->imp_obd));
262
263                 spin_lock(&imp->imp_lock);
264                 imp->imp_deactive = 0;
265                 spin_unlock(&imp->imp_lock);
266                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
267
268                 rc = ptlrpc_recover_import(imp, NULL, 0);
269         }
270
271         return rc;
272 }
273 EXPORT_SYMBOL(ptlrpc_set_import_active);
274
275 /* Attempt to reconnect an import */
276 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
277 {
278         int rc = 0;
279
280         spin_lock(&imp->imp_lock);
281         if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
282             atomic_read(&imp->imp_inval_count))
283                 rc = -EINVAL;
284         spin_unlock(&imp->imp_lock);
285         if (rc)
286                 GOTO(out, rc);
287
288         /* force import to be disconnected. */
289         ptlrpc_set_import_discon(imp, 0);
290
291         if (new_uuid) {
292                 struct obd_uuid uuid;
293
294                 /* intruct import to use new uuid */
295                 obd_str2uuid(&uuid, new_uuid);
296                 rc = import_set_conn_priority(imp, &uuid);
297                 if (rc)
298                         GOTO(out, rc);
299         }
300
301         /* Check if reconnect is already in progress */
302         spin_lock(&imp->imp_lock);
303         if (imp->imp_state != LUSTRE_IMP_DISCON) {
304                 imp->imp_force_verify = 1;
305                 rc = -EALREADY;
306         }
307         spin_unlock(&imp->imp_lock);
308         if (rc)
309                 GOTO(out, rc);
310
311         rc = ptlrpc_connect_import(imp);
312         if (rc)
313                 GOTO(out, rc);
314
315         if (!async) {
316                 struct l_wait_info lwi;
317                 int secs = cfs_time_seconds(obd_timeout);
318
319                 CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
320                        obd2cli_tgt(imp->imp_obd), secs);
321
322                 lwi = LWI_TIMEOUT(secs, NULL, NULL);
323                 rc = l_wait_event(imp->imp_recovery_waitq,
324                                   !ptlrpc_import_in_recovery(imp), &lwi);
325                 CDEBUG(D_HA, "%s: recovery finished\n",
326                        obd2cli_tgt(imp->imp_obd));
327         }
328
329 out:
330         return rc;
331 }
332 EXPORT_SYMBOL(ptlrpc_recover_import);
333
334 int ptlrpc_import_in_recovery(struct obd_import *imp)
335 {
336         int in_recovery = 1;
337         spin_lock(&imp->imp_lock);
338         if (imp->imp_state == LUSTRE_IMP_FULL ||
339             imp->imp_state == LUSTRE_IMP_CLOSED ||
340             imp->imp_state == LUSTRE_IMP_DISCON)
341                 in_recovery = 0;
342         spin_unlock(&imp->imp_lock);
343         return in_recovery;
344 }