4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ptlrpc/recover.c
38 * Author: Mike Shaver <shaver@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_RPC
42 # include <linux/libcfs/libcfs.h>
44 #include <obd_support.h>
45 #include <lustre_ha.h>
46 #include <lustre_net.h>
47 #include <lustre_import.h>
48 #include <lustre_export.h>
51 #include <obd_class.h>
52 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
53 #include <linux/list.h>
55 #include "ptlrpc_internal.h"
/*
 * ptlrpc_initiate_recovery() - begin recovery of a disconnected import.
 *
 * Emits a D_HA debug message containing the string returned by
 * obd2cli_tgt(imp->imp_obd), then calls ptlrpc_connect_import(imp).
 * This function is void, so the value ptlrpc_connect_import() returns is
 * not propagated to the caller.
 */
58 * Start recovery on disconnected import.
59 * This is done by just attempting a connect
61 void ptlrpc_initiate_recovery(struct obd_import *imp)
63 CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
64 ptlrpc_connect_import(imp);
/*
 * ptlrpc_replay_next() - choose the next request to replay and send it.
 *
 * Under imp_lock: resets imp_last_transno_checked to 0, calls
 * ptlrpc_free_committed(imp) and samples imp_last_replay_transno into
 * last_transno. The replay list (imp_replay_list) is then walked with
 * list_for_each_safe(); the in-function discussion explains why the author
 * considers it acceptable to do so without imp_lock. An entry whose
 * rq_transno is greater than last_transno is the one of interest, and when
 * imp_resend_replay is set a flag is added to its request message through
 * lustre_msg_add_flags(). imp_resend_replay is cleared under imp_lock, the
 * request is handed to ptlrpc_replay_req(), and CERROR reports the rc and
 * the request xid for a replay error.
 *
 * NOTE(review): how *inflight is updated and what is returned on each path
 * should be confirmed against the complete function body.
 */
68 * Identify what request from replay list needs to be replayed next
69 * (based on what we have already replayed) and send it to server.
71 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
74 struct list_head *tmp, *pos;
75 struct ptlrpc_request *req = NULL;
80 /* It might have committed some after we last spoke, so make sure we
81 * get rid of them now.
83 spin_lock(&imp->imp_lock);
84 imp->imp_last_transno_checked = 0;
85 ptlrpc_free_committed(imp);
86 last_transno = imp->imp_last_replay_transno;
87 spin_unlock(&imp->imp_lock);
89 CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
90 imp, obd2cli_tgt(imp->imp_obd),
91 imp->imp_peer_committed_transno, last_transno);
93 /* Do I need to hold a lock across this iteration? We shouldn't be
94 * racing with any additions to the list, because we're in recovery
95 * and are therefore not processing additional requests to add. Calls
96 * to ptlrpc_free_committed might commit requests, but nothing "newer"
97 * than the one we're replaying (it can't be committed until it's
98 * replayed, and we're doing that here). l_f_e_safe protects against
99 * problems with the current request being committed, in the unlikely
100 * event of that race. So, in conclusion, I think that it's safe to
101 * perform this list-walk without the imp_lock held.
103 * But, the {mdc,osc}_replay_open callbacks both iterate
104 * request lists, and have comments saying they assume the
105 * imp_lock is being held by ptlrpc_replay, but it's not. it's
106 * just a little race...
108 list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
109 req = list_entry(tmp, struct ptlrpc_request,
112 /* If need to resend the last sent transno (because a
113 reconnect has occurred), then stop on the matching
114 req and send it again. If, however, the last sent
115 transno has been committed then we continue replay
116 from the next request. */
117 if (req->rq_transno > last_transno) {
118 if (imp->imp_resend_replay)
119 lustre_msg_add_flags(req->rq_reqmsg,
126 spin_lock(&imp->imp_lock);
127 imp->imp_resend_replay = 0;
128 spin_unlock(&imp->imp_lock);
131 rc = ptlrpc_replay_req(req);
133 CERROR("recovery replay error %d for req "
134 LPU64"\n", rc, req->rq_xid);
/*
 * ptlrpc_resend() - resend the requests queued on imp->imp_sending_list.
 *
 * Takes imp_lock and compares imp_state with LUSTRE_IMP_RECOVER; the lock
 * is dropped again inside that check. Each entry on imp_sending_list is
 * sanity-checked with LASSERTF (pointer value above PAGE_CACHE_SIZE and not
 * LP_POISON, rq_type not LI_POISON) and is passed to ptlrpc_resend_req()
 * unless ptlrpc_no_resend(req) is true. imp_lock is released after the walk.
 *
 * NOTE(review): the older in-function comment says the lock is not needed
 * during the iteration, yet the walk appears to run between spin_lock() and
 * spin_unlock() - confirm which is intended and refresh the stale text.
 * NOTE(review): the value returned when imp_state is not
 * LUSTRE_IMP_RECOVER is not verified here - confirm.
 */
143 * Schedule resending of request on sending_list. This is done after
144 * we completed replaying of requests and locks.
146 int ptlrpc_resend(struct obd_import *imp)
148 struct ptlrpc_request *req, *next;
150 /* As long as we're in recovery, nothing should be added to the sending
151 * list, so we don't need to hold the lock during this iteration and
154 /* Well... what if lctl recover is called twice at the same time?
156 spin_lock(&imp->imp_lock);
157 if (imp->imp_state != LUSTRE_IMP_RECOVER) {
158 spin_unlock(&imp->imp_lock);
162 list_for_each_entry_safe(req, next, &imp->imp_sending_list,
164 LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
165 "req %p bad\n", req);
166 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
167 if (!ptlrpc_no_resend(req))
168 ptlrpc_resend_req(req);
170 spin_unlock(&imp->imp_lock);
174 EXPORT_SYMBOL(ptlrpc_resend);
/*
 * ptlrpc_wake_delayed() - wake every request on imp->imp_delayed_list.
 *
 * With imp_lock held, walks imp_delayed_list (entries are linked through
 * rq_list), emits a D_HA DEBUG_REQ message showing req->rq_set, and calls
 * ptlrpc_client_wake_req() on each request. The lock is released once the
 * walk is finished.
 */
177 * Go through all requests in delayed list and wake their threads
180 void ptlrpc_wake_delayed(struct obd_import *imp)
182 struct list_head *tmp, *pos;
183 struct ptlrpc_request *req;
185 spin_lock(&imp->imp_lock);
186 list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
187 req = list_entry(tmp, struct ptlrpc_request, rq_list);
189 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
190 ptlrpc_client_wake_req(req);
192 spin_unlock(&imp->imp_lock);
194 EXPORT_SYMBOL(ptlrpc_wake_delayed);
/*
 * ptlrpc_request_handle_notconn() - handle a request whose import was, in
 * the words of the debug message, "abruptly disconnected".
 *
 * Works on failed_req->rq_import. Calls ptlrpc_set_import_discon() with the
 * connection count read from the failed request's message
 * (lustre_msg_get_conn_cnt()). When that call returns non-zero, an import
 * that is not replayable is deactivated with ptlrpc_deactivate_import(),
 * and ptlrpc_connect_import() is called when imp_deactive is 0 (the flag
 * the lctl {disable|enable}_recovery comment refers to).
 * Finally, under rq_lock, rq_resend is set on the failed request unless
 * rq_no_resend is already set.
 *
 * NOTE(review): confirm the brace nesting of the imp_deactive check
 * relative to the two enclosing conditions.
 */
196 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
198 struct obd_import *imp = failed_req->rq_import;
200 CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
201 imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
202 imp->imp_connection->c_remote_uuid.uuid);
204 if (ptlrpc_set_import_discon(imp,
205 lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
206 if (!imp->imp_replayable) {
207 CDEBUG(D_HA, "import %s@%s for %s not replayable, "
208 "auto-deactivating\n",
209 obd2cli_tgt(imp->imp_obd),
210 imp->imp_connection->c_remote_uuid.uuid,
211 imp->imp_obd->obd_name);
212 ptlrpc_deactivate_import(imp);
214 /* to control recovery via lctl {disable|enable}_recovery */
215 if (imp->imp_deactive == 0)
216 ptlrpc_connect_import(imp);
219 /* Wait for recovery to complete and resend. If evicted, then
220 this request will be errored out later.*/
221 spin_lock(&failed_req->rq_lock);
222 if (!failed_req->rq_no_resend)
223 failed_req->rq_resend = 1;
224 spin_unlock(&failed_req->rq_lock);
/*
 * ptlrpc_set_import_active() - administratively activate or deactivate an
 * import.
 *
 * Deactivation path: prints a console warning (LCONSOLE_WARN), sets
 * imp_deactive to 1 under imp_lock before invalidating, raises
 * IMP_EVENT_DEACTIVATE through obd_import_event() and then calls
 * ptlrpc_invalidate_import().
 * Activation path: logs at D_HA, clears imp_deactive under imp_lock, raises
 * IMP_EVENT_ACTIVATE and stores the result of
 * ptlrpc_recover_import(imp, NULL, 0) in rc.
 *
 * NOTE(review): presumably `active` selects between the two paths and rc is
 * what gets returned - confirm against the complete function body.
 */
228 * Administratively active/deactive a client.
229 * This should only be called by the ioctl interface, currently
230 * - the lctl deactivate and activate commands
231 * - echo 0/1 >> /proc/osc/XXX/active
232 * - client umount -f (ll_umount_begin)
234 int ptlrpc_set_import_active(struct obd_import *imp, int active)
236 struct obd_device *obd = imp->imp_obd;
241 /* When deactivating, mark import invalid, and abort in-flight
244 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
245 "request\n", obd2cli_tgt(imp->imp_obd));
247 /* set before invalidate to avoid messages about imp_inval
248 * set without imp_deactive in ptlrpc_import_delay_req */
249 spin_lock(&imp->imp_lock);
250 imp->imp_deactive = 1;
251 spin_unlock(&imp->imp_lock);
253 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
255 ptlrpc_invalidate_import(imp);
258 /* When activating, mark import valid, and attempt recovery */
260 CDEBUG(D_HA, "setting import %s VALID\n",
261 obd2cli_tgt(imp->imp_obd));
263 spin_lock(&imp->imp_lock);
264 imp->imp_deactive = 0;
265 spin_unlock(&imp->imp_lock);
266 obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
268 rc = ptlrpc_recover_import(imp, NULL, 0);
273 EXPORT_SYMBOL(ptlrpc_set_import_active);
/*
 * ptlrpc_recover_import() - try to reconnect an import.
 *
 * imp:      import to recover.
 * new_uuid: string converted with obd_str2uuid() and handed to
 *           import_set_conn_priority(); ptlrpc_set_import_active() passes
 *           NULL for it.
 * async:    NOTE(review): presumably selects whether the caller waits for
 *           recovery to finish - confirm.
 *
 * Sequence of the statements in this function:
 *  - under imp_lock, test imp_state == LUSTRE_IMP_NEW, imp_deactive and
 *    imp_inval_count;
 *  - force the import to disconnected via ptlrpc_set_import_discon(imp, 0);
 *  - apply new_uuid through import_set_conn_priority();
 *  - under imp_lock, set imp_force_verify when imp_state is not
 *    LUSTRE_IMP_DISCON (a reconnect is already in progress);
 *  - call ptlrpc_connect_import();
 *  - wait on imp_recovery_waitq until ptlrpc_import_in_recovery() is false,
 *    with a timeout built by LWI_TIMEOUT from cfs_time_seconds(obd_timeout).
 *
 * NOTE(review): the error codes returned on the early-exit paths are not
 * verified here - confirm.
 */
275 /* Attempt to reconnect an import */
276 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
280 spin_lock(&imp->imp_lock);
281 if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
282 atomic_read(&imp->imp_inval_count))
284 spin_unlock(&imp->imp_lock);
288 /* force import to be disconnected. */
289 ptlrpc_set_import_discon(imp, 0);
292 struct obd_uuid uuid;
294 /* instruct import to use new uuid */
295 obd_str2uuid(&uuid, new_uuid);
296 rc = import_set_conn_priority(imp, &uuid);
301 /* Check if reconnect is already in progress */
302 spin_lock(&imp->imp_lock);
303 if (imp->imp_state != LUSTRE_IMP_DISCON) {
304 imp->imp_force_verify = 1;
307 spin_unlock(&imp->imp_lock);
311 rc = ptlrpc_connect_import(imp);
316 struct l_wait_info lwi;
/* NOTE(review): secs holds cfs_time_seconds(obd_timeout) and is the value
 * printed by the "%u seconds" message below; if cfs_time_seconds() yields
 * ticks rather than seconds the log text is misleading, and secs is an int
 * formatted with %u - confirm and consider logging obd_timeout instead. */
317 int secs = cfs_time_seconds(obd_timeout);
319 CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
320 obd2cli_tgt(imp->imp_obd), secs);
322 lwi = LWI_TIMEOUT(secs, NULL, NULL);
323 rc = l_wait_event(imp->imp_recovery_waitq,
324 !ptlrpc_import_in_recovery(imp), &lwi);
325 CDEBUG(D_HA, "%s: recovery finished\n",
326 obd2cli_tgt(imp->imp_obd));
332 EXPORT_SYMBOL(ptlrpc_recover_import);
/*
 * ptlrpc_import_in_recovery() - report whether an import is in recovery.
 *
 * Under imp_lock, compares imp_state with LUSTRE_IMP_FULL,
 * LUSTRE_IMP_CLOSED and LUSTRE_IMP_DISCON.
 *
 * NOTE(review): presumably those three states mean "not in recovery" and
 * every other state yields a non-zero result (ptlrpc_recover_import() waits
 * for this function to become false) - confirm the returned values.
 */
334 int ptlrpc_import_in_recovery(struct obd_import *imp)
337 spin_lock(&imp->imp_lock);
338 if (imp->imp_state == LUSTRE_IMP_FULL ||
339 imp->imp_state == LUSTRE_IMP_CLOSED ||
340 imp->imp_state == LUSTRE_IMP_DISCON)
342 spin_unlock(&imp->imp_lock);