Skip to content
Snippets Groups Projects
Commit 22368ff1 authored by Olga Kornievskaia, committed by Anna Schumaker
Browse files

PNFS for stateid errors retry against MDS first


Upon receiving a stateid error such as BAD_STATEID, the client
should retry the operation against the MDS before deciding to
do stateid recovery.

Previously, the code would initiate state recovery, which could
lead to a race in the state manager that could choose an incorrect
recovery method, which in turn would lead to an EIO failure for the
application.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
parent a0bc01e0
No related branches found
No related tags found
No related merge requests found
...@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task, ...@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
{ {
struct pnfs_layout_hdr *lo = lseg->pls_layout; struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode; struct inode *inode = lo->plh_inode;
struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
if (task->tk_status >= 0) if (task->tk_status >= 0)
return 0; return 0;
switch (task->tk_status) { switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
}
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */ /* DS session errors */
case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT: case -NFS4ERR_BADSLOT:
...@@ -212,17 +193,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, ...@@ -212,17 +193,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status); task->tk_status);
return -NFS4ERR_RESET_TO_MDS; return -NFS4ERR_RESET_TO_MDS;
} }
out:
task->tk_status = 0; task->tk_status = 0;
return -EAGAIN; return -EAGAIN;
out_bad_stateid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
goto out;
} }
/* NFS_PROTO call done callback routines */ /* NFS_PROTO call done callback routines */
......
...@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, ...@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
{ {
struct pnfs_layout_hdr *lo = lseg->pls_layout; struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode; struct inode *inode = lo->plh_inode;
struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
switch (task->tk_status) { switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
if (state == NULL)
break;
nfs_remove_bad_delegation(state->inode, NULL);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
}
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */
case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT: case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_BAD_HIGH_SLOT:
...@@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, ...@@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
task->tk_status); task->tk_status);
return -NFS4ERR_RESET_TO_MDS; return -NFS4ERR_RESET_TO_MDS;
} }
out:
task->tk_status = 0; task->tk_status = 0;
return -EAGAIN; return -EAGAIN;
out_bad_stateid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
goto out;
} }
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment