Skip to content

Commit

Permalink
scsi: qla2xxx: Fix hang on NVMe command timeouts
Browse files Browse the repository at this point in the history
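The abort callback is called only when the abort request is actually posted
to firmware, and the refcounting is done in that callback. On internal
errors the abort is never posted, so the callback is not invoked and the
I/O hangs. Fix this by returning a separate error code when the abort
command is returned from firmware, so the caller can tell that case apart
from internal failures and drop the reference itself when the callback
will not run.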

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Arun Easi <[email protected]>
Signed-off-by: Nilesh Javali <[email protected]>
Signed-off-by: Martin K. Petersen <[email protected]>
  • Loading branch information
Arun Easi authored and martinkpetersen committed Aug 24, 2021
1 parent f6e327f commit 2cabf10
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 14 deletions.
3 changes: 3 additions & 0 deletions drivers/scsi/qla2xxx/qla_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -5166,6 +5166,9 @@ struct secure_flash_update_block_pk {
#define QLA_BUSY 0x107
#define QLA_ALREADY_REGISTERED 0x109
#define QLA_OS_TIMER_EXPIRED 0x10a
#define QLA_ERR_NO_QPAIR 0x10b
#define QLA_ERR_NOT_FOUND 0x10c
#define QLA_ERR_FROM_FW 0x10d

#define NVRAM_DELAY() udelay(10)

Expand Down
6 changes: 3 additions & 3 deletions drivers/scsi/qla2xxx/qla_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
GFP_ATOMIC);
if (!sp)
return rval;
return QLA_MEMORY_ALLOC_FAILED;

abt_iocb = &sp->u.iocb_cmd;
sp->type = SRB_ABT_CMD;
Expand Down Expand Up @@ -190,7 +190,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
if (wait) {
wait_for_completion(&abt_iocb->u.abt.comp);
rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
QLA_SUCCESS : QLA_FUNCTION_FAILED;
QLA_SUCCESS : QLA_ERR_FROM_FW;
sp->free(sp);
}

Expand Down Expand Up @@ -1988,7 +1988,7 @@ qla24xx_async_abort_command(srb_t *sp)

if (handle == req->num_outstanding_cmds) {
/* Command not found. */
return QLA_FUNCTION_FAILED;
return QLA_ERR_NOT_FOUND;
}
if (sp->type == SRB_FXIOCB_DCMD)
return qlafx00_fx_disc(vha, &vha->hw->mr.fcport,
Expand Down
4 changes: 2 additions & 2 deletions drivers/scsi/qla2xxx/qla_mbx.c
Original file line number Diff line number Diff line change
Expand Up @@ -3245,7 +3245,7 @@ qla24xx_abort_command(srb_t *sp)
if (sp->qpair)
req = sp->qpair->req;
else
return QLA_FUNCTION_FAILED;
return QLA_ERR_NO_QPAIR;

if (ql2xasynctmfenable)
return qla24xx_async_abort_command(sp);
Expand All @@ -3258,7 +3258,7 @@ qla24xx_abort_command(srb_t *sp)
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
if (handle == req->num_outstanding_cmds) {
/* Command not found. */
return QLA_FUNCTION_FAILED;
return QLA_ERR_NOT_FOUND;
}

abt = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &abt_dma);
Expand Down
26 changes: 17 additions & 9 deletions drivers/scsi/qla2xxx/qla_nvme.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,11 @@ static void qla_nvme_abort_work(struct work_struct *work)
srb_t *sp = priv->sp;
fc_port_t *fcport = sp->fcport;
struct qla_hw_data *ha = fcport->vha->hw;
int rval;
int rval, abts_done_called = 1;

ql_dbg(ql_dbg_io, fcport->vha, 0xffff,
"%s called for sp=%p, hndl=%x on fcport=%p deleted=%d\n",
__func__, sp, sp->handle, fcport, fcport->deleted);
"%s called for sp=%p, hndl=%x on fcport=%p desc=%p deleted=%d\n",
__func__, sp, sp->handle, fcport, sp->u.iocb_cmd.u.nvme.desc, fcport->deleted);

if (!ha->flags.fw_started || fcport->deleted == QLA_SESS_DELETED)
goto out;
Expand All @@ -251,12 +251,20 @@ static void qla_nvme_abort_work(struct work_struct *work)
__func__, (rval != QLA_SUCCESS) ? "Failed to abort" : "Aborted",
sp, sp->handle, fcport, rval);

/*
* If async tmf is enabled, the abort callback is called only on
* return codes QLA_SUCCESS and QLA_ERR_FROM_FW.
*/
if (ql2xasynctmfenable &&
rval != QLA_SUCCESS && rval != QLA_ERR_FROM_FW)
abts_done_called = 0;

/*
* Returned before decreasing kref so that I/O requests
* are waited until ABTS complete. This kref is decreased
* at qla24xx_abort_sp_done function.
*/
if (ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(sp))
if (abts_done_called && ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(sp))
return;
out:
/* kref_get was done before work was scheduled. */
Expand Down Expand Up @@ -804,14 +812,14 @@ void qla_nvme_abort_process_comp_status(struct abort_entry_24xx *abt, srb_t *ori
case CS_PORT_LOGGED_OUT:
/* BA_RJT was received for the ABTS */
case CS_PORT_CONFIG_CHG:
ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09d,
ql_dbg(ql_dbg_async, vha, 0xf09d,
"Abort I/O IOCB completed with error, comp_status=%x\n",
comp_status);
break;

/* BA_RJT was received for the ABTS */
case CS_REJECT_RECEIVED:
ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09e,
ql_dbg(ql_dbg_async, vha, 0xf09e,
"BA_RJT was received for the ABTS rjt_vendorUnique = %u",
abt->fw.ba_rjt_vendorUnique);
ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09e,
Expand All @@ -820,18 +828,18 @@ void qla_nvme_abort_process_comp_status(struct abort_entry_24xx *abt, srb_t *ori
break;

case CS_COMPLETE:
ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09f,
ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0xf09f,
"IOCB request is completed successfully comp_status=%x\n",
comp_status);
break;

case CS_IOCB_ERROR:
ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf0a0,
ql_dbg(ql_dbg_async, vha, 0xf0a0,
"IOCB request is failed, comp_status=%x\n", comp_status);
break;

default:
ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf0a1,
ql_dbg(ql_dbg_async, vha, 0xf0a1,
"Invalid Abort IO IOCB Completion Status %x\n",
comp_status);
break;
Expand Down

0 comments on commit 2cabf10

Please sign in to comment.