Processing flow after a UFS read/write request completes

A UFS transfer goes through three stages: building and issuing the request, processing the request, and the stage after the request completes. This article focuses briefly on that last stage: after a UFS request has been processed, the system still has cleanup work to do and resources to release, and the UFS host has to take the completion status returned by the UFS device and walk the corresponding completion path.
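For orientation, the completion path walked through below can be summarised as the following call chain (assembled from the functions quoted in this article; the legacy single-queue, non-blk-mq path is assumed):

    UFS host controller driver completion (LLDD, e.g. ufshcd/ufs-qcom)
      -> cmd->scsi_done(cmd)                      i.e. scsi_done()
        -> blk_complete_request()
          -> __blk_complete_request()             queue req on the per-CPU blk_cpu_done list
            -> raise_softirq_irqoff(BLOCK_SOFTIRQ)
              ... softirq runs later (irq_exit / ksoftirqd / local_bh_enable) ...
                -> __do_softirq() -> blk_done_softirq()
                  -> q->softirq_done_fn(rq)       i.e. scsi_softirq_done()
                    -> scsi_finish_command()
                      -> drv->done(cmd)           i.e. sd_done() for a disk LU
                      -> scsi_io_completion() -> scsi_end_request()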

 

/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock assumed to be held when called.
 */
static void scsi_request_fn(struct request_queue *q)
    __releases(q->queue_lock)
    __acquires(q->queue_lock)
{
    struct scsi_device *sdev = q->queuedata;
    struct Scsi_Host *shost;
    struct scsi_cmnd *cmd;
    struct request *req;

    /*
     * To start with, we keep looping until the queue is empty, or until
     * the host is no longer able to accept any more requests.
     */
    shost = sdev->host;
    for (;;) {
        int rtn;
        /*
         * get next queueable request.  We do this early to make sure
         * that the request is fully prepared even if we cannot
         * accept it.
         */
        req = blk_peek_request(q);
        if (!req)
            break;

        if (unlikely(!scsi_device_online(sdev))) {
            sdev_printk(KERN_ERR, sdev,
                    "rejecting I/O to offline device\n");
            scsi_kill_request(req, q);
            continue;
        }

        if (!scsi_dev_queue_ready(q, sdev))
            break;

        /*
         * Remove the request from the request list.
         */
        if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
            blk_start_request(req);

        spin_unlock_irq(q->queue_lock);
        cmd = blk_mq_rq_to_pdu(req);
        if (cmd != req->special) {
            printk(KERN_CRIT "impossible request in %s.\n"
                     "please mail a stack trace to "
                     "[email protected]\n",
                     __func__);
            blk_dump_rq_flags(req, "foo");
            BUG();
        }

        /*
         * We hit this when the driver is using a host wide
         * tag map. For device level tag maps the queue_depth check
         * in the device ready fn would prevent us from trying
         * to allocate a tag. Since the map is a shared host resource
         * we add the dev to the starved list so it eventually gets
         * a run when a tag is freed.
         */
        if (blk_queue_tagged(q) && !(req->rq_flags & RQF_QUEUED)) {
            spin_lock_irq(shost->host_lock);
            if (list_empty(&sdev->starved_entry))
                list_add_tail(&sdev->starved_entry,
                          &shost->starved_list);
            spin_unlock_irq(shost->host_lock);
            goto not_ready;
        }

        if (!scsi_target_queue_ready(shost, sdev))
            goto not_ready;

        if (!scsi_host_queue_ready(q, shost, sdev))
            goto host_not_ready;
    
        if (sdev->simple_tags)
            cmd->flags |= SCMD_TAGGED;
        else
            cmd->flags &= ~SCMD_TAGGED;

        /*
         * Finally, initialize any error handling parameters, and set up
         * the timers for timeouts.
         */
        scsi_init_cmd_errh(cmd);

        /*
         * Dispatch the command to the low-level driver.
         */
        cmd->scsi_done = scsi_done;
        rtn = scsi_dispatch_cmd(cmd);
        if (rtn) {
            scsi_queue_insert(cmd, rtn);
            spin_lock_irq(q->queue_lock);
            goto out_delay;
        }
        spin_lock_irq(q->queue_lock);
    }

    return;

 host_not_ready:
    if (scsi_target(sdev)->can_queue > 0)
        atomic_dec(&scsi_target(sdev)->target_busy);
 not_ready:
    /*
     * lock q, handle tag, requeue req, and decrement device_busy. We
     * must return with queue_lock held.
     *
     * Decrementing device_busy without checking it is OK, as all such
     * cases (host limits or settings) should run the queue at some
     * later time.
     */
    spin_lock_irq(q->queue_lock);
    blk_requeue_request(q, req);
    atomic_dec(&sdev->device_busy);
out_delay:
    if (!atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev))
        blk_delay_queue(q, SCSI_QUEUE_DELAY);
}

scsi_request_fn() is the main strategy (request-handling) routine of the SCSI layer: on the legacy single-queue path, every read/write request pushed down by the upper layers (filesystem, BIO layer, I/O scheduler) ends up here for processing.
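As background, a paraphrased sketch (not a verbatim quote, and the exact form varies by kernel version): on this legacy path the SCSI mid layer installs scsi_request_fn as the queue's request_fn when it sets up the scsi_device's request queue in scsi_lib.c, roughly:

/* sketch of the legacy queue setup in scsi_lib.c (paraphrased) */
q->request_fn = scsi_request_fn;        /* the block layer calls this to drain the queue */
blk_queue_prep_rq(q, scsi_prep_fn);     /* turns a struct request into a scsi_cmnd first */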

 

/**
 * scsi_dispatch_command - Dispatch a command to the low-level driver.
 * @cmd: command block we are dispatching.
 *
 * Return: nonzero return request was rejected and device's queue needs to be
 * plugged.
 */
static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
{
    struct Scsi_Host *host = cmd->device->host;
    int rtn = 0;

    atomic_inc(&cmd->device->iorequest_cnt);

    /* check if the device is still usable */
    if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
        /* in SDEV_DEL we error all commands. DID_NO_CONNECT
         * returns an immediate error upwards, and signals
         * that the device is no longer present */
        cmd->result = DID_NO_CONNECT << 16;
        goto done;
    }

    /* Check to see if the scsi lld made this device blocked. */
    if (unlikely(scsi_device_blocked(cmd->device))) {
        /*
         * in blocked state, the command is just put back on
         * the device queue.  The suspend state has already
         * blocked the queue so future requests should not
         * occur until the device transitions out of the
         * suspend state.
         */
        SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
            "queuecommand : device blocked\n"));
        return SCSI_MLQUEUE_DEVICE_BUSY;
    }

    /* Store the LUN value in cmnd, if needed. */
    if (cmd->device->lun_in_cdb)
        cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
                   (cmd->device->lun << 5 & 0xe0);

    scsi_log_send(cmd);

    /*
     * Before we queue this command, check if the command
     * length exceeds what the host adapter can handle.
     */
    if (cmd->cmd_len > cmd->device->host->max_cmd_len) {
        SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
                   "queuecommand : command too long. "
                   "cdb_size=%d host->max_cmd_len=%d\n",
                   cmd->cmd_len, cmd->device->host->max_cmd_len));
        cmd->result = (DID_ABORT << 16);
        goto done;
    }

    if (unlikely(host->shost_state == SHOST_DEL)) {
        cmd->result = (DID_NO_CONNECT << 16);
        goto done;

    }

    trace_scsi_dispatch_cmd_start(cmd);
    rtn = host->hostt->queuecommand(host, cmd);
    if (rtn) {
        trace_scsi_dispatch_cmd_error(cmd, rtn);
        if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
            rtn != SCSI_MLQUEUE_TARGET_BUSY)
            rtn = SCSI_MLQUEUE_HOST_BUSY;

        SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
            "queuecommand : request rejected\n"));
    }

    return rtn;
 done:
    cmd->scsi_done(cmd);
    return 0;
}
scsi_dispatch_cmd() is where the SCSI mid layer hands a read/write SCSI command down to the low-level driver (the UFS host driver stack, e.g. ufs-qcom.c on top of ufshcd.c).
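For UFS, the host->hostt->queuecommand hook that scsi_dispatch_cmd() invokes is provided by the UFS host controller core driver (drivers/scsi/ufs/ufshcd.c, on which ufs-qcom.c is layered). A paraphrased sketch of that scsi_host_template, not a verbatim quote:

static struct scsi_host_template ufshcd_driver_template = {
    .name             = "ufshcd",
    .queuecommand     = ufshcd_queuecommand,   /* called via host->hostt->queuecommand above */
    .slave_alloc      = ufshcd_slave_alloc,
    .eh_abort_handler = ufshcd_abort,
    /* ... */
};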

 

/**
 * scsi_done - Invoke completion on finished SCSI command.
 * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
 * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
 *
 * Description: This function is the mid-level's (SCSI Core) interrupt routine,
 * which regains ownership of the SCSI command (de facto) from a LLDD, and
 * calls blk_complete_request() for further processing.
 *
 * This function is interrupt context safe.
 */
static void scsi_done(struct scsi_cmnd *cmd)
{
    trace_scsi_dispatch_cmd_done(cmd);
    blk_complete_request(cmd->request);
}

scsi_done() is the completion callback invoked once the SCSI command has been handled: the low-level device driver (LLDD, e.g. ufs-qcom.c) calls it to hand ownership of the SCSI command back to the SCSI core, i.e. to signal that the LLDD is finished with it. It is safe to call from interrupt context, so it must not sleep.
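In the UFS case the LLDD invokes this callback from its transfer-completion path, which runs from the host controller interrupt. A heavily paraphrased sketch of that step in ufshcd.c, shown only to place scsi_done() in context:

/* sketch: UFS transfer completion, paraphrased from ufshcd.c */
cmd->result = result;        /* status assembled from the UPIU response */
lrbp->cmd = NULL;            /* the LRB no longer owns this command */
cmd->scsi_done(cmd);         /* -> scsi_done() -> blk_complete_request() */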

/**
 * blk_complete_request - end I/O on a request
 * @req:      the request being processed
 *
 * Description:
 *     Ends all I/O on a request. It does not handle partial completions,
 *     unless the driver actually implements this in its completion callback
 *     through requeueing. The actual completion happens out-of-order,
 *     through a softirq handler. The user must have registered a completion
 *     callback through blk_queue_softirq_done().
 **/
void blk_complete_request(struct request *req)
{
    if (unlikely(blk_should_fake_timeout(req->q)))
        return;
    if (!blk_mark_rq_complete(req))
        __blk_complete_request(req);
}

blk_complete_request() ends all I/O on the request; it does not do partial completion unless the driver implements that itself by requeueing from its completion callback. The actual completion happens out of order, in a softirq handler, so the user must have registered a completion callback beforehand with blk_queue_softirq_done().
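The registration this refers to is done by the SCSI mid layer itself when it allocates the device's request queue; a paraphrased sketch (scsi_lib.c, exact location varies by kernel version):

blk_queue_softirq_done(q, scsi_softirq_done);   /* BLOCK_SOFTIRQ completion callback */
blk_queue_rq_timed_out(q, scsi_times_out);      /* command timeout handler */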

void __blk_complete_request(struct request *req)
{
    int ccpu, cpu;
    struct request_queue *q = req->q;
    unsigned long flags;
    bool shared = false;

    BUG_ON(!q->softirq_done_fn);

    local_irq_save(flags);
    cpu = smp_processor_id();

    /*
     * Select completion CPU
     */
    if (req->cpu != -1) {
        ccpu = req->cpu;
        if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
            shared = cpus_share_cache(cpu, ccpu);
    } else
        ccpu = cpu;

    /*
     * If current CPU and requested CPU share a cache, run the softirq on
     * the current CPU. One might concern this is just like
     * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
     * running in interrupt handler, and currently I/O controller doesn't
     * support multiple interrupts, so current CPU is unique actually. This
     * avoids IPI sending from current CPU to the first CPU of a group.
     */
    if (ccpu == cpu || shared) {
        struct list_head *list;
do_local:
        list = this_cpu_ptr(&blk_cpu_done);
        list_add_tail(&req->ipi_list, list);

        /*
         * if the list only contains our just added request,
         * signal a raise of the softirq. If there are already
         * entries there, someone already raised the irq but it
         * hasn't run yet.
         */
        if (list->next == &req->ipi_list)
            raise_softirq_irqoff(BLOCK_SOFTIRQ);
    } else if (raise_blk_irq(ccpu, req))
        goto do_local;

    local_irq_restore(flags);
}
 

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
    __raise_softirq_irqoff(nr);

    /*
     * If we're in an interrupt or softirq, we're done
     * (this also catches softirq-disabled code). We will
     * actually run the softirq once we return from
     * the irq or softirq.
     *
     * Otherwise we wake up ksoftirqd to make sure we
     * schedule the softirq soon.
     */
    if (!in_interrupt())
        wakeup_softirqd();
}
 

raise_softirq_irqoff() marks the given softirq pending and, if needed, wakes the thread that runs the registered softirq handlers. It must be called with hardware interrupts disabled (from the hard-IRQ top half, or under local_irq_save()); if the caller is not in interrupt context it wakes ksoftirqd so the softirq gets scheduled soon.

When a softirq needs to run, raise_softirq() (or raise_softirq_irqoff()) is called to activate it. The word "activate" rather than "call" is used because in many situations the softirq cannot be executed directly; all that can be done is to quickly mark it as runnable and let it execute at some later point. Why can it not simply be called? Remember the whole idea behind the bottom half: letting the top half finish as quickly as possible. If the interrupt handler called the softirq function directly, the distinction between top half and bottom half - and with it the reason for its existence - would be lost.

The kernel tracks pending softirqs in a bitmap named __softirq_pending, one bit per softirq; the bitmap lives in the per-CPU irq_stat structure:

typedef struct {
        unsigned int __softirq_pending;
        ......
} ____cacheline_aligned irq_cpustat_t;

DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);

The macro or_softirq_pending sets the corresponding bit (a bitwise OR):

#define or_softirq_pending(x)        percpu_or(irq_stat.__softirq_pending, (x))
/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
        // set the bit in the pending bitmap, i.e. mark the softirq as runnable
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        // with the bit set, check whether we are outside interrupt context; if so,
        // this is a good moment to get the softirq run right away.
        // in_interrupt() also catches the case where softirqs are disabled.
        // wakeup_softirqd() wakes the softirq daemon ksoftirqd.
        if (!in_interrupt())
                wakeup_softirqd();
}

With that, the full meaning of "activating" a softirq is visible; raise_softirq() performs the operation:

void raise_softirq(unsigned int nr)
{
        unsigned long flags;

        // the whole operation runs with interrupts disabled to avoid races with nested activation
        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

So activation boils down to two things:
<1> most importantly, setting the corresponding bit in the pending bitmap so the softirq is handled at some future point;
<2> if we are no longer in interrupt context at that moment, running it right away (in practice, waking the kernel thread) - the "future point" becomes now.

So, besides raise_softirq, which may (and the key word is "may") wake ksoftirqd via wakeup_softirqd, it is worth knowing the other points at which softirqs are run.

A. When do_IRQ has finished handling an I/O interrupt, irq_exit() is called:

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()        __do_softirq()
#else
# define invoke_softirq()        do_softirq()
#endif

void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();                // run the pending softirqs

B. If the system uses the I/O APIC, after the local timer interrupt has been handled;

C. local_bh_enable()

local_bh_enable() re-enables the bottom half, and the most important bottom half is of course the softirq: any pending softirqs are run at that point.

Whichever path triggers it, execution eventually reaches the core softirq handling function do_softirq(), which processes all pending softirqs on the current CPU.
The kernel keeps the softirq design as platform-independent as possible, but in some cases there are still architecture-specific differences.


The softirq daemon is a core part of the softirq machinery, and its job is straightforward: inspect the softirq pending state in irq_stat, and when an event has been raised, index into the softirq vector table and call the registered action function. Softirq service routines can therefore run in the context of the softirq daemon (ksoftirqd), in addition to the interrupt-exit path. The core processing routine in Linux is do_softirq(), shown below, which ksoftirqd also ends up invoking.
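For reference, the per-CPU ksoftirqd thread itself is little more than a wrapper that calls __do_softirq() when work is pending; a paraphrased sketch of run_ksoftirqd() from kernel/softirq.c (details differ between kernel versions):

static void run_ksoftirqd(unsigned int cpu)
{
    local_irq_disable();
    if (local_softirq_pending()) {
        __do_softirq();          /* the same core handler used on the irq-exit path */
        local_irq_enable();
        cond_resched();
        return;
    }
    local_irq_enable();
}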

asmlinkage __visible void do_softirq(void)
{
    __u32 pending;
    unsigned long flags;

    if (in_interrupt())
        return;

    local_irq_save(flags);

    pending = local_softirq_pending();

    if (pending && !ksoftirqd_running(pending))
        do_softirq_own_stack();

    local_irq_restore(flags);
}
 

Whichever variant is selected, it ends up calling __do_softirq():

static inline void do_softirq_own_stack(void)
{
    __do_softirq();
}

 

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
    unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
    unsigned long old_flags = current->flags;
    int max_restart = MAX_SOFTIRQ_RESTART;
    struct softirq_action *h;
    bool in_hardirq;
    __u32 pending;
    int softirq_bit;

    /*
     * Mask out PF_MEMALLOC s current task context is borrowed for the
     * softirq. A softirq handled such as network RX might set PF_MEMALLOC
     * again if the socket is related to swap
     */
    current->flags &= ~PF_MEMALLOC;

    pending = local_softirq_pending();   // snapshot the softirq pending bitmap
    account_irq_enter_time(current);  // account the interrupt time to the current task

    __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); // disable bottom halves on this CPU so softirqs here execute serially
    in_hardirq = lockdep_softirq_start();

restart:
    /* Reset the pending bitmask before enabling irqs */
    set_softirq_pending(0); // clear the pending bitmap before re-enabling interrupts

    local_irq_enable(); // interrupts were disabled only so the bitmap could be read and reset atomically; from here on the saved 'pending' copy is used, and interrupt handlers may safely set bits in irq_stat.__softirq_pending again, so interrupts can be re-enabled

    h = softirq_vec; // fetch the softirq vector table
 

    while ((softirq_bit = ffs(pending))) {  // walk the bitmap bit by bit; if a bit is set, handle that softirq
        unsigned int vec_nr;
        int prev_count;

        h += softirq_bit - 1;

        vec_nr = h - softirq_vec;
        prev_count = preempt_count();    // save the preempt counter
 

        kstat_incr_softirqs_this_cpu(vec_nr);

        trace_softirq_entry(vec_nr);
        h->action(h);   // invoke the softirq handler; for BLOCK_SOFTIRQ this is blk_done_softirq
        trace_softirq_exit(vec_nr);
        if (unlikely(prev_count != preempt_count())) {  // the handler left preempt_count unbalanced: report and fix it up
            pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
                   vec_nr, softirq_to_name[vec_nr], h->action,
                   prev_count, preempt_count());
            preempt_count_set(prev_count);
        }
        h++;   // advance to the next slot in the softirq vector table
        pending >>= softirq_bit;  // shift the bitmap to examine the next pending bit
    }

    rcu_bh_qs();
    local_irq_disable();  // interrupts were enabled above, so new softirqs may have been raised meanwhile; disable interrupts again and, up to max_restart times, go around once more

    pending = local_softirq_pending();  // re-read the pending bitmap
    if (pending) {
        if (time_before(jiffies, end) && !need_resched() &&
            --max_restart)  // something is pending and we are still within the time/restart budget: go around again
            goto restart;

        wakeup_softirqd();  // budget exhausted but work remains: wake the softirq daemon ksoftirqd to finish it
    }

    lockdep_softirq_end(in_hardirq);
    account_irq_exit_time(current);

    __local_bh_enable(SOFTIRQ_OFFSET);    // re-enable bottom halves on this CPU

    WARN_ON_ONCE(in_interrupt());
    current_restore_flags(old_flags, PF_MEMALLOC);
}

 

A softirq is triggered with raise_softirq(): with interrupts disabled it sets the softirq's pending bit, and if we are not currently in interrupt context it wakes the daemon directly.

The core elements that make up the softirq mechanism are:

1. the softirq pending state, irq_stat (__softirq_pending);

2. the softirq vector table, softirq_vec;

3. the softirq daemon, ksoftirqd. (A minimal model of how these three pieces fit together is sketched right below.)
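To make the interplay of these three elements concrete, here is a minimal user-space model - an illustrative assumption, not kernel code - with a pending bitmask, a vector table and an ffs()-based dispatch loop shaped like __do_softirq(); the *_model names are made up for this sketch:

#include <stdio.h>
#include <strings.h>   /* ffs() */

#define NR_SOFTIRQS   10
#define BLOCK_SOFTIRQ 4

struct softirq_action { void (*action)(struct softirq_action *); };

static struct softirq_action softirq_vec[NR_SOFTIRQS];   /* models softirq_vec[] */
static unsigned int softirq_pending;                     /* models __softirq_pending */

static void open_softirq_model(int nr, void (*action)(struct softirq_action *))
{
    softirq_vec[nr].action = action;          /* register the handler in its slot */
}

static void raise_softirq_model(int nr)
{
    softirq_pending |= 1U << nr;              /* "activate": mark pending, run later */
}

static void do_softirq_model(void)
{
    unsigned int pending = softirq_pending;   /* snapshot the bitmask */
    struct softirq_action *h = softirq_vec;
    int softirq_bit;

    softirq_pending = 0;                      /* reset before dispatching */

    while ((softirq_bit = ffs(pending))) {    /* walk set bits, lowest first */
        h += softirq_bit - 1;
        h->action(h);                         /* e.g. blk_done_softirq in the kernel */
        h++;
        pending >>= softirq_bit;
    }
}

static void block_done_model(struct softirq_action *h)
{
    (void)h;
    printf("BLOCK_SOFTIRQ handler ran\n");
}

int main(void)
{
    open_softirq_model(BLOCK_SOFTIRQ, block_done_model);  /* cf. blk_softirq_init() */
    raise_softirq_model(BLOCK_SOFTIRQ);                   /* cf. raise_softirq()    */
    do_softirq_model();                                   /* the deferred execution */
    return 0;
}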

/* PLEASE, avoid to allocate new softirqs, if you need not _really_ high
   frequency threaded job scheduling. For almost all the purposes
   tasklets are more than enough. F.e. all serial device BHs et
   al. should be converted to tasklets, not to softirqs.
 */

enum
{
    HI_SOFTIRQ=0,
    TIMER_SOFTIRQ,
    NET_TX_SOFTIRQ,
    NET_RX_SOFTIRQ,
    BLOCK_SOFTIRQ,
    IRQ_POLL_SOFTIRQ,
    TASKLET_SOFTIRQ,
    SCHED_SOFTIRQ,
    HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
                numbering. Sigh! */
    RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */

    NR_SOFTIRQS
};

static __init int blk_softirq_init(void)
{
    int i;

    for_each_possible_cpu(i)
        INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));

    open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
    cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
                  "block/softirq:dead", NULL,
                  blk_softirq_cpu_dead);
    return 0;
}

 

blk_softirq_init() is the entry point that registers the block softirq handler: it initialises the per-CPU blk_cpu_done lists and installs blk_done_softirq as the handler for the BLOCK_SOFTIRQ slot in the vector table.

 

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
    softirq_vec[nr].action = action;
}

Registering a softirq with the kernel really just means filling the corresponding slot of the softirq vector table with the handler function.

 

/*
 * Softirq action handler - move entries to local list and loop over them
 * while passing them to the queue registered handler.
 */
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
{
    struct list_head *cpu_list, local_list;

    local_irq_disable();
    cpu_list = this_cpu_ptr(&blk_cpu_done);
    list_replace_init(cpu_list, &local_list);
    local_irq_enable();

    while (!list_empty(&local_list)) {
        struct request *rq;

        rq = list_entry(local_list.next, struct request, ipi_list);
        list_del_init(&rq->ipi_list);
        rq->q->softirq_done_fn(rq);
    }
}

blk_done_softirq() is the BLOCK_SOFTIRQ action handler: it moves the entries from the per-CPU completion list onto a local list, then loops over them and hands each request to the completion handler registered on its queue - which here is scsi_softirq_done().
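The "steal the whole per-CPU list, then drain a private copy" pattern deserves a standalone illustration. Below is a minimal user-space model (an illustrative assumption, not kernel code): a plain singly linked list and stub irq on/off functions stand in for blk_cpu_done and local_irq_disable()/enable(), and the *_model names are made up; note the real code uses a list_head and preserves FIFO order, which this sketch does not:

#include <stdio.h>
#include <stddef.h>

struct request_model {
    int tag;
    struct request_model *next;
};

static struct request_model *cpu_done_list;   /* models this CPU's blk_cpu_done list */

static void local_irq_disable_model(void) { /* stub: the real code masks interrupts */ }
static void local_irq_enable_model(void)  { /* stub */ }

static void softirq_done_fn_model(struct request_model *rq)
{
    printf("completion handler for request %d (cf. scsi_softirq_done)\n", rq->tag);
}

static void blk_done_softirq_model(void)
{
    struct request_model *local_list;

    local_irq_disable_model();
    local_list = cpu_done_list;               /* cf. list_replace_init(): take over */
    cpu_done_list = NULL;                     /* the whole per-CPU list in one step */
    local_irq_enable_model();

    while (local_list) {                      /* drain the private copy, no locking needed */
        struct request_model *rq = local_list;
        local_list = rq->next;
        softirq_done_fn_model(rq);
    }
}

int main(void)
{
    struct request_model r1 = { .tag = 1, .next = NULL };
    struct request_model r2 = { .tag = 2, .next = &r1 };

    cpu_done_list = &r2;      /* as if __blk_complete_request() had queued two requests */
    blk_done_softirq_model();
    return 0;
}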

 

static void scsi_softirq_done(struct request *rq)
{
    struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
    unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
    int disposition;

    INIT_LIST_HEAD(&cmd->eh_entry);

    atomic_inc(&cmd->device->iodone_cnt);
    if (cmd->result)
        atomic_inc(&cmd->device->ioerr_cnt);

    disposition = scsi_decide_disposition(cmd);
    if (disposition != SUCCESS &&
        time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
        sdev_printk(KERN_ERR, cmd->device,
                "timing out command, waited %lus\n",
                wait_for/HZ);
        disposition = SUCCESS;
    }

    scsi_log_completion(cmd, disposition);

    switch (disposition) {
        case SUCCESS:
            scsi_finish_command(cmd);
            break;
        case NEEDS_RETRY:
            scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
            break;
        case ADD_TO_MLQUEUE:
            scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
            break;
        default:
            scsi_eh_scmd_add(cmd);
            break;
    }
}

scsi_softirq_done() carries on the completion work: it lets scsi_decide_disposition() classify the result, but if the command has already been outstanding for longer than (cmd->allowed + 1) times the request timeout it gives up on further retries and forces the disposition to SUCCESS. SUCCESS finishes the command through scsi_finish_command(), NEEDS_RETRY and ADD_TO_MLQUEUE requeue it via scsi_queue_insert(), and anything else is handed to the error handler with scsi_eh_scmd_add().

/**
 * scsi_finish_command - cleanup and pass command back to upper layer
 * @cmd: the command
 *
 * Description: Pass command off to upper layer for finishing of I/O
 *              request, waking processes that are waiting on results,
 *              etc.
 */
void scsi_finish_command(struct scsi_cmnd *cmd)
{
    struct scsi_device *sdev = cmd->device;
    struct scsi_target *starget = scsi_target(sdev);
    struct Scsi_Host *shost = sdev->host;
    struct scsi_driver *drv;
    unsigned int good_bytes;

    scsi_device_unbusy(sdev);

    /*
     * Clear the flags that say that the device/target/host is no longer
     * capable of accepting new commands.
     */
    if (atomic_read(&shost->host_blocked))
        atomic_set(&shost->host_blocked, 0);
    if (atomic_read(&starget->target_blocked))
        atomic_set(&starget->target_blocked, 0);
    if (atomic_read(&sdev->device_blocked))
        atomic_set(&sdev->device_blocked, 0);

    /*
     * If we have valid sense information, then some kind of recovery
     * must have taken place.  Make a note of this.
     */
    if (SCSI_SENSE_VALID(cmd))
        cmd->result |= (DRIVER_SENSE << 24);

    SCSI_LOG_MLCOMPLETE(4, sdev_printk(KERN_INFO, sdev,
                "Notifying upper driver of completion "
                "(result %x)\n", cmd->result));

    good_bytes = scsi_bufflen(cmd);
    if (!blk_rq_is_passthrough(cmd->request)) {
        int old_good_bytes = good_bytes;
        drv = scsi_cmd_to_driver(cmd);
        if (drv->done)
            good_bytes = drv->done(cmd);
        /*
         * USB may not give sense identifying bad sector and
         * simply return a residue instead, so subtract off the
         * residue if drv->done() error processing indicates no
         * change to the completion length.
         */
        if (good_bytes == old_good_bytes)
            good_bytes -= scsi_get_resid(cmd);
    }
    scsi_io_completion(cmd, good_bytes);
}

scsi_finish_command() passes the SCSI command back to the upper layer to finish the I/O request, wake any processes waiting on the result, and so on. For a disk-type device, the drv->done callback it invokes is sd_done():

 

    good_bytes = drv->done(cmd);

/**
 *    sd_done - bottom half handler: called when the lower level
 *    driver has completed (successfully or otherwise) a scsi command.
 *    @SCpnt: mid-level's per command structure.
 *
 *    Note: potentially run from within an ISR. Must not block.
 **/
static int sd_done(struct scsi_cmnd *SCpnt)
{
    int result = SCpnt->result;
    unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
    unsigned int sector_size = SCpnt->device->sector_size;
    unsigned int resid;
    struct scsi_sense_hdr sshdr;
    struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
    struct request *req = SCpnt->request;
    int sense_valid = 0;
    int sense_deferred = 0;

    switch (req_op(req)) {
    case REQ_OP_DISCARD:
    case REQ_OP_WRITE_ZEROES:
    case REQ_OP_WRITE_SAME:
    case REQ_OP_ZONE_RESET:
        if (!result) {
            good_bytes = blk_rq_bytes(req);
            scsi_set_resid(SCpnt, 0);
        } else {
            good_bytes = 0;
            scsi_set_resid(SCpnt, blk_rq_bytes(req));
        }
        break;
    case REQ_OP_ZONE_REPORT:
        if (!result) {
            good_bytes = scsi_bufflen(SCpnt)
                - scsi_get_resid(SCpnt);
            scsi_set_resid(SCpnt, 0);
        } else {
            good_bytes = 0;
            scsi_set_resid(SCpnt, blk_rq_bytes(req));
        }
        break;
    default:
        /*
         * In case of bogus fw or device, we could end up having
         * an unaligned partial completion. Check this here and force
         * alignment.
         */
        resid = scsi_get_resid(SCpnt);
        if (resid & (sector_size - 1)) {
            sd_printk(KERN_INFO, sdkp,
                "Unaligned partial completion (resid=%u, sector_sz=%u)\n",
                resid, sector_size);
            resid = min(scsi_bufflen(SCpnt),
                    round_up(resid, sector_size));
            scsi_set_resid(SCpnt, resid);
        }
    }

    if (result) {
        sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
        if (sense_valid)
            sense_deferred = scsi_sense_is_deferred(&sshdr);
    }
    sdkp->medium_access_timed_out = 0;

    if (driver_byte(result) != DRIVER_SENSE &&
        (!sense_valid || sense_deferred))
        goto out;

    switch (sshdr.sense_key) {
    case HARDWARE_ERROR:
    case MEDIUM_ERROR:
        good_bytes = sd_completed_bytes(SCpnt);
        break;
    case RECOVERED_ERROR:
        good_bytes = scsi_bufflen(SCpnt);
        break;
    case NO_SENSE:
        /* This indicates a false check condition, so ignore it.  An
         * unknown amount of data was transferred so treat it as an
         * error.
         */
        SCpnt->result = 0;
        memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
        break;
    case ABORTED_COMMAND:
        if (sshdr.asc == 0x10)  /* DIF: Target detected corruption */
            good_bytes = sd_completed_bytes(SCpnt);
        break;
    case ILLEGAL_REQUEST:
        switch (sshdr.asc) {
        case 0x10:    /* DIX: Host detected corruption */
            good_bytes = sd_completed_bytes(SCpnt);
            break;
        case 0x20:    /* INVALID COMMAND OPCODE */
        case 0x24:    /* INVALID FIELD IN CDB */
            switch (SCpnt->cmnd[0]) {
            case UNMAP:
                sd_config_discard(sdkp, SD_LBP_DISABLE);
                break;
            case WRITE_SAME_16:
            case WRITE_SAME:
                if (SCpnt->cmnd[1] & 8) { /* UNMAP */
                    sd_config_discard(sdkp, SD_LBP_DISABLE);
                } else {
                    sdkp->device->no_write_same = 1;
                    sd_config_write_same(sdkp);
                    req->__data_len = blk_rq_bytes(req);
                    req->rq_flags |= RQF_QUIET;
                }
                break;
            }
        }
        break;
    default:
        break;
    }

 out:
    if (sd_is_zoned(sdkp))
        sd_zbc_complete(SCpnt, good_bytes, &sshdr);

    SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt,
                       "sd_done: completed %d of %d bytes\n",
                       good_bytes, scsi_bufflen(SCpnt)));

    if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
        sd_dif_complete(SCpnt, good_bytes);

    return good_bytes;
}

sd_done() is the bottom-half completion handler: it is called when the low-level driver (here the UFS host controller driver, e.g. ufs-qcom.c) has completed a SCSI command, successfully or otherwise. It may be running from within an ISR, so it must not block.


/*
 * Function:    scsi_io_completion()
 *
 * Purpose:     Completion processing for block device I/O requests.
 *
 * Arguments:   cmd   - command that is finished.
 *
 * Lock status: Assumed that no lock is held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       We will finish off the specified number of sectors.  If we
 *        are done, the command block will be released and the queue
 *        function will be goosed.  If we are not done then we have to
 *        figure out what to do next:
 *
 *        a) We can call scsi_requeue_command().  The request
 *           will be unprepared and put back on the queue.  Then
 *           a new command will be created for it.  This should
 *           be used if we made forward progress, or if we want
 *           to switch from READ(10) to READ(6) for example.
 *
 *        b) We can call __scsi_queue_insert().  The request will
 *           be put back on the queue and retried using the same
 *           command as before, possibly after a delay.
 *
 *        c) We can call scsi_end_request() with -EIO to fail
 *           the remainder of the request.
 */
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
{
    int result = cmd->result;
    struct request_queue *q = cmd->device->request_queue;
    struct request *req = cmd->request;
    blk_status_t error = BLK_STS_OK;
    struct scsi_sense_hdr sshdr;
    bool sense_valid = false;
    int sense_deferred = 0, level = 0;
    enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
          ACTION_DELAYED_RETRY} action;
    unsigned long wait_for = (cmd->allowed + 1) * req->timeout;

    if (result) {
        sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
        if (sense_valid)
            sense_deferred = scsi_sense_is_deferred(&sshdr);
    }

    if (blk_rq_is_passthrough(req)) {
        if (result) {
            if (sense_valid) {
                /*
                 * SG_IO wants current and deferred errors
                 */
                scsi_req(req)->sense_len =
                    min(8 + cmd->sense_buffer[7],
                        SCSI_SENSE_BUFFERSIZE);
            }
            if (!sense_deferred)
                error = __scsi_error_from_host_byte(cmd, result);
        }
        /*
         * __scsi_error_from_host_byte may have reset the host_byte
         */
        scsi_req(req)->result = cmd->result;
        scsi_req(req)->resid_len = scsi_get_resid(cmd);

        if (scsi_bidi_cmnd(cmd)) {
            /*
             * Bidi commands Must be complete as a whole,
             * both sides at once.
             */
            scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
            if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
                    blk_rq_bytes(req->next_rq)))
                BUG();
            return;
        }
    } else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) {
        /*
         * Flush commands do not transfers any data, and thus cannot use
         * good_bytes != blk_rq_bytes(req) as the signal for an error.
         * This sets the error explicitly for the problem case.
         */
        error = __scsi_error_from_host_byte(cmd, result);
    }

    /* no bidi support for !blk_rq_is_passthrough yet */
    BUG_ON(blk_bidi_rq(req));

    /*
     * Next deal with any sectors which we were able to correctly
     * handle.
     */
    SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, cmd,
        "%u sectors total, %d bytes done.\n",
        blk_rq_sectors(req), good_bytes));

    /*
     * Recovered errors need reporting, but they're always treated as
     * success, so fiddle the result code here.  For passthrough requests
     * we already took a copy of the original into sreq->result which
     * is what gets returned to the user
     */
    if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) {
        /* if ATA PASS-THROUGH INFORMATION AVAILABLE skip
         * print since caller wants ATA registers. Only occurs on
         * SCSI ATA PASS_THROUGH commands when CK_COND=1
         */
        if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d))
            ;
        else if (!(req->rq_flags & RQF_QUIET))
            scsi_print_sense(cmd);
        result = 0;
        /* for passthrough error may be set */
        error = BLK_STS_OK;
    }
    /*
     * Another corner case: the SCSI status byte is non-zero but 'good'.
     * Example: PRE-FETCH command returns SAM_STAT_CONDITION_MET when
     * it is able to fit nominated LBs in its cache (and SAM_STAT_GOOD
     * if it can't fit). Treat SAM_STAT_CONDITION_MET and the related
     * intermediate statuses (both obsolete in SAM-4) as good.
     */
    if (status_byte(result) && scsi_status_is_good(result)) {
        result = 0;
        error = BLK_STS_OK;
    }

    /*
     * special case: failed zero length commands always need to
     * drop down into the retry code. Otherwise, if we finished
     * all bytes in the request we are done now.
     */
    if (!(blk_rq_bytes(req) == 0 && error) &&
        !scsi_end_request(req, error, good_bytes, 0))
        return;

    /*
     * Kill remainder if no retrys.
     */
    if (error && scsi_noretry_cmd(cmd)) {
        if (scsi_end_request(req, error, blk_rq_bytes(req), 0))
            BUG();
        return;
    }

    /*
     * If there had been no error, but we have leftover bytes in the
     * requeues just queue the command up again.
     */
    if (result == 0)
        goto requeue;

    error = __scsi_error_from_host_byte(cmd, result);

    if (host_byte(result) == DID_RESET) {
        /* Third party bus reset or reset for error recovery
         * reasons.  Just retry the command and see what
         * happens.
         */
        action = ACTION_RETRY;
    } else if (sense_valid && !sense_deferred) {
        switch (sshdr.sense_key) {
        case UNIT_ATTENTION:
            if (cmd->device->removable) {
                /* Detected disc change.  Set a bit
                 * and quietly refuse further access.
                 */
                cmd->device->changed = 1;
                action = ACTION_FAIL;
            } else {
                /* Must have been a power glitch, or a
                 * bus reset.  Could not have been a
                 * media change, so we just retry the
                 * command and see what happens.
                 */
                action = ACTION_RETRY;
            }
            break;
        case ILLEGAL_REQUEST:
            /* If we had an ILLEGAL REQUEST returned, then
             * we may have performed an unsupported
             * command.  The only thing this should be
             * would be a ten byte read where only a six
             * byte read was supported.  Also, on a system
             * where READ CAPACITY failed, we may have
             * read past the end of the disk.
             */
            if ((cmd->device->use_10_for_rw &&
                sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
                (cmd->cmnd[0] == READ_10 ||
                 cmd->cmnd[0] == WRITE_10)) {
                /* This will issue a new 6-byte command. */
                cmd->device->use_10_for_rw = 0;
                action = ACTION_REPREP;
            } else if (sshdr.asc == 0x10) /* DIX */ {
                action = ACTION_FAIL;
                error = BLK_STS_PROTECTION;
            /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
            } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
                action = ACTION_FAIL;
                error = BLK_STS_TARGET;
            } else
                action = ACTION_FAIL;
            break;
        case ABORTED_COMMAND:
            action = ACTION_FAIL;
            if (sshdr.asc == 0x10) /* DIF */
                error = BLK_STS_PROTECTION;
            break;
        case NOT_READY:
            /* If the device is in the process of becoming
             * ready, or has a temporary blockage, retry.
             */
            if (sshdr.asc == 0x04) {
                switch (sshdr.ascq) {
                case 0x01: /* becoming ready */
                case 0x04: /* format in progress */
                case 0x05: /* rebuild in progress */
                case 0x06: /* recalculation in progress */
                case 0x07: /* operation in progress */
                case 0x08: /* Long write in progress */
                case 0x09: /* self test in progress */
                case 0x14: /* space allocation in progress */
                    action = ACTION_DELAYED_RETRY;
                    break;
                default:
                    action = ACTION_FAIL;
                    break;
                }
            } else
                action = ACTION_FAIL;
            break;
        case VOLUME_OVERFLOW:
            /* See SSC3rXX or current. */
            action = ACTION_FAIL;
            break;
        default:
            action = ACTION_FAIL;
            break;
        }
    } else
        action = ACTION_FAIL;

    if (action != ACTION_FAIL &&
        time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
        action = ACTION_FAIL;

    switch (action) {
    case ACTION_FAIL:
        /* Give up and fail the remainder of the request */
        if (!(req->rq_flags & RQF_QUIET)) {
            static DEFINE_RATELIMIT_STATE(_rs,
                    DEFAULT_RATELIMIT_INTERVAL,
                    DEFAULT_RATELIMIT_BURST);

            if (unlikely(scsi_logging_level))
                level = SCSI_LOG_LEVEL(SCSI_LOG_MLCOMPLETE_SHIFT,
                               SCSI_LOG_MLCOMPLETE_BITS);

            /*
             * if logging is enabled the failure will be printed
             * in scsi_log_completion(), so avoid duplicate messages
             */
            if (!level && __ratelimit(&_rs)) {
                scsi_print_result(cmd, NULL, FAILED);
                if (driver_byte(result) & DRIVER_SENSE)
                    scsi_print_sense(cmd);
                scsi_print_command(cmd);
            }
        }
        if (!scsi_end_request(req, error, blk_rq_err_bytes(req), 0))
            return;
        /*FALLTHRU*/
    case ACTION_REPREP:
    requeue:
        /* Unprep the request and put it back at the head of the queue.
         * A new command will be prepared and issued.
         */
        if (q->mq_ops) {
            scsi_mq_requeue_cmd(cmd);
        } else {
            scsi_release_buffers(cmd);
            scsi_requeue_command(q, cmd);
        }
        break;
    case ACTION_RETRY:
        /* Retry the same command immediately */
        __scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, 0);
        break;
    case ACTION_DELAYED_RETRY:
        /* Retry the same command after a delay */
        __scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, 0);
        break;
    }
}

scsi_io_completion() is the completion-processing routine for block-device I/O requests. It finishes off the specified number of sectors; once the whole request is done, the command block is released and the request queue is kicked to run again. If the request is not yet finished, it has to decide what to do next:

(1) call scsi_requeue_command(): the request is unprepared and put back on the queue, and a new command is created for it. This is used when forward progress was made, or when we want to switch, for example, from READ(10) to READ(6);

(2) call __scsi_queue_insert(): the request is put back on the queue and retried with the same command as before, possibly after a delay;

(3) call scsi_end_request() with -EIO to fail the remainder of the request.

 

 

 

 

 
