Android系統中固件崩潰後使用uevent機制重新加載固件 流程分析

【前言】

    Android系統爲了保證各商業公司的利益,允許在系統中使用不開源的固件。因此我們可以看到比如用於音頻處理的、用於鍵盤支持的等等大量固件。既然這麼多模塊要用到固件,那麼我們也有必要來了解一下固件在崩潰後的自恢復過程。這篇博文是基於 Intel x86 平臺寫的,所分析的固件是 Intel 平臺的一款音頻 DSP 固件,使用 uevent 機制來收發固件恢復消息。所以如果讀者使用的是其它平臺或其它固件,在內容上也許會有所出入,但這不影響我們的分析思路。


【填充並發送uevent消息】

    在設備檢測到固件崩潰之後,會調用恢復函數。在我使用的平臺下,這對應的是 sst_do_recovery() 函數。在這個函數中,主要完成 填充uevent消息、dump固件崩潰信息、重置Intel平臺音頻配置、清除內存中舊的固件內容、發送uevent消息 這5件事。代碼如下:

/*
 * sst_do_recovery - report a DSP firmware crash to user space and reset
 * the driver so the firmware can be loaded again.
 * @sst: driver context of the crashed audio engine
 *
 * Steps, in the order the code performs them:
 *  1. Build the EVENT_TYPE=SST_CRASHED environment (IRAM/DRAM dump sizes,
 *     plus DDR/IMR info when sst->ddr is set) and emit a KOBJ_CHANGE uevent.
 *  2. Set sst_state to SST_RECOVERY under sst_lock.
 *  3. Dump diagnostics: call stack, SHIM, both mailbox directions, the SRAM
 *     checkpoint and, when a set_bypass op exists, IRAM and DRAM.
 *  4. Send the SCU reset IPC, zero pvt_id and dump the IPC/RX lists.
 *  5. Free the cached firmware image and its scatter-gather lists.
 *  6. Set sst_state to SST_RESET, run sst_stream_recovery(), sleep, then
 *     emit a second KOBJ_CHANGE uevent with EVENT_TYPE=SST_RECOVERY.
 *
 * NOTE(review): the body uses both the @sst argument and the sst_drv_ctx
 * global; presumably they refer to the same context -- confirm.
 */
void sst_do_recovery(struct intel_sst_drv *sst)
{
	/* One buffer per uevent environment string; envp[] only points at them. */
	char iram_event[IRAM_EVENT_SIZE_MAX], dram_event[DRAM_EVENT_SIZE_MAX];
	char ddr_imr_event[DDR_EVENT_SIZE_MAX], event_type[EVENT_TYPE_SIZE_MAX];
	char *envp[NUM_EVENT_MAX];
	int env_offset = 0;

	pr_err("Audio: Intel SST engine encountered an unrecoverable error\n");
	snprintf(event_type, sizeof(event_type), "EVENT_TYPE=SST_CRASHED");    // uevent variable: event type
	envp[env_offset++] = event_type;
	snprintf(iram_event, sizeof(iram_event), "IRAM_DUMP_SIZE=%d",    // uevent variable: IRAM dump size
			sst->dump_buf.iram_buf.size);
	envp[env_offset++] = iram_event;
	snprintf(dram_event, sizeof(dram_event), "DRAM_DUMP_SIZE=%d",    // uevent variable: DRAM dump size
			sst->dump_buf.dram_buf.size);
	envp[env_offset++] = dram_event;

	/*
	 * Optional DDR/IMR entry. Both KEY=VALUE pairs are packed into a
	 * single envp[] slot, separated by a space.
	 * NOTE(review): "%d" is used for (ddr_end - ddr_base); confirm the
	 * operand type actually matches an int.
	 */
	if (sst->ddr != NULL) {
		snprintf(ddr_imr_event, sizeof(ddr_imr_event),
		"DDR_IMR_DUMP_SIZE=%d DDR_IMR_ADDRESS=%p", (sst->ddr_end - sst->ddr_base), sst->ddr);
		envp[env_offset++] = ddr_imr_event;
	}
	envp[env_offset] = NULL;    /* NULL-terminate the environment array */
	kobject_uevent_env(&sst->dev->kobj, KOBJ_CHANGE, envp);    // send the uevent telling user space the firmware has crashed
	pr_err("SST Crash Uevent Sent!!\n");

	/*
	 * Mark the driver as being in recovery while the crash is handled.
	 * The state is switched to SST_RESET further down, after cleanup, so
	 * that the firmware gets downloaded again on the next request. The
	 * firmware not responding for 1 sec is treated as equivalent to an
	 * unrecoverable firmware error.
	 */
	pr_err("Audio: trying to reset the dsp now\n");
	mutex_lock(&sst->sst_lock);
	sst->sst_state = SST_RECOVERY;    // current state becomes "recovering"
	mutex_unlock(&sst->sst_lock);

	dump_stack();    // diagnostic dump: call stack
	dump_sst_shim(sst);    // diagnostic dump: SHIM (per helper name)

	/* presumably stalls the LPE and waits for it to settle -- verify in helper */
	mutex_lock(&sst->sst_lock);
	sst_stall_lpe_n_wait(sst);
	mutex_unlock(&sst->sst_lock);

	/* dump mailbox and sram */
	pr_debug("Audio: Dumping Mailbox IA to LPE...\n");
	dump_buffer_fromio(sst->ipc_mailbox, NUM_DWORDS);    // diagnostic dump: IA -> LPE mailbox
	pr_debug("Audio: Dumping Mailbox LPE to IA...\n");
	dump_buffer_fromio((sst->ipc_mailbox + sst->mailbox_recv_offset),    // diagnostic dump: LPE -> IA mailbox
		NUM_DWORDS);
	pr_debug("Audio: Dumping SRAM CHECKPOINT...\n");
	/* assumes sst->pdata->debugfs_data is non-NULL here -- TODO confirm */
	dump_buffer_fromio((sst->mailbox +
			sst->pdata->debugfs_data->checkpoint_offset),
			DUMP_SRAM_CHECKPOINT_DWORDS);

	/* IRAM/DRAM are dumped only when the platform provides a set_bypass op;
	 * bypass is enabled around the two dumps and disabled again afterwards.
	 */
	if (sst_drv_ctx->ops->set_bypass) {
		mutex_lock(&sst->sst_lock);
		sst_drv_ctx->ops->set_bypass(true);
		dump_ram_area(sst, &(sst->dump_buf), SST_IRAM);    // diagnostic dump: IRAM
		dump_ram_area(sst, &(sst->dump_buf), SST_DRAM);    // diagnostic dump: DRAM
		sst_drv_ctx->ops->set_bypass(false);
		mutex_unlock(&sst->sst_lock);
	}

	/* Send IPC to SCU to power gate and reset the LPE */
	sst_send_scu_reset_ipc(sst);    // reset the Intel platform configuration

	pr_err("reset the pvt id from val %d\n", sst_drv_ctx->pvt_id);
	spin_lock(&sst_drv_ctx->block_lock);
	sst_drv_ctx->pvt_id = 0;
	spin_unlock(&sst_drv_ctx->block_lock);
	sst_dump_ipc_dispatch_lists(sst_drv_ctx);    // diagnostic dump: IPC dispatch lists
	sst_dump_rx_lists(sst_drv_ctx);    // diagnostic dump: RX lists

	/* Drop the cached firmware image so that a fresh copy has to be loaded. */
	if (sst_drv_ctx->fw_in_mem) {
		pr_err("Clearing the cached FW copy...\n");
		kfree(sst_drv_ctx->fw_in_mem);    // free the old in-memory firmware image
		sst_drv_ctx->fw_in_mem = NULL;    // ...and forget the stale pointer
		sst_memcpy_free_resources();    // release the memcpy resources kept for the old image
		kfree(sst_drv_ctx->fw_sg_list.src);    // free the source scatter-gather list
		kfree(sst_drv_ctx->fw_sg_list.dst);    // free the destination scatter-gather list
		sst_drv_ctx->fw_sg_list.list_len = 0;    // mark the list as empty
	}

	mutex_lock(&sst->sst_lock);
	sst->sst_state = SST_RESET;    // current state becomes "reset"
	sst_stream_recovery(sst);    // reset the audio stream configuration
	mutex_unlock(&sst->sst_lock);

	/* Delay is to ensure that the stream is closed before
	 * powering on DAPM widget
	 */
	usleep_range(STREAM_CLOSE_DELAY_MIN, STREAM_CLOSE_DELAY_MAX);

	/* Second uevent: reuse the buffers, this time with only the event type. */
	env_offset = 0;
	snprintf(event_type, sizeof(event_type), "EVENT_TYPE=SST_RECOVERY");    // uevent variable: event type
	envp[env_offset++] = event_type;
	envp[env_offset] = NULL;
	kobject_uevent_env(&sst->dev->kobj, KOBJ_CHANGE, envp);    // send the uevent telling user space to start reloading the firmware
	pr_err("SST Recovery Uevent Sent!!\n");

}


【接收並處理uevent消息】

    在 Android 系統中,底層發送的 uevent 消息在上層由 ueventd 進行接收和處理。ueventd 是 Android 系統啓動後就運行的一個服務進程,它通過 while死循環 不斷檢查系統是否接收到新的 uevent 消息,如果有就調用 handle_device_fd() 函數進行處理。我們可以在 system/core/init/ueventd.cpp 中找到 ueventd 的主函數。代碼如下:

int ueventd_main(int argc, char **argv)
{
    /*
     * init sets the umask to 077 for forked processes. We need to
     * create files with exact permissions, without modification by
     * the umask.
     */
    umask(000);

    /* Prevent fire-and-forget children from becoming zombies.
     * If we should need to wait() for some children in the future
     * (as opposed to none right now), double-forking here instead
     * of ignoring SIGCHLD may be the better solution.
     */
    signal(SIGCHLD, SIG_IGN);

    open_devnull_stdio();
    klog_init();
    klog_set_level(KLOG_NOTICE_LEVEL);

    NOTICE("ueventd started!\n");

    selinux_callback cb;
    cb.func_log = selinux_klog_callback;
    selinux_set_callback(SELINUX_CB_LOG, cb);

    std::string hardware = property_get("ro.hardware");

    ueventd_parse_config_file("/ueventd.rc");
    ueventd_parse_config_file(android::base::StringPrintf("/ueventd.%s.rc", hardware.c_str()).c_str());

    device_init();

    pollfd ufd;
    ufd.events = POLLIN;
    ufd.fd = get_device_fd();

    while (true) {    // 使用死循環,不斷查詢是否有新的消息需要處理
        ufd.revents = 0;
        int nr = poll(&ufd, 1, -1);
        if (nr <= 0) {
            continue;
        }
        if (ufd.revents & POLLIN) {
            handle_device_fd();    // 如果檢查到有待處理的消息,在這裏進行處理
        }
    }

    return 0;
}

    handle_device_fd() 函數主要負責解析 uevent 消息,然後將解析出的消息分別傳遞給 handle_device_event() 函數和 handle_firmware_event() 函數。後兩者會分別檢查 uevent 消息是否是 device 類型或 firmware 類型,並且在滿足檢驗條件的情況下進行相應操作。這些函數都可以在 system/core/init/devices.cpp 文件中找到,代碼如下:

void handle_device_fd()
{
    char msg[UEVENT_MSG_LEN+2];
    int n;
    while ((n = uevent_kernel_multicast_recv(device_fd, msg, UEVENT_MSG_LEN)) > 0) {
        if(n >= UEVENT_MSG_LEN)   /* overflow -- discard */
            continue;

        msg[n] = '\0';
        msg[n+1] = '\0';

        struct uevent uevent;
        parse_event(msg, &uevent);    // 從消息中解析出uevent事件,保存在uevent結構體變量中

        if (selinux_status_updated() > 0) {
            struct selabel_handle *sehandle2;
            sehandle2 = selinux_android_file_context_handle();
            if (sehandle2) {
                selabel_close(sehandle);
                sehandle = sehandle2;
            }
        }

        handle_device_event(&uevent);
        handle_firmware_event(&uevent);    // 檢查是否需要處理固件uevent事件
    }
}

    因爲我們是分析固件崩潰後重載的過程,所以來看看 handle_firmware_event() 函數。這個函數的內容比較簡潔,在檢查傳遞來的 uevent 消息屬於 firmware 子系統且動作爲 add 之後,通過 fork() 創建一個子進程,並在子進程中調用 process_firmware_event() 函數對 uevent 消息進行最終的處理。代碼如下:

// Filters uevents down to firmware load requests (subsystem "firmware",
// action "add") and services each one in a forked child process.
static void handle_firmware_event(struct uevent *uevent)
{
    const bool is_firmware_request = strcmp(uevent->subsystem, "firmware") == 0 &&
                                     strcmp(uevent->action, "add") == 0;
    if (!is_firmware_request)
        return;

    // Fork so the large allocations happen in a short-lived child rather
    // than in the long-running parent.
    const pid_t child = fork();
    if (child < 0) {
        ERROR("could not fork to process firmware event: %s\n", strerror(errno));
        return;
    }

    if (child == 0) {
        // Child: do the work, then leave without returning to the caller.
        process_firmware_event(uevent);
        _exit(EXIT_SUCCESS);
    }
}

    不出意外地,在 process_firmware_event() 函數中進行了讀寫文件節點和加載固件的操作。代碼如下:

/*
 * Services one firmware request.
 *
 * Builds the paths of the request's sysfs "loading" and "data" files from
 * uevent->path, opens both for writing, then looks for uevent->firmware in
 * each directory listed in firmware_dirs. The first file that can be opened
 * is handed to load_firmware() together with the two sysfs descriptors.
 *
 * If no candidate can be opened and is_booting() still reports true, the
 * search is retried every 100 ms; otherwise "-1" is written to the "loading"
 * file and the function gives up.
 *
 * Every resource acquired here (three heap strings, up to three file
 * descriptors) is released on every exit path via the label ladder at the end.
 */
static void process_firmware_event(struct uevent *uevent)
{
    char *root, *loading, *data;
    int l, loading_fd, data_fd;
    int fw_fd = -1;        /* stays -1 if the search loop never opens anything */
    int open_errno = 0;    /* errno of the most recent failed open() */
    size_t i;
    int booting = is_booting();

    INFO("firmware: loading '%s' for '%s'\n",
         uevent->firmware, uevent->path);

    l = asprintf(&root, SYSFS_PREFIX"%s/", uevent->path);
    if (l == -1)
        return;

    l = asprintf(&loading, "%sloading", root);    /* path of the "loading" file */
    if (l == -1)
        goto root_free_out;

    l = asprintf(&data, "%sdata", root);    /* path of the "data" file */
    if (l == -1)
        goto loading_free_out;

    loading_fd = open(loading, O_WRONLY|O_CLOEXEC);
    if(loading_fd < 0)
        goto data_free_out;

    data_fd = open(data, O_WRONLY|O_CLOEXEC);
    if(data_fd < 0)
        goto loading_close_out;

try_loading_again:
    for (i = 0; i < ARRAY_SIZE(firmware_dirs); i++) {
        char *file = NULL;
        l = asprintf(&file, "%s/%s", firmware_dirs[i], uevent->firmware);    /* candidate firmware path */
        if (l == -1) {
            /* Both sysfs descriptors are open at this point, so unwind
             * through the close labels instead of leaking them. */
            goto data_close_out;
        }
        fw_fd = open(file, O_RDONLY|O_CLOEXEC);
        if (fw_fd < 0)
            open_errno = errno;    /* capture before free() can disturb errno */
        free(file);
        if (fw_fd >= 0) {
            if(!load_firmware(fw_fd, loading_fd, data_fd))
                INFO("firmware: copy success { '%s', '%s' }\n", root, uevent->firmware);
            else
                INFO("firmware: copy failure { '%s', '%s' }\n", root, uevent->firmware);
            break;
        }
    }
    if (fw_fd < 0) {
        if (booting) {
            /* If we're not fully booted, we may be missing
             * filesystems needed for firmware, wait and retry.
             */
            usleep(100000);    /* 100 ms between attempts */
            booting = is_booting();
            goto try_loading_again;
        }
        INFO("firmware: could not open '%s': %s\n", uevent->firmware, strerror(open_errno));
        /* Tell the kernel side that this request failed. */
        if (write(loading_fd, "-1", 2) != 2)
            INFO("firmware: could not write failure status for '%s'\n", uevent->firmware);
        goto data_close_out;
    }

    close(fw_fd);
data_close_out:
    close(data_fd);
loading_close_out:
    close(loading_fd);
data_free_out:
    free(data);
loading_free_out:
    free(loading);
root_free_out:
    free(root);
}

發佈了125 篇原創文章 · 獲贊 152 · 訪問量 74萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章