繼續上一篇的分析,應用層udev或者mdev獲取到內核的事件,來創建設備文件的時候,實際就是調用mknod命令進行節點創建。我們可以模仿udev或者mdev,來手動創建這個節點。
1 /dev設備節點創建
我們在使用命令: mknod /dev/iTestDevice c $major 0 創建設備文件的時候,內核做了哪些事呢?可以肯定的一點是,內核肯定創建了一個 dentry,inode 結構體,並加入到系統裏面,要不然在打開設備文件的時候,會因爲lookup_fast()函數找不到相應的 inode,從而使得打開失敗。也許你會說,lookup_fast()函數失敗了,還有lookup_slow()函數呢?這裏因爲是特殊文件,情況有所不同,如果lookup_fast()函數失敗了,那麼就會導致打開失敗,不會在lookup_slow()函數裏面動態創建 inode,而創建inode的工作其實是在 mknod 系統調用裏面完成的。下面來簡單分析其具體過程。
首先通過 strace 來查看下系統調用的傳入參數:
strace -o syscall mknod /dev/test c 250 0
結果如下:
...
mknod("/dev/test", S_IFCHR|0666, makedev(250, 0)) = 0
...
現在來看下內核裏面關於 mknod 系統調用的定義,在 source/fs/namei.c 文件中
/* mknod(2) entry point: delegates to mknodat(2), using AT_FDCWD so that
 * relative pathnames are resolved against the current working directory. */
SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
return sys_mknodat(AT_FDCWD, filename, mode, dev);
}
好了,來看 sys_mknodat 的定義:
/* mknodat(2): create a filesystem node. Excerpt trimmed to the path
 * taken when creating a character/block device special file. */
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
unsigned, dev)
{
...
/* Resolve the parent directory and allocate a new (negative) dentry
 * for the final path component. */
dentry = user_path_create(dfd, filename, &path, lookup_flags);
...
switch (mode & S_IFMT) {
...
/* Character/block device: create the inode here and record the
 * device number in it. */
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(path.dentry->d_inode,dentry,mode,
new_decode_dev(dev));
break;
...
}
其實就兩步:1,創建 dentry;2,創建 inode。我們在分析具體函數前先來分析一下/dev/iTestDevice這個目錄,假設我們的rootfs使用的是ubi文件系統,則dev目錄在ubi文件系統中,dev/目錄下面掛載的是ramfs文件系統,所以在路徑搜索的過程中,搜索到dev目錄的時候,會切換到掛載的ramfs根文件夾,得到ramfs的root dentry和root inode。然後再在ramfs文件系統中創建子文件或者子文件夾。這個父目錄的搜索切換過程這邊不詳細分析,感興趣的可以參考下面這兩篇文章:
https://blog.csdn.net/oqqYuJi12345678/article/details/101689334
https://blog.csdn.net/oqqYuJi12345678/article/details/101849978
先來看iTestDevice文件dentry的創建:
/* Thin user-space wrapper around kern_path_create(): copies the pathname
 * in from user space, then performs the parent lookup and creates the
 * negative dentry for the last component. */
struct dentry *user_path_create(int dfd, const char __user *pathname,
struct path *path, unsigned int lookup_flags)
{
/* getname() copies the user-space path string into kernel memory */
struct filename *tmp = getname(pathname);
struct dentry *res;
if (IS_ERR(tmp))
return ERR_CAST(tmp);
res = kern_path_create(dfd, tmp->name, path, lookup_flags);
putname(tmp);
return res;
}
核心函數爲kern_path_create:
/* Resolve @pathname down to its parent directory and return a fresh,
 * still-negative dentry for the last component, with the parent's
 * i_mutex held and write access taken on the mount.  Callers such as
 * sys_mknodat() then create the inode and attach it to this dentry. */
struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct nameidata nd;
int err2;
int error;
bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
/*
* Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
* other flags passed in are ignored!
*/
lookup_flags &= LOOKUP_REVAL;
----------------------------------------------------------(1)
/* Walk every component except the last (LOOKUP_PARENT): on return,
 * nd.path refers to the parent directory. */
error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);
if (error)
return ERR_PTR(error);
/*
* Yucky last component or no last component at all?
* (foo/., foo/.., /////)
*/
if (nd.last_type != LAST_NORM)
goto out;
nd.flags &= ~LOOKUP_PARENT;
nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
/* don't fail immediately if it's r/o, at least try to report other errors */
err2 = mnt_want_write(nd.path.mnt);
/*
* Do the final lookup.
*/
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
------------------------------------------------------------(2)
/* Allocate/find the dentry for the last component under the parent. */
dentry = lookup_hash(&nd);
if (IS_ERR(dentry))
goto unlock;
error = -EEXIST;
/* A positive dentry means the file already exists - O_EXCL semantics. */
if (dentry->d_inode)
goto fail;
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
* all is fine. Let's be bastards - you had / on the end, you've
* been asking for (non-existent) directory. -ENOENT for you.
*/
if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
error = -ENOENT;
goto fail;
}
if (unlikely(err2)) {
error = err2;
goto fail;
}
*path = nd.path;
return dentry;
fail:
dput(dentry);
dentry = ERR_PTR(error);
unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
if (!err2)
mnt_drop_write(nd.path.mnt);
out:
path_put(&nd.path);
return dentry;
}
(1)do_path_lookup完成上層父目錄的解析,而lookup_hash則完成子節點的解析。先來看一下do_path_lookup函數。
do_path_lookup
--------->filename_lookup
------------>path_lookupat
/* Core pathname resolution.  With LOOKUP_PARENT set (as in the mknod
 * path) it stops at the parent directory of the final component. */
static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
struct file *base = NULL;
struct path path;
int err;
err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(err))
return err;
current->total_link_count = 0;
err = link_path_walk(name, nd);/* walks every component but the last, yielding the parent directory's dentry */
if (!err && !(flags & LOOKUP_PARENT)) {/* LOOKUP_PARENT is set in the mknod path, so this branch is skipped */
err = lookup_last(nd, &path);
while (err > 0) {
void *cookie;
struct path link = path;
err = may_follow_link(&link, nd);
if (unlikely(err))
break;
nd->flags |= LOOKUP_PARENT;
err = follow_link(&link, nd, &cookie);
if (err)
break;
err = lookup_last(nd, &path);
put_link(nd, &link, cookie);
}
}
if (!err)
err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY) {
if (!can_lookup(nd->inode)) {
path_put(&nd->path);
err = -ENOTDIR;
}
}
if (base)
fput(base);
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
return err;
}
該函數只完成對父目錄的搜索,即對於/dev/iTestDevice這樣的目錄結構,只搜索/dev目錄,最終得到ramfs的root dentry和inode。所以nd->path.dentry爲ramfs文件系統root dentry。下面創建iTestDevice inode節點的時候會用到。
(2)lookup_hash函數完成iTestDevice節點dentry創建
/* Look up (and, if needed, allocate) the dentry for the final path
 * component (nd->last) under the parent directory nd->path.dentry. */
static struct dentry *lookup_hash(struct nameidata *nd)
{
return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
}
/* Try the dcache first; fall back to the filesystem's ->lookup() when
 * the dcache cannot answer authoritatively. */
static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *base, unsigned int flags)
{
bool need_lookup;
struct dentry *dentry;
dentry = lookup_dcache(name, base, flags, &need_lookup);/* allocates a new dentry here and sets need_lookup to true */
if (!need_lookup)
return dentry;
return lookup_real(base->d_inode, dentry, flags);/* filesystems like sysfs create the inode in here via the parent's lookup op; ramfs does not - its inode is created later, in mknod */
}
有的文件系統調用lookup_real會創建inode節點,ramfs不會,看一下該函數:
/* Call the parent directory's ->lookup() method.  If it returns a
 * different dentry (e.g. one it found itself), use that instead of the
 * freshly-allocated one. */
static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct dentry *old;
/* Don't create child dentry for a dead directory. */
if (unlikely(IS_DEADDIR(dir))) {
dput(dentry);
return ERR_PTR(-ENOENT);
}
old = dir->i_op->lookup(dir, dentry, flags);
if (unlikely(old)) {
dput(dentry);
dentry = old;
}
return dentry;
}
dir->i_op->lookup到底是哪個函數呢?這裏需要說明的是,/dev目錄上掛載的實際是tmpfs(由shmem代碼實現,行爲上與ramfs類似,本文前面籠統稱作ramfs)。tmpfs掛載根目錄時創建的root inode,其i_op操作集函數爲:
/* Directory inode operations of tmpfs/shmem (excerpt; listing truncated).
 * Note that .lookup is the generic simple_lookup(), which never creates
 * an inode - inode creation is deferred to .mknod (shmem_mknod). */
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
.create = shmem_create,
.lookup = simple_lookup,
.link = shmem_link,
.unlink = shmem_unlink,
.symlink = shmem_symlink,
.mkdir = shmem_mkdir,
.rmdir = shmem_rmdir,
.mknod = shmem_mknod,
.rename = shmem_rename,
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
所以其lookup函數爲:
/* Generic lookup used by ramfs/tmpfs-style filesystems: it never finds
 * or creates an inode.  d_add(dentry, NULL) inserts the dentry into the
 * dcache as a *negative* dentry; the inode is attached later (by mknod,
 * create, etc.). */
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
static const struct dentry_operations simple_dentry_operations = {
.d_delete = simple_delete_dentry,
};
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
d_set_d_op(dentry, &simple_dentry_operations);
d_add(dentry, NULL);
return NULL;
}
可以看到該lookup函數確實沒有創建新的inode。
好了下面看一下子節點inode的創建過程:
/* VFS-level mknod (excerpt): after permission and sanity checks (elided),
 * it dispatches to the parent directory's filesystem-specific ->mknod()
 * method, which actually allocates and initializes the inode. */
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
...
error = dir->i_op->mknod(dir, dentry, mode, dev);
...
return error;
}
這裏調用了文件系統相關的函數:dir->i_op->mknod(),調用父目錄的inode操作方法,這是ramfs根目錄的i_op->mknod 函數,這個函數即是上面操作函數集合中的shmem_mknod:
/* tmpfs/shmem implementation of ->mknod() (excerpt): allocate the inode
 * and bind it to the negative dentry created during lookup. */
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
int error = -ENOSPC;
inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
if (inode) {
...
d_instantiate(dentry, inode); /* roughly: dentry->d_inode = inode; - turns the negative dentry positive */
dget(dentry); /* Extra count - pin the dentry in core */
}
return error;
...
}
其中主要工作是在shmem_get_inode函數中完成:
/* Allocate and initialize a new shmem/tmpfs inode (excerpt).  For a
 * device node the key step is init_special_inode(), which stores the
 * device number and installs the generic character-device fops. */
static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
umode_t mode, dev_t dev, unsigned long flags)
{
struct inode *inode;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
if (shmem_reserve_inode(sb))
return NULL;
/* allocate a new inode structure in kernel memory */
inode = new_inode(sb);
if (inode) {
/* initialize the various inode fields below */
inode->i_ino = get_next_ino();
inode_init_owner(inode, dir, mode);
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_generation = get_seconds();
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
/***********************************************/
switch (mode & S_IFMT) {
default:
inode->i_op = &shmem_special_inode_operations;
init_special_inode(inode, mode, dev); /* the part we care about: records the device number and the generic fops */
break;
...
}
} else
shmem_free_inode(sb);
return inode;
}
可見在這個函數裏面,首先通過new_inode函數在內核空間分配內存,這裏不再詳細展開。然後對各個成員變量進行初始化,這裏我們也不感興趣,最感興趣的地方在init_special_inode函數裏面:
/* Mark an inode as a special file (excerpt: character-device case).
 * Stores the two pieces of state every device node needs: the generic
 * fops (def_chr_fops) and the device number (i_rdev), which open() will
 * later use to find the driver's real file_operations. */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
inode->i_mode = mode;
if (S_ISCHR(mode)) {
inode->i_fop = &def_chr_fops;
inode->i_rdev = rdev;
}
...
}
可見這裏保存了兩個重要的成員變量:文件操作函數集和設備號。而這個文件操作函數集是一個通用的操作集,所有字符驅動文件打開時都會調用,在這個函數裏面,通過設備號來找到真正的該設備的文件操作函數集。先看這個 def_chr_fops 的定義:
/* Generic file_operations installed on every character-device inode.
 * chrdev_open() uses the inode's device number to locate the driver's
 * own fops and swaps them into the struct file. */
const struct file_operations def_chr_fops = {
.open = chrdev_open,
.llseek = noop_llseek,
};
2 /dev設備節點打開
文件的打開流程,可以參考這篇文章:
https://blog.csdn.net/oqqYuJi12345678/article/details/101849978
從上面文章可以知道,在完成路徑搜索以後,會調用do_dentry_open函數,在該函數裏面,會最終會調用節點inode->i_fop函數集中的i_fop函數。對於字符設備,就是上一節講的def_chr_fops函數集,其打開函數爲chrdev_open:
/* Generic open for character devices: map the inode's device number to
 * its cdev via cdev_map, cache the cdev in inode->i_cdev, install the
 * driver's file_operations in the file, then call the driver's open. */
static int chrdev_open(struct inode *inode, struct file *filp)
{
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
----------------------------------------------(1)
/* First open of this inode: i_cdev has not been cached yet. */
p = inode->i_cdev;
if (!p) {
struct kobject *kobj;
int idx;
spin_unlock(&cdev_lock);
--------------------------------------------------(2)
/* Look up the kobject registered for this device number, then
 * recover the containing cdev from it. */
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
/* Check i_cdev again in case somebody beat us to it while
we dropped the lock. */
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
---------------------------------------------------(3)
/* Link this inode into the cdev's inode list. */
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
cdev_put(new);
if (ret)
return ret;
ret = -ENXIO;
/* Replace the generic fops with the driver's own file_operations. */
filp->f_op = fops_get(p->ops);
if (!filp->f_op)
goto out_cdev_put;
if (filp->f_op->open) {
--------------------------------------------------------------------(4)
/* Finally call the driver's open() - the one registered by the
 * character driver itself. */
ret = filp->f_op->open(inode, filp);
if (ret)
goto out_cdev_put;
}
return 0;
out_cdev_put:
cdev_put(p);
return ret;
}
(1)inode->i_cdev還沒有設置,爲空
(2)根據inode的設備號,找到設備的kobject,然後再根據kobject找到其cdev結構。
/* Find the kobject registered for device number @dev in @domain
 * (e.g. cdev_map).  Scans the probe list hashed by major number,
 * preferring the probe with the narrowest matching range, and calls its
 * get() callback (exact_match for cdevs) to obtain the kobject. */
struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index)
{
struct kobject *kobj;
struct probe *p;
unsigned long best = ~0UL;
retry:
mutex_lock(domain->lock);
for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) {
struct kobject *(*probe)(dev_t, int *, void *);
struct module *owner;
void *data;
/* Skip probes whose [dev, dev+range) window does not cover @dev. */
if (p->dev > dev || p->dev + p->range - 1 < dev)
continue;
/* The list is sorted by range, so no better match can follow. */
if (p->range - 1 >= best)
break;
if (!try_module_get(p->owner))
continue;
owner = p->owner;
data = p->data;
probe = p->get;
best = p->range - 1;
*index = dev - p->dev;
if (p->lock && p->lock(dev, data) < 0) {
module_put(owner);
continue;
}
mutex_unlock(domain->lock);
kobj = probe(dev, index, data);
/* Currently ->owner protects _only_ ->probe() itself. */
module_put(owner);
if (kobj)
return kobj;
goto retry;
}
mutex_unlock(domain->lock);
return NULL;
}
當找到對應的probe以後,調用其p->get函數,在下面的初始化裏面我們知道該函數爲 exact_match
/* probe->get callback for character devices: @data is the cdev stored
 * by kobj_map(), so simply return its embedded kobject. */
static struct kobject *exact_match(dev_t dev, int *part, void *data)
{
struct cdev *p = data;
return &p->kobj;
}
通過exact_match獲取其kobject
(3)把inode添加到cdev的list中,是不是意味着設備可以被打開幾次?
(4)調用cdev的ops函數集進一步做打開操作,這個操作函數集纔是真正的我們寫字符驅動的時候註冊的操作集函數。
關於上面的(2)和(4),其初始化是在字符設備註冊的時候做的,下面來進一步分析。
2.1 字符設備初始化
從上一篇文章可以知道,字符設備的註冊是通過調用register_chrdev函數,看看該函數具體做了什麼事情:
/* Legacy registration helper: claims all 256 minors (0..255) of @major
 * for this driver via __register_chrdev(). */
static inline int register_chrdev(unsigned int major, const char *name,
const struct file_operations *fops)
{
return __register_chrdev(major, 0, 256, name, fops);
}
/* Register a character driver: reserve a device-number region, allocate
 * a cdev, attach the driver's fops, and publish it in cdev_map so that
 * chrdev_open() can find it later. */
int __register_chrdev(unsigned int major, unsigned int baseminor,
unsigned int count, const char *name,
const struct file_operations *fops)
{
struct char_device_struct *cd;
struct cdev *cdev;
int err = -ENOMEM;
/* Reserve the major number: if @major is 0 a free one is allocated
 * dynamically, otherwise the requested number is validated. */
cd = __register_chrdev_region(major, baseminor, count, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
cdev = cdev_alloc();
if (!cdev)
goto out2;
cdev->owner = fops->owner;
------------------------------------------------------------(1)
cdev->ops = fops;/* the driver's file_operations - the core of the character driver */
kobject_set_name(&cdev->kobj, "%s", name);
-------------------------------------------------------------(2)
err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
if (err)
goto out;
cd->cdev = cdev;
return major ? 0 : cd->major;
out:
kobject_put(&cdev->kobj);
out2:
kfree(__unregister_chrdev_region(cd->major, baseminor, count));
return err;
}
(1)上一節open函數中最終操作的字符設備open函數就是在這邊註冊的字符設備操作集
(2)cdev_add把cdev添加到cdev_map中:
/* Publish @p in the global cdev_map for device numbers [dev, dev+count),
 * using exact_match as the probe's get() callback so open() can later
 * recover the cdev from the device number. */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
int error;
p->dev = dev;
p->count = count;
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
return error;
kobject_get(p->kobj.parent);
return 0;
}
/* Insert one probe entry per major number covered by [dev, dev+range)
 * into @domain's hash table, keeping each bucket's list sorted by range
 * so that narrower (more specific) registrations win on lookup. */
int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, struct module *module, kobj_probe_t *probe, int (*lock)(dev_t, void *), void *data)
{
unsigned n = MAJOR(dev+range-1) - MAJOR(dev) + 1;
unsigned index = MAJOR(dev);
unsigned i;
struct probe *p;
if (n > 255) /* n > 255 would exceed the size of the probes[] array in kobj_map */
n = 255;
p = kmalloc(sizeof(struct probe) * n, GFP_KERNEL); /* allocate n struct probe entries */
if(p == NULL)
return -ENOMEM;
for(i = 0; i < n; i++, p++) { /* initialize each probe from the function arguments */
p->owner = module;
p->get = probe; /* called at open time to obtain the cdev's kobject */
p->lock = lock;
p->dev = dev; /* device number */
p->range = range;
p->data = data; /* stores the cdev; used later when the file is opened */
}
mutex_lock(domain->lock);
for(i = 0, p-=n; i < n; i++, p++, index++) {
struct probe **s = &domain->probes[index % 255];/* insert into the global map, bucketed by major number */
while(*s && (*s)->range < range)
s = &(*s)->next;
p->next = *s;
*s = p;
}
mutex_unlock(domain->lock);
return 0;
}
dev_t的前12位爲主設備號,後20位爲次設備號。
n = MAJOR(dev + range - 1) - MAJOR(dev) + 1 表示設備號範圍(dev, dev+range)中不同的主設備號的個數。
通常n的值爲1。
從代碼中的第二個for循環可以看出kobj_map中的probes數組中每個元素爲一個struct probe鏈表的頭指針。
每個鏈表中的probe對象有(MAJOR(probe.dev) % 255)值相同的關係。若主設備號小於255, 則每個鏈表中的probe都有相同的主設備號。
鏈表中的元素是按照range值從小到大排列的。
while循環即是找出該將p插入的位置。
該初始化過程也印證了上面設備的打開過程。