epoll源碼探祕(epoll_create)

epoll源碼探祕(epoll_create)

epoll系列的系統函數,很簡單,但是很強大。epoll_create(),epoll_ctl() , epoll_wait(),三個就夠了。

一些重要的結構:

/*
 * Each file descriptor added to the eventpoll interface will
 * have an entry of this type linked to the "rbr" RB tree
 * (i.e. one epitem is a single node of that red-black tree).
 * Avoid increasing the size of this struct, there can be many thousands
 * of these on a server and we do not want this to take another cache line.
 */
struct epitem {
	union {
		/* RB tree node links this structure to the eventpoll RB tree */
		struct rb_node rbn;
		/* Used to free the struct epitem */
		struct rcu_head rcu;
	};

	/* List header used to link this structure to the eventpoll ready list */
	struct list_head rdllink;

	/*
	 * Works together with "struct eventpoll"->ovflist in keeping the
	 * single linked chain of items.
	 */
	struct epitem *next;

	/* The file descriptor information this item refers to (the associated fd) */
	struct epoll_filefd ffd;

	/* Number of active wait queue attached to poll operations */
	int nwait;

	/* List containing poll wait queues */
	struct list_head pwqlist;

	/* The "container" of this item */
	struct eventpoll *ep;

	/* List header used to link this item to the "struct file" items list */
	struct list_head fllink;

	/* wakeup_source used when EPOLLWAKEUP is set */
	struct wakeup_source __rcu *ws;

	/*
	 * The structure that describes the interested events and the source fd
	 * (the states of the monitored file descriptor the caller cares about)
	 */
	struct epoll_event event;
};
/*
 * This structure is stored inside the "private_data" member of the file
 * structure and represents the main data structure for the eventpoll
 * interface.
 */
struct eventpoll {
	/* Protect the access to this structure */
	spinlock_t lock;

	/*
	 * This mutex is used to ensure that files are not removed
	 * while epoll is using them. This is held during the event
	 * collection loop, the file cleanup path, the epoll file exit
	 * code and the ctl operations.
	 */
	struct mutex mtx;

	/*
	 * Wait queue used by sys_epoll_wait().
	 * Doubly linked list; the wait queue of the epoll file. A process
	 * calling epoll_wait may sleep on this queue until it is woken by
	 * ep_poll_callback() or its timeout expires.
	 */
	wait_queue_head_t wq;

	/*
	 * Wait queue used by file->poll().
	 * Doubly linked list; poll_wait is the wakeup queue of the eventpoll
	 * file itself. Processes sleeping here are waiting for events on the
	 * eventpoll file itself.
	 */
	wait_queue_head_t poll_wait;

	/* List of ready file descriptors (the ready list) */
	struct list_head rdllist;

	/* RB tree root used to store monitored fd structs */
	struct rb_root rbr;

	/*
	 * This is a single linked list that chains all the "struct epitem" that
	 * happened while transferring ready events to userspace w/out
	 * holding ->lock. (While events are being delivered to userspace, items
	 * that become ready are parked on this list temporarily; otherwise they
	 * are added directly to the ready list rdllist.)
	 */
	struct epitem *ovflist;

	/* wakeup_source used when ep_scan_ready_list is running */
	struct wakeup_source *ws;

	/* The user that created the eventpoll descriptor */
	struct user_struct *user;

	/* The file backing this eventpoll; assigned in epoll_create1() after the anonymous file is created */
	struct file *file;

	/* used to optimize loop detection check */
	int visited;
	struct list_head visited_list_link;
};


epoll_create函數:

         創建一個epoll的句柄。需要注意的是,當創建好epoll句柄後,它就會佔用一個fd值,在linux下如果查看/proc/進程id/fd/,是能夠看到這個fd的,所以在使用完epoll後,必須調用close()關閉,否則可能導致fd被耗盡。

int epoll_create(int size);
int epoll_create1(int flags);

第一級:epoll_create()(注意在Linux 2.6.8之後,size參數是被忽略的,但仍必須大於0,否則返回-EINVAL)

第二級: epoll_create1()

第三級:ep_alloc()創建內部數據(eventpoll)

                 在ep_alloc()中

               1.初始化epoll文件等待隊列(雙向鏈表)

               2.初始化eventpoll文件喚醒隊列(雙向鏈表)

               3.初始化就緒隊列(雙向鏈表)

/*
 * Turn @list into an empty list head: both of its links are made to
 * point back at the head itself.
 */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
	struct list_head *const self = list;

	self->next = self;
	self->prev = self;
}

               4.初始化紅黑樹根節點

/* Empty-tree initializer: a struct rb_root compound literal whose first member is NULL. */
#define RB_ROOT	(struct rb_root) { NULL, }
/* Reset the eventpoll's RB tree root (ep->rbr) to the empty-tree value. */
ep->rbr = RB_ROOT;

               5.初始化溢出鏈表ovflist(單鏈表,用於暫存向用戶空間傳遞事件期間變為就緒的epitem)

/*
 * Sentinel pointer value: -1L cast to void *.
 * NOTE(review): presumably marks ovflist as "not currently in use" — confirm
 * against the code that tests it.
 */
#define EP_UNACTIVE_PTR ((void *) -1L)
/* ovflist starts out holding the sentinel rather than NULL. */
ep->ovflist = EP_UNACTIVE_PTR;

第三級:get_unused_fd_flags()獲取一個空閒的文件描述符

第三級:anon_inode_getfile()創建一個匿名文件

第三級:fd_install()將文件與fd建立聯繫

/*
 * epoll_create1() system call: open an eventpoll file descriptor.
 *
 * @flags may contain only EPOLL_CLOEXEC; any other bit yields -EINVAL.
 * On success the new descriptor is returned. On failure the negative
 * error from ep_alloc(), get_unused_fd_flags() or anon_inode_getfile()
 * is returned, after releasing whatever had been set up so far.
 */
SYSCALL_DEFINE1(epoll_create1, int, flags)
{
	struct eventpoll *ep = NULL;
	struct file *epfile;
	int open_flags;
	int fd;
	int err;

	/* Check the EPOLL_* constant for consistency with the O_* flag used below. */
	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);

	if ((flags & ~EPOLL_CLOEXEC) != 0)
		return -EINVAL;

	/* The same flag word is used for both the descriptor and the file. */
	open_flags = O_RDWR | (flags & O_CLOEXEC);

	/* Create the internal data structure ("struct eventpoll"). */
	err = ep_alloc(&ep);
	if (err < 0)
		return err;

	/*
	 * Create the items needed to set up an eventpoll file: a free file
	 * descriptor and a file structure. The eventpoll instance is handed
	 * to anon_inode_getfile() so the file can refer back to it.
	 */
	fd = get_unused_fd_flags(open_flags);
	if (fd < 0) {
		ep_free(ep);
		return fd;
	}

	epfile = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
				    open_flags);
	if (IS_ERR(epfile)) {
		err = PTR_ERR(epfile);
		/* Undo in reverse order: release the reserved fd, then the eventpoll. */
		put_unused_fd(fd);
		ep_free(ep);
		return err;
	}

	ep->file = epfile;
	fd_install(fd, epfile);
	return fd;
}

/*
 * epoll_create() system call. @size is only validated to be positive
 * (otherwise -EINVAL); its value is not used for anything else here.
 * The actual work is delegated to epoll_create1 with no flags.
 */
SYSCALL_DEFINE1(epoll_create, int, size)
{
	return (size > 0) ? sys_epoll_create1(0) : -EINVAL;
}




























發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章