最近幹活的時候又被Linux管道和消息隊列搞的一臉懵逼。當初自己走馬觀花似的學習以爲內容很簡單,結果留下了大坑,借來Unix網絡編程來補補,重新審視這兩個部分,並且引以爲戒!!!
首先看管道
#include<unistd.h>
int pipe(int fd[2]);
返回:成功爲0,出錯爲-1(並設置errno)。兩個文件描述符中,fd[0]用來讀,fd[1]用來寫
靈魂作圖
單進程管道
剛fork後
父進程關閉管道讀出端,子進程關閉管道寫入端,在父子進程間提供一個單向數據流
管道只能用於父子進程或者兄弟進程間通信,也就是說管道只能用於具有親緣關係的進程間通信
管道的緩衝區大小是受限制的。管道所傳輸的是無格式的字節流。這就需要管道輸入方和輸出方事先約定好數據格式
有名管道(named pipe,也叫FIFO)可用於沒有親緣關係的進程間通信
#include<sys/types.h>
#include<sys/stat.h>
int mkfifo(const char *pathname, mode_t mode); // pathname爲創建有名管道的全路徑名,mode爲創建有名管道的模式
返回:若成功則0,不成功則-1
實現分析
// Number of buffer slots in each pipe's ring (sizes pipe_inode_info.bufs).
// Must stay a power of two: ring indices are wrapped with "& (PIPE_BUFFERS-1)".
#define PIPE_BUFFERS (16)
/*
 * One slot of a pipe's buffer ring: a page plus the window of bytes in it
 * that have been written but not yet read.
 */
struct pipe_buffer {
	struct page *page;			/* page frame backing this buffer */
	unsigned int offset;			/* where the valid data starts within the page */
	unsigned int len;			/* number of valid bytes starting at offset */
	struct pipe_buf_operations *ops;	/* method table used to map/unmap/release the buffer */
};
/*
 * Per-pipe state, reached through inode->i_pipe.
 *
 * The data lives in a ring of PIPE_BUFFERS page-sized buffers: curbuf is the
 * oldest buffer that still holds unread data and nrbufs counts how many
 * consecutive ring slots (starting at curbuf) are in use.
 */
struct pipe_inode_info {
	wait_queue_head_t wait;			/* wait queue for tasks sleeping on the pipe */
	unsigned int nrbufs;			/* number of buffers holding data still to be read */
	unsigned int curbuf;			/* ring index of the first buffer holding unread data */
	struct pipe_buffer bufs[PIPE_BUFFERS];	/* the ring of buffer descriptors */
	struct page *tmp_page;			/* spare page kept for the next write, or NULL */
	unsigned int start;			/* read position in the current pipe buffer
						 * (not referenced by the read/write paths in this file section) */
	unsigned int readers;			/* flag for, or number of, reading processes */
	unsigned int writers;			/* flag for, or number of, writing processes */
	unsigned int waiting_writers;		/* number of writers asleep in the wait queue */
	unsigned int r_counter;			/* like readers, but used while waiting on a FIFO
						 * NOTE(review): the original note said "process writing
						 * to the FIFO", identical to w_counter; presumably this
						 * one is for the reading side - confirm */
	unsigned int w_counter;			/* like writers, but used while waiting for a process
						 * that writes to the FIFO */
	struct fasync_struct *fasync_readers;	/* signal-driven async I/O notification for readers */
	struct fasync_struct *fasync_writers;	/* signal-driven async I/O notification for writers */
};
/*
 * pipe_readv - read pipe data into a user-supplied iovec array.
 *
 * Drains the pipe's buffer ring, oldest buffer first, copying at most
 * iov_length(_iov, nr_segs) bytes to user space and releasing every buffer
 * that becomes empty. When the ring is empty the function either returns or
 * sleeps in pipe_wait(), depending on whether writers exist, whether a writer
 * is already sleeping, whether any data was read, and O_NONBLOCK.
 *
 * Returns the number of bytes copied; 0 for a zero-length request or when the
 * pipe is empty and has no writers; or, if nothing was copied, -EFAULT (copy
 * to user failed), -EAGAIN (non-blocking and no data) or -ERESTARTSYS (signal
 * pending). ppos is only passed through the signature and is not used here.
 */
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
unsigned long nr_segs, loff_t *ppos)
{
struct inode *inode = filp->f_dentry->d_inode; // inode behind this open file
struct pipe_inode_info *info;
int do_wakeup;
ssize_t ret;
struct iovec *iov = (struct iovec *)_iov; // non-const alias of the caller's iovec array
size_t total_len;
total_len = iov_length(iov, nr_segs);
/* Null read succeeds. */
if (unlikely(total_len == 0))
return 0;
do_wakeup = 0;
ret = 0;
down(PIPE_SEM(*inode)); // take the pipe semaphore; held until the up() after the loop (pipe_wait() drops it while sleeping)
info = inode->i_pipe; // per-pipe state attached to the inode
for (;;) {
int bufs = info->nrbufs; // number of buffers currently holding unread data
if (bufs) { // at least one buffer has data to read
int curbuf = info->curbuf; // ring index of the oldest unread buffer
struct pipe_buffer *buf = info->bufs + curbuf; // descriptor of that ring slot
struct pipe_buf_operations *ops = buf->ops; // method table of this buffer
void *addr;
size_t chars = buf->len;
int error;
// never copy more than the caller still wants
if (chars > total_len)
chars = total_len;
// map the buffer to get the address its data is copied from
addr = ops->map(filp, info, buf);
// copy chars bytes, starting at the buffer's current offset, into the user iovec
error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
// undo the mapping whether or not the copy succeeded
ops->unmap(info, buf);
if (unlikely(error)) {
if (!ret) ret = -EFAULT; // report the fault only if nothing was read before it
break;
}
// account for the bytes consumed from this buffer
ret += chars;
buf->offset += chars;
buf->len -= chars;
// buffer fully drained: release it and advance the ring
if (!buf->len) {
buf->ops = NULL;
ops->release(info, buf);
curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
info->curbuf = curbuf;
info->nrbufs = --bufs;
do_wakeup = 1;
}
total_len -= chars; // bytes the caller still wants
if (!total_len) // request fully satisfied
break; /* common path: read succeeded */
}
if (bufs) /* More to do? */
continue;
// the ring is empty from here on
if (!PIPE_WRITERS(*inode)) // no writers left: return what was read so far (possibly 0)
break;
if (!PIPE_WAITING_WRITERS(*inode)) { // no writer is asleep waiting for room
/* syscall merging: Usually we must not sleep
* if O_NONBLOCK is set, or if we got some data.
* But if a writer sleeps in kernel space, then
* we can wait for that data without violating POSIX.
*/
if (ret)
break;
if (filp->f_flags & O_NONBLOCK) { // nothing read and the caller asked not to block
ret = -EAGAIN;
break;
}
}
if (signal_pending(current)) { // a signal is pending: do not go to sleep
if (!ret) ret = -ERESTARTSYS;
break;
}
// before sleeping, tell writers about any buffer freed above
if (do_wakeup) {
wake_up_interruptible_sync(PIPE_WAIT(*inode));
kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
}
pipe_wait(inode);
}
up(PIPE_SEM(*inode));
/* Signal writers asynchronously that there is more room. */
if (do_wakeup) {
wake_up_interruptible(PIPE_WAIT(*inode));
kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
}
if (ret > 0)
file_accessed(filp); // record the access on the file (atime) only when data was read
return ret;
}
/*
 * pipe_read - single-buffer read entry point.
 *
 * Describes the caller's buffer as a one-element iovec and hands the work to
 * pipe_readv(); the return value is whatever pipe_readv() returns.
 */
static ssize_t
pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	struct iovec single;

	single.iov_base = buf;
	single.iov_len = count;

	return pipe_readv(filp, &single, 1, ppos);
}
/* Drop the inode semaphore and wait for a pipe event, atomically */
/*
 * Called with the pipe semaphore held (both callers in this file take it
 * before calling); it is held again when this function returns.
 */
void pipe_wait(struct inode * inode)
{
DEFINE_WAIT(wait);
// queue the current task on the pipe's wait queue, in interruptible sleep state,
// before the semaphore is dropped
prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
// release the pipe semaphore so the other side can make progress
up(PIPE_SEM(*inode));
schedule();
// woken up: take the task off the wait queue again
finish_wait(PIPE_WAIT(*inode), &wait);
// re-acquire the pipe semaphore before returning to the caller
down(PIPE_SEM(*inode));
}
/*
 * pipe_writev - write the contents of a user-supplied iovec array to a pipe.
 *
 * A small write (< PAGE_SIZE) is first merged into the last buffer that holds
 * data, if that buffer allows merging and has room. Otherwise the data is
 * copied page by page into free slots of the buffer ring; when the ring is
 * full the writer sleeps in pipe_wait() unless O_NONBLOCK is set or a signal
 * is pending.
 *
 * Returns the number of bytes written. If nothing was written, returns
 * -EPIPE (no readers; SIGPIPE is also sent), -ENOMEM (page allocation
 * failed), -EFAULT (copy from user failed), -EAGAIN (ring full and
 * non-blocking) or -ERESTARTSYS (signal pending). On the merge path a failed
 * copy returns the error value of pipe_iov_copy_from_user() directly.
 * ppos is only part of the signature and is not used here.
 */
static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	down(PIPE_SEM(*inode));
	info = inode->i_pipe;

	/* Writing to a pipe nobody reads is pointless: raise SIGPIPE, fail with -EPIPE. */
	if (!PIPE_READERS(*inode)) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	/* Only when some buffer already holds data and the write is below PAGE_SIZE. */
	if (info->nrbufs && total_len < PAGE_SIZE) {
		/* curbuf + nrbufs - 1 is the LAST buffer that holds data. */
		int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
		struct pipe_buffer *buf = info->bufs + lastbuf;
		struct pipe_buf_operations *ops = buf->ops;
		/* First free byte in that buffer's page. */
		int offset = buf->offset + buf->len;

		/* Merge only if the buffer permits it and the whole write fits in the page. */
		if (ops->can_merge && offset + total_len <= PAGE_SIZE) {
			void *addr = ops->map(filp, info, buf);
			/* Append the user data right behind the existing data. */
			int error = pipe_iov_copy_from_user(offset + addr, iov, total_len);

			ops->unmap(info, buf);
			ret = error;
			/* Wake readers even when the copy failed. */
			do_wakeup = 1;
			if (error)
				goto out;
			/* The buffer now holds total_len more valid bytes. */
			buf->len += total_len;
			ret = total_len;
			goto out;
		}
	}

	/*
	 * No merge was possible (ring empty, write too large, or last buffer
	 * unsuitable): fill free ring slots one page at a time.
	 */
	for (;;) {
		int bufs;

		/* Readers may have gone away while this writer slept. */
		if (!PIPE_READERS(*inode)) {
			send_sig(SIGPIPE, current, 0);
			if (!ret) ret = -EPIPE;
			break;
		}

		/* Number of ring slots currently holding data. */
		bufs = info->nrbufs;
		if (bufs < PIPE_BUFFERS) {
			ssize_t chars;
			/* First free slot: the one right after the last buffer in use. */
			int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
			struct pipe_buffer *buf = info->bufs + newbuf;
			struct page *page = info->tmp_page;
			int error;

			/* No spare page cached: allocate one and cache it until it is inserted. */
			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					if (!ret) ret = -ENOMEM;
					break;
				}
				info->tmp_page = page;
			}
			/* Always wakeup, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			/* Copy up to one page of user data into the page. */
			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
			kunmap(page);
			if (unlikely(error)) {
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			/* Fill in the slot, bump nrbufs, and give up ownership of the spare page. */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			info->nrbufs = ++bufs;
			info->tmp_page = NULL;

			/* Stop once everything has been written. */
			total_len -= chars;
			if (!total_len)
				break;
		}

		/* Free slots remain: keep writing without sleeping. */
		if (bufs < PIPE_BUFFERS)
			continue;

		/*
		 * Ring is full. A non-blocking writer returns what it has
		 * written so far, or -EAGAIN if that is nothing.
		 */
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret) ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		/* Let readers know about the new data before going to sleep. */
		if (do_wakeup) {
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		/* Count this task as a sleeping writer for the duration of the wait. */
		PIPE_WAITING_WRITERS(*inode)++;
		pipe_wait(inode);
		PIPE_WAITING_WRITERS(*inode)--;
	}
out:
	up(PIPE_SEM(*inode));
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
	}
	if (ret > 0)
		inode_update_time(inode, 1);	/* mtime and ctime */
	return ret;
}
PS:管道是作爲一組VFS對象來實現的,因此沒有對應的磁盤映像。管道的安裝和實現方式都與VFS中的其他對象類似,此處不進行探討