消息隊列是由存放在內核中的消息組成的鏈表,由IPC id標識。
由msgget創建新隊列或打開已經存在的隊列
由msgsnd將消息添加到消息隊列尾,每個消息包括正整數標識的類型,非負的長度,及數據。
由msgrcv從消息隊列中取消息,不必按FIFO取消息,可以通過類型字段取相應的消息。
I.數據結構
i.msg_queue
87 /* one msq_queue structure for each present queue on the system */
88 struct msg_queue {
89 struct kern_ipc_perm q_perm;
90 time_t q_stime; /* last msgsnd time */
91 time_t q_rtime; /* last msgrcv time */
92 time_t q_ctime; /* last change time */
93 unsigned long q_cbytes; /* current number of bytes on queue */
94 unsigned long q_qnum; /* number of messages in queue */
95 unsigned long q_qbytes; /* max number of bytes on queue */
96 pid_t q_lspid; /* pid of last msgsnd */
97 pid_t q_lrpid; /* last receive pid */
98
99 struct list_head q_messages;
100 struct list_head q_receivers;
101 struct list_head q_senders;
102 };
msg_queue:一個msg_queue對應一個消息隊列
q_messages:消息鏈表
q_receivers:被阻塞的接收消息進程鏈表
q_senders:被阻塞的發送消息進程鏈表
ii.msg_msg
77 /* one msg_msg structure for each message */
78 struct msg_msg {
79 struct list_head m_list;
80 long m_type;
81 int m_ts; /* message text size */
82 struct msg_msgseg* next;
83 void *security;
84 /* the actual message follows immediately */
85 };
39 struct msg_msgseg {
40 struct msg_msgseg* next;
41 /* the next part of the message follows immediately */
42 };
43
44 #define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
45 #define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
46
msg_msg:一個msg_msg對應一條消息;一條消息是由next指針串起來的內存塊鏈表,第一個結點由msg_msg頭與數據組成,其它結點由msg_msgseg頭與數據組成;除最後一個結點外,其它結點(頭加數據)的大小均爲PAGE_SIZE(見DATALEN_MSG與DATALEN_SEG的定義),最後一個結點的大小取決於消息剩餘的長度。
iii.msg_sender
60 /* one msg_sender for each sleeping sender */
61 struct msg_sender {
62 struct list_head list;
63 struct task_struct *tsk;
64 };
msg_sender:表示一個被阻塞的發送消息進程,通過list鏈入msg_queue的q_senders(被阻塞的發送進程鏈表),tsk指向被阻塞的進程
iv. msg_receiver
46 /*
47 * one msg_receiver structure for each sleeping receiver:
48 */
49 struct msg_receiver {
50 struct list_head r_list;
51 struct task_struct *r_tsk;
52
53 int r_mode;
54 long r_msgtype;
55 long r_maxsize;
56
57 struct msg_msg *volatile r_msg;
58 };
msg_receiver:表示被阻塞的接收消息進程,及接收消息的屬性
r_msg:發送消息時,如果該消息滿足某個被阻塞接收進程的接收要求,發送方直接通過r_msg把消息交給該接收進程,而不需要先放入消息隊列中;消息隊列被刪除時,錯誤碼(如-EIDRM)也是通過r_msg傳給被阻塞的接收進程。
v.結構關係圖
II.消息隊列的創建
消息隊列由newque創建:
174 /**
175 * newque - Create a new msg queue
176 * @ns: namespace
177 * @params: ptr to the structure that contains the key and msgflg
178 *
179 * Called with msg_ids.rw_mutex held (writer)
180 */
181 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
182 {
183 struct msg_queue *msq;
184 int id, retval;
185 key_t key = params->key;
186 int msgflg = params->flg;
187
188 msq = ipc_rcu_alloc(sizeof(*msq));
189 if (!msq)
190 return -ENOMEM;
191
192 msq->q_perm.mode = msgflg & S_IRWXUGO;
193 msq->q_perm.key = key;
194
195 msq->q_perm.security = NULL;
196 retval = security_msg_queue_alloc(msq);
197 if (retval) {
198 ipc_rcu_putref(msq);
199 return retval;
200 }
201
202 /*
203 * ipc_addid() locks msq
204 */
205 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
206 if (id < 0) {
207 security_msg_queue_free(msq);
208 ipc_rcu_putref(msq);
209 return id;
210 }
211
212 msq->q_stime = msq->q_rtime = 0;
213 msq->q_ctime = get_seconds();
214 msq->q_cbytes = msq->q_qnum = 0;
215 msq->q_qbytes = ns->msg_ctlmnb;
216 msq->q_lspid = msq->q_lrpid = 0;
217 INIT_LIST_HEAD(&msq->q_messages);
218 INIT_LIST_HEAD(&msq->q_receivers);
219 INIT_LIST_HEAD(&msq->q_senders);
220
221 msg_unlock(msq);
222
223 return msq->q_perm.id;
224 }
1.創建msg_queue結構
2.將msg_queue添加到消息隊列基數樹中,並取回基數樹id
3.初始化msg_queue結構,如初始化消息鏈表、被阻塞接收進程鏈表等
III.消息隊列的移除
272 /*
273 * freeque() wakes up waiters on the sender and receiver waiting queue,
274 * removes the message queue from message queue ID IDR, and cleans up all the
275 * messages associated with this queue.
276 *
277 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
278 * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
279 */
280 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
281 {
282 struct list_head *tmp;
283 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
284
285 expunge_all(msq, -EIDRM);
286 ss_wakeup(&msq->q_senders, 1);
287 msg_rmid(ns, msq);
288 msg_unlock(msq);
289
290 tmp = msq->q_messages.next;
291 while (tmp != &msq->q_messages) {
292 struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
293
294 tmp = tmp->next;
295 atomic_dec(&ns->msg_hdrs);
296 free_msg(msg);
297 }
298 atomic_sub(msq->q_cbytes, &ns->msg_bytes);
299 security_msg_queue_free(msq);
300 ipc_rcu_putref(msq);
301 }
1.喚醒所有被阻塞的消息接收進程,並以EIDRM通知它們消息隊列已被移除
2.喚醒所有被阻塞的消息發送進程
3.將消息隊列從消息隊列基數樹中移除;置msq->q_perm.deleted=1,用來通知被喚醒的發送進程消息隊列已被刪除(freeque此時持有spinlock,被喚醒的發送進程要先獲取spinlock,再檢查msq->q_perm.deleted)
4.釋放消息隊列中消息所使用的內存
5.將消息隊列的消息長度計數從系統消息長度計數中刪除
6.刪除msg_queue
IV.消息
i.load_msg
47 struct msg_msg *load_msg(const void __user *src, int len)
48 {
49 struct msg_msg *msg;
50 struct msg_msgseg **pseg;
51 int err;
52 int alen;
53
54 alen = len;
55 if (alen > DATALEN_MSG)
56 alen = DATALEN_MSG;
57
58 msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
59 if (msg == NULL)
60 return ERR_PTR(-ENOMEM);
61
62 msg->next = NULL;
63 msg->security = NULL;
64
65 if (copy_from_user(msg + 1, src, alen)) {
66 err = -EFAULT;
67 goto out_err;
68 }
69
70 len -= alen;
71 src = ((char __user *)src) + alen;
72 pseg = &msg->next;
73 while (len > 0) {
74 struct msg_msgseg *seg;
75 alen = len;
76 if (alen > DATALEN_SEG)
77 alen = DATALEN_SEG;
78 seg = kmalloc(sizeof(*seg) + alen,
79 GFP_KERNEL);
80 if (seg == NULL) {
81 err = -ENOMEM;
82 goto out_err;
83 }
84 *pseg = seg;
85 seg->next = NULL;
86 if (copy_from_user(seg + 1, src, alen)) {
87 err = -EFAULT;
88 goto out_err;
89 }
90 pseg = &seg->next;
91 len -= alen;
92 src = ((char __user *)src) + alen;
93 }
94
95 err = security_msg_msg_alloc(msg);
96 if (err)
97 goto out_err;
98
99 return msg;
100
101 out_err:
102 free_msg(msg);
103 return ERR_PTR(err);
104 }
load_msg用於將用戶空間的信息數據複製到內核內存中
注:
一條消息是由內存鏈表組成,每個結點內存從通用slab中獲取;
每個結點均由管理信息與數據組成,第一個結點由msg_msg管理,其它由msg_msgseg管理;
除最後一個結點外,其它結點的大小均爲PAGE_SIZE;不直接分配頁幀,是因爲如果有很多小消息(遠小於PAGE_SIZE),按整頁分配會浪費內存
ii.store_msg
106 int store_msg(void __user *dest, struct msg_msg *msg, int len)
107 {
108 int alen;
109 struct msg_msgseg *seg;
110
111 alen = len;
112 if (alen > DATALEN_MSG)
113 alen = DATALEN_MSG;
114 if (copy_to_user(dest, msg + 1, alen))
115 return -1;
116
117 len -= alen;
118 dest = ((char __user *)dest) + alen;
119 seg = msg->next;
120 while (len > 0) {
121 alen = len;
122 if (alen > DATALEN_SEG)
123 alen = DATALEN_SEG;
124 if (copy_to_user(dest, seg + 1, alen))
125 return -1;
126 len -= alen;
127 dest = ((char __user *)dest) + alen;
128 seg = seg->next;
129 }
130 return 0;
131 }
store_msg用於將消息數據從內核內存中複製到進程用戶空間中
iii.free_msg
133 void free_msg(struct msg_msg *msg)
134 {
135 struct msg_msgseg *seg;
136
137 security_msg_msg_free(msg);
138
139 seg = msg->next;
140 kfree(msg);
141 while (seg != NULL) {
142 struct msg_msgseg *tmp = seg->next;
143 kfree(seg);
144 seg = tmp;
145 }
146 }
free_msg用於釋放消息所使用的內核slab內存
IV.發送消息
636 long do_msgsnd(int msqid, long mtype, void __user *mtext,
637 size_t msgsz, int msgflg)
638 {
639 struct msg_queue *msq;
640 struct msg_msg *msg;
641 int err;
642 struct ipc_namespace *ns;
643
644 ns = current->nsproxy->ipc_ns;
645
646 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
647 return -EINVAL;
648 if (mtype < 1)
649 return -EINVAL;
650
651 msg = load_msg(mtext, msgsz);
652 if (IS_ERR(msg))
653 return PTR_ERR(msg);
654
655 msg->m_type = mtype;
656 msg->m_ts = msgsz;
657
658 msq = msg_lock_check(ns, msqid);
659 if (IS_ERR(msq)) {
660 err = PTR_ERR(msq);
661 goto out_free;
662 }
663
664 for (;;) {
665 struct msg_sender s;
666
667 err = -EACCES;
668 if (ipcperms(&msq->q_perm, S_IWUGO))
669 goto out_unlock_free;
670
671 err = security_msg_queue_msgsnd(msq, msg, msgflg);
672 if (err)
673 goto out_unlock_free;
674
675 if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
676 1 + msq->q_qnum <= msq->q_qbytes) {
677 break;
678 }
679
680 /* queue full, wait: */
681 if (msgflg & IPC_NOWAIT) {
682 err = -EAGAIN;
683 goto out_unlock_free;
684 }
685 ss_add(msq, &s);
686 ipc_rcu_getref(msq);
687 msg_unlock(msq);
688 schedule();
689
690 ipc_lock_by_ptr(&msq->q_perm);
691 ipc_rcu_putref(msq);
692 if (msq->q_perm.deleted) {
693 err = -EIDRM;
694 goto out_unlock_free;
695 }
696 ss_del(&s);
697
698 if (signal_pending(current)) {
699 err = -ERESTARTNOHAND;
700 goto out_unlock_free;
701 }
702 }
703
704 msq->q_lspid = task_tgid_vnr(current);
705 msq->q_stime = get_seconds();
706
707 if (!pipelined_send(msq, msg)) {
708 /* noone is waiting for this message, enqueue it */
709 list_add_tail(&msg->m_list, &msq->q_messages);
710 msq->q_cbytes += msgsz;
711 msq->q_qnum++;
712 atomic_add(msgsz, &ns->msg_bytes);
713 atomic_inc(&ns->msg_hdrs);
714 }
715
716 err = 0;
717 msg = NULL;
718
719 out_unlock_free:
720 msg_unlock(msq);
721 out_free:
722 if (msg != NULL)
723 free_msg(msg);
724 return err;
725 }
726
727 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
728 int, msgflg)
729 {
730 long mtype;
731
732 if (get_user(mtype, &msgp->mtype))
733 return -EFAULT;
734 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
735 }
1.參數檢查
2.分配消息內存,並將消息複製到內核內存中
3.權限檢查
4.檢查消息隊列是否已滿
A.如果消息隊列已滿
a.如果IPC_NOWAIT置位,返回EAGAIN通知用戶進程再次嘗試發送;
b.如果IPC_NOWAIT未置位,阻塞發送進程;
c.阻塞進程被喚醒時檢查消息隊列是否被刪除,如果被刪除返回EIDRM通知用戶進程消息隊列被刪除;如果有信號待處理,返回ERESTARTNOHAND;否則繼續檢查消息隊列是否已滿
B.如果消息隊列未滿
a.如果有被阻塞的接收進程,且消息滿足接收要求,則將消息直接發送給被阻塞的接收進程
b.否則,將消息排入消息隊列尾
V.接收消息
756 long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
757 size_t msgsz, long msgtyp, int msgflg)
758 {
759 struct msg_queue *msq;
760 struct msg_msg *msg;
761 int mode;
762 struct ipc_namespace *ns;
763
764 if (msqid < 0 || (long) msgsz < 0)
765 return -EINVAL;
766 mode = convert_mode(&msgtyp, msgflg);
767 ns = current->nsproxy->ipc_ns;
768
769 msq = msg_lock_check(ns, msqid);
770 if (IS_ERR(msq))
771 return PTR_ERR(msq);
772
773 for (;;) {
774 struct msg_receiver msr_d;
775 struct list_head *tmp;
776
777 msg = ERR_PTR(-EACCES);
778 if (ipcperms(&msq->q_perm, S_IRUGO))
779 goto out_unlock;
780
781 msg = ERR_PTR(-EAGAIN);
782 tmp = msq->q_messages.next;
783 while (tmp != &msq->q_messages) {
784 struct msg_msg *walk_msg;
785
786 walk_msg = list_entry(tmp, struct msg_msg, m_list);
787 if (testmsg(walk_msg, msgtyp, mode) &&
788 !security_msg_queue_msgrcv(msq, walk_msg, current,
789 msgtyp, mode)) {
790
791 msg = walk_msg;
792 if (mode == SEARCH_LESSEQUAL &&
793 walk_msg->m_type != 1) {
794 msg = walk_msg;
795 msgtyp = walk_msg->m_type - 1;
796 } else {
797 msg = walk_msg;
798 break;
799 }
800 }
801 tmp = tmp->next;
802 }
803 if (!IS_ERR(msg)) {
804 /*
805 * Found a suitable message.
806 * Unlink it from the queue.
807 */
808 if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
809 msg = ERR_PTR(-E2BIG);
810 goto out_unlock;
811 }
812 list_del(&msg->m_list);
813 msq->q_qnum--;
814 msq->q_rtime = get_seconds();
815 msq->q_lrpid = task_tgid_vnr(current);
816 msq->q_cbytes -= msg->m_ts;
817 atomic_sub(msg->m_ts, &ns->msg_bytes);
818 atomic_dec(&ns->msg_hdrs);
819 ss_wakeup(&msq->q_senders, 0);
820 msg_unlock(msq);
821 break;
822 }
823 /* No message waiting. Wait for a message */
824 if (msgflg & IPC_NOWAIT) {
825 msg = ERR_PTR(-ENOMSG);
826 goto out_unlock;
827 }
828 list_add_tail(&msr_d.r_list, &msq->q_receivers);
829 msr_d.r_tsk = current;
830 msr_d.r_msgtype = msgtyp;
831 msr_d.r_mode = mode;
832 if (msgflg & MSG_NOERROR)
833 msr_d.r_maxsize = INT_MAX;
834 else
835 msr_d.r_maxsize = msgsz;
836 msr_d.r_msg = ERR_PTR(-EAGAIN);
837 current->state = TASK_INTERRUPTIBLE;
838 msg_unlock(msq);
839
840 schedule();
841
842 /* Lockless receive, part 1:
843 * Disable preemption. We don't hold a reference to the queue
844 * and getting a reference would defeat the idea of a lockless
845 * operation, thus the code relies on rcu to guarantee the
846 * existance of msq:
847 * Prior to destruction, expunge_all(-EIRDM) changes r_msg.
848 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
849 * rcu_read_lock() prevents preemption between reading r_msg
850 * and the spin_lock() inside ipc_lock_by_ptr().
851 */
852 rcu_read_lock();
853
854 /* Lockless receive, part 2:
855 * Wait until pipelined_send or expunge_all are outside of
856 * wake_up_process(). There is a race with exit(), see
857 * ipc/mqueue.c for the details.
858 */
859 msg = (struct msg_msg*)msr_d.r_msg;
860 while (msg == NULL) {
861 cpu_relax();
862 msg = (struct msg_msg *)msr_d.r_msg;
863 }
864
865 /* Lockless receive, part 3:
866 * If there is a message or an error then accept it without
867 * locking.
868 */
869 if (msg != ERR_PTR(-EAGAIN)) {
870 rcu_read_unlock();
871 break;
872 }
873
874 /* Lockless receive, part 3:
875 * Acquire the queue spinlock.
876 */
877 ipc_lock_by_ptr(&msq->q_perm);
878 rcu_read_unlock();
879
880 /* Lockless receive, part 4:
881 * Repeat test after acquiring the spinlock.
882 */
883 msg = (struct msg_msg*)msr_d.r_msg;
884 if (msg != ERR_PTR(-EAGAIN))
885 goto out_unlock;
886
887 list_del(&msr_d.r_list);
888 if (signal_pending(current)) {
889 msg = ERR_PTR(-ERESTARTNOHAND);
890 out_unlock:
891 msg_unlock(msq);
892 break;
893 }
894 }
895 if (IS_ERR(msg))
896 return PTR_ERR(msg);
897
898 msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
899 *pmtype = msg->m_type;
900 if (store_msg(mtext, msg, msgsz))
901 msgsz = -EFAULT;
902
903 free_msg(msg);
904
905 return msgsz;
906 }
907
908 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
909 long, msgtyp, int, msgflg)
910 {
911 long err, mtype;
912
913 err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
914 if (err < 0)
915 goto out;
916
917 if (put_user(mtype, &msgp->mtype))
918 err = -EFAULT;
919 out:
920 return err;
921 }
1.參數檢查及權限檢查
2.如果有滿足接收要求的消息(消息隊列中有消息,且類型、長度都滿足要求)
a.將消息從消息隊列中取出,並複製到用戶地址空間
b.釋放消息所佔用的內核內存
c.喚醒被阻塞的消息發送進程(ss_wakeup作用於整個q_senders鏈表),讓它們重新檢查消息隊列是否有空間
3.如果沒有滿足接收要求的消息
A.如果IPC_NOWAIT置位,返回ENOMSG通知用戶進程沒有消息
B.如果IPC_NOWAIT未置位,阻塞消息接收進程
C.阻塞進程被喚醒
a.如果因有滿足接收要求的消息發送,同2的a和b的處理;
b.如果因爲信號被喚醒,返回ERESTARTNOHAND:先做信號處理;若該信號沒有用戶處理函數,則自動重新調用msgrcv,否則系統調用以EINTR返回
c.否則接收進程繼續被阻塞