【Linux4.1.12源碼分析】二層報文發送之報文GSO分段(skb_segment)

skb_segment是實現封裝報文GSO分段的基礎,直接拋代碼。

/**
 *	skb_segment - Perform protocol segmentation on skb.
 *	@head_skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function performs segmentation on the given skb.  It returns
 *	a pointer to the first in a list of new skbs for the segments.
 *	In case of error it returns ERR_PTR(err).
 *
 *	Each segment carries at most gso_size payload bytes, preceded by a
 *	copy of the doffset header bytes (MAC header up to head_skb->data)
 *	and of the tnl_hlen bytes in front of them.  On success the last
 *	segment is also stored in segs->prev.
 *
 *	Errors: ERR_PTR(-EINVAL) when skb_network_protocol() returns 0;
 *	ERR_PTR(-ENOMEM) for every "goto err" path (err is never changed
 *	from its initial value), after freeing the segments built so far.
 *
 *	Note: head_skb->data is pushed back by doffset at entry and is not
 *	restored here, on either the success or the error paths.
 */
struct sk_buff *skb_segment(struct sk_buff *head_skb,
			    netdev_features_t features)
{
	struct sk_buff *segs = NULL;	//head of the resulting segment list
	struct sk_buff *tail = NULL;	//last segment appended so far
	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;	//next frag_list skb not yet consumed
	skb_frag_t *frag = skb_shinfo(head_skb)->frags;	//cursor over the page frags of frag_skb
	unsigned int mss = skb_shinfo(head_skb)->gso_size;	//maximum payload bytes per segment
	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);  //bytes between the MAC header and data: the header length copied into every segment
	struct sk_buff *frag_skb = head_skb;	//skb that owns the frags currently being walked
	unsigned int offset = doffset;	//offset (from the MAC header) of the first payload byte of the current segment
	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);	//length of the outer (tunnel) headers; presumably 0 for non-encapsulated packets - this is what enables GSO of encapsulated packets
	unsigned int headroom;
	unsigned int len;
	__be16 proto;
	bool csum;
	int sg = !!(features & NETIF_F_SG);	//whether the output path supports scatter-gather
	int nfrags = skb_shinfo(head_skb)->nr_frags;
	int err = -ENOMEM;
	int i = 0;	//index of the current frag within frag_skb
	int pos;
	int dummy;	//receives the second output of skb_network_protocol(); not used afterwards

	__skb_push(head_skb, doffset);		//move data back by doffset so that it points at the MAC header
	proto = skb_network_protocol(head_skb, &dummy);	//protocol type of the packet
	if (unlikely(!proto))
		return ERR_PTR(-EINVAL);

	//When csum is false, each segment's checksum is computed in software
	//at perform_csum_check (unless remcsum_offload is set).
	csum = !head_skb->encap_hdr_csum &&
	    !!can_checksum_protocol(features, proto);

	headroom = skb_headroom(head_skb);	//headroom of the original packet, reproduced in every segment
	pos = skb_headlen(head_skb);		//linear-area length; from here on pos is the offset up to which source data has been accounted for

	do {
		struct sk_buff *nskb;
		skb_frag_t *nskb_frag;
		int hsize;
		int size;

		len = head_skb->len - offset;	//bytes still to be segmented, headers excluded
		if (len > mss)			//a segment carries at most mss payload bytes
			len = mss;		//clamp len to mss

		hsize = skb_headlen(head_skb) - offset;	//linear-area bytes of head_skb still to be copied
		if (hsize < 0)
			hsize = 0;
		if (hsize > len || !sg)		//without SG the whole segment payload goes into the linear area
			hsize = len;

		//Taken when nothing is left in head_skb's linear area, the frags of
		//the current skb are used up, and the next frag_list skb has linear
		//data that either equals len exactly or may be combined with frags (sg).
		//NOTE(review): list_skb is dereferenced without a visible NULL check -
		//presumably the caller's skb layout guarantees it is non-NULL here; confirm.
		if (!hsize && i >= nfrags && skb_headlen(list_skb) &&	//frag_list still holds data
		    (skb_headlen(list_skb) == len || sg)) {
			BUG_ON(skb_headlen(list_skb) > len);	//the linear area of a frag_list skb must not exceed len (at most mss)

			i = 0;
			nfrags = skb_shinfo(list_skb)->nr_frags;
			frag = skb_shinfo(list_skb)->frags;
			frag_skb = list_skb;
			pos += skb_headlen(list_skb);	//account for the linear area of list_skb

			while (pos < offset + len) {	//skip the frags that fit entirely inside this segment
				BUG_ON(i >= nfrags);

				size = skb_frag_size(frag);
				if (pos + size > offset + len)
					break;

				i++;
				pos += size;		//account for this frag
				frag++;
			}

			nskb = skb_clone(list_skb, GFP_ATOMIC);	//clone the skb; the clone holds all of list_skb's data and is trimmed below
			list_skb = list_skb->next;

			if (unlikely(!nskb))
				goto err;

			if (unlikely(pskb_trim(nskb, len))) {	//trim the clone to len bytes
				kfree_skb(nskb);
				goto err;
			}

			hsize = skb_end_offset(nskb);	//hsize is reused to remember the end offset before skb_cow_head()
			if (skb_cow_head(nskb, doffset + headroom)) {	//make sure there is room in front of data for the headers plus the original headroom
				kfree_skb(nskb);
				goto err;
			}

			nskb->truesize += skb_end_offset(nskb) - hsize;	//adjust truesize by the change in end offset
			skb_release_head_state(nskb);	//release the clone's head state; it is re-populated by __copy_skb_header() below
			__skb_push(nskb, doffset);	//move data back by doffset so the headers can be copied in front of the payload
		} else {
			nskb = __alloc_skb(hsize + doffset + headroom,	//otherwise build the segment in a newly allocated skb
					   GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
					   NUMA_NO_NODE);

			if (unlikely(!nskb))
				goto err;

			skb_reserve(nskb, headroom);	//reserve headroom bytes in the new skb
			__skb_put(nskb, doffset);	//extend the linear area by doffset for the headers
		}

		//Append nskb to the result list; from here on the err path frees it
		//through kfree_skb_list(segs).
		if (segs)
			tail->next = nskb;
		else
			segs = nskb;
		tail = nskb;

		__copy_skb_header(nskb, head_skb);	//copy the skb metadata, header offsets included, from head_skb

		skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);	//shift the header offsets by the difference in headroom
		skb_reset_mac_len(nskb);	//reset mac_len

		skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,	//copy doffset + tnl_hlen header bytes, starting tnl_hlen bytes before data (covers the outer headers when encapsulated)
						 nskb->data - tnl_hlen,
						 doffset + tnl_hlen);

		if (nskb->len == len + doffset)		//nskb already holds the full payload (true on the cloned frag_list path, not on the freshly allocated path)
			goto perform_csum_check;

		if (!sg && !nskb->remcsum_offload) {
			nskb->ip_summed = CHECKSUM_NONE;
			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,	//copy len payload bytes into the linear area and checksum them in the same pass
							    skb_put(nskb, len),
							    len, 0);
			SKB_GSO_CB(nskb)->csum_start =
			    skb_headroom(nskb) + doffset;
			continue;
		}

		nskb_frag = skb_shinfo(nskb)->frags;

		skb_copy_from_linear_data_offset(head_skb, offset,	//copy hsize bytes of linear data
						 skb_put(nskb, hsize), hsize);

		skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
			SKBTX_SHARED_FRAG;

		while (pos < offset + len) {	//attach frags until the segment covers [offset, offset + len)
			if (i >= nfrags) {	//frags of the current skb are exhausted: continue with the next frag_list skb
				BUG_ON(skb_headlen(list_skb));	//it must not carry linear data on this path

				i = 0;
				nfrags = skb_shinfo(list_skb)->nr_frags;
				frag = skb_shinfo(list_skb)->frags;
				frag_skb = list_skb;

				BUG_ON(!nfrags);

				list_skb = list_skb->next;	//frag_list case: advance to the next skb
			}

			if (unlikely(skb_shinfo(nskb)->nr_frags >=
				     MAX_SKB_FRAGS)) {
				net_warn_ratelimited(
					"skb_segment: too many frags: %u %u\n",
					pos, mss);
				goto err;
			}

			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
				goto err;

			*nskb_frag = *frag;	//copy the frag descriptor; frag_list and plain frag handling share this loop, which adds complexity
			__skb_frag_ref(nskb_frag);	//presumably takes a reference because the frag is now shared with nskb - confirm
			size = skb_frag_size(nskb_frag);	//size of the source frag before any trimming

			if (pos < offset) {	//frag starts before this segment: skip the part consumed by the previous segment
				nskb_frag->page_offset += offset - pos;
				skb_frag_size_sub(nskb_frag, offset - pos);  //split the frag at its front
			}

			skb_shinfo(nskb)->nr_frags++;

			if (pos + size <= offset + len) {	//frag ends inside this segment: consume it completely
				i++;
				frag++;
				pos += size;
			} else {	//frag extends past this segment: cut its tail; i/frag/pos stay so the next segment resumes in this frag
				skb_frag_size_sub(nskb_frag, pos + size - (offset + len));	//split the frag at its tail
				goto skip_fraglist;
			}

			nskb_frag++;
		}

skip_fraglist:
		//Account for the paged part of the payload.
		nskb->data_len = len - hsize;
		nskb->len += nskb->data_len;
		nskb->truesize += nskb->data_len;

perform_csum_check:
		if (!csum && !nskb->remcsum_offload) {
			nskb->csum = skb_checksum(nskb, doffset,
						  nskb->len - doffset, 0);	//compute the checksum over everything after the doffset header bytes
			nskb->ip_summed = CHECKSUM_NONE;
			SKB_GSO_CB(nskb)->csum_start =
			    skb_headroom(nskb) + doffset;
		}
	} while ((offset += len) < head_skb->len);	//advance by the payload just emitted until all of head_skb is covered

	/* Some callers want to get the end of the list.
	 * Put it in segs->prev to avoid walking the list.
	 * (see validate_xmit_skb_list() for example)
	 */
	segs->prev = tail;

	/* Following permits correct backpressure, for protocols
	 * using skb_set_owner_w().
	 * Idea is to transfer ownership from head_skb to last segment.
	 */
	if (head_skb->destructor == sock_wfree) {
		swap(tail->truesize, head_skb->truesize);
		swap(tail->destructor, head_skb->destructor);
		swap(tail->sk, head_skb->sk);
	}
	return segs;

err:
	kfree_skb_list(segs);	//free every segment built so far
	return ERR_PTR(err);
}


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章