對象釋放函數kmem_cache_free核心函數slab_free的實現詳解

1.kmem_cache_free函數

/*
 * Release object @x back to cache @s.
 *
 * The owning cache is re-derived from the object itself: virt_to_head_page
 * maps @x to the head page of the slab containing it, and that page links
 * back to its kmem_cache. If the lookup yields no cache, the free is a no-op.
 */
void kmem_cache_free(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep = cache_from_obj(s, x);

	if (cachep) {
		/* single-object free: head == x, tail == NULL, cnt == 1 */
		slab_free(cachep, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
		trace_kmem_cache_free(_RET_IP_, x);
	}
}
EXPORT_SYMBOL(kmem_cache_free);

virt_to_head_page函數:

/*
 * Map a kernel virtual address to the head page of the (possibly compound)
 * page that contains it: translate the address to its struct page, then
 * follow any tail-page link back to the head page.
 */
static inline struct page *virt_to_head_page(const void *x)
{
	return compound_head(virt_to_page(x));
}

compound_head函數:

/*
 * Return the head page of a compound page. When a slab spans several
 * contiguous physical pages, every page after the first is a tail page
 * whose struct page carries a first_page pointer back to the head; a
 * head (or order-0) page simply maps to itself.
 */
static inline struct page *compound_head(struct page *page)
{
	return unlikely(PageTail(page)) ? page->first_page : page;
}

2.slab_free函數

/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 *
 * Bulk free of a freelist with several objects (all pointing to the
 * same page) possible by specifying head and tail ptr, plus objects
 * count (cnt). Bulk free indicated by tail pointer being set.
 */
/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This is typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 *
 * Bulk free of a freelist with several objects (all pointing to the
 * same page) possible by specifying head and tail ptr, plus objects
 * count (cnt). Bulk free indicated by tail pointer being set.
 */
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
				      void *head, void *tail, int cnt,
				      unsigned long addr)
{
	void *tail_obj = tail ? : head;	/* single-object free passes tail == NULL; the list tail is then head itself */
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_freelist_hook(s, head, tail);	/* debug-related hook for the freed object list */

redo:
	/*
	 * Determine the currently cpus per cpu slab.
	 * The cpu may change afterward. However that does not matter since
	 * data is retrieved via this pointer. If we are on the same cpu
	 * during the cmpxchg then the free will succeed.
	 */
	preempt_disable();
	c = __this_cpu_ptr(s->cpu_slab);	/* this cpu's kmem_cache_cpu */

	tid = c->tid;	/* transaction id of the current kmem_cache_cpu */
	preempt_enable();
	/* Simplest case 1: the slab holding object x is this cpu's cached slab. */
	if (likely(page == c->page)) {	/* equal means this slab is the current cpu slab */
		set_freepointer(s, tail_obj, c->freelist);	/* equivalent to object->next = c->freelist */

		if (unlikely(!this_cpu_cmpxchg_double(	/* atomically make the freed object(s) the new head of this cpu's freelist */
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				head, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else	/* not the cpu slab: slowpath frees into the cpu's or node's partial slab lists */
		__slab_free(s, page, head, tail_obj, cnt, addr);

}

3.__slab_free函數

/*
 * Slow patch handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
/*
 * Slow path handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *head, void *tail, int cnt,
			unsigned long addr)

{
	void *prior;
	int was_frozen;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) &&
	    !(n = free_debug_processing(s, page, head, tail, cnt,
					addr, &flags)))
		return;

	do {
		if (unlikely(n)) {
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = page->freelist;	/* first free object of the slab we are freeing into */
		counters = page->counters;
		set_freepointer(s, tail, prior);
		/*
		 * Why not simply new = *page? struct page is large; copying
		 * only counters + freelist (a double word) is cheaper than
		 * copying the whole structure.
		 */
		new.counters = counters; /* counters overlays a union, so this also sets inuse, objects and frozen in new */
		was_frozen = new.frozen; /* frozen == 1 means some cpu has this slab cached */
		new.inuse -= cnt;	 /* inuse: number of objects currently handed out from this slab */
/* 1) "!new.inuse || !prior" is true when either the slab becomes completely
      free after this release (its only allocated object(s) are being freed),
      or the old freelist was NULL, i.e. the slab was on the node's full list.
   2) If in addition !was_frozen holds (no cpu has this slab cached), the
      kmem_cache_node's partial/full lists must be manipulated, so call
      get_node and take the node's list_lock.
   3) If !was_frozen is false (some cpu - possibly this very cpu, since the
      page is never locked from slab_free onwards and the page == c->page
      check there only reduces the odds - has the slab cached), simply chain
      the object onto page->freelist.
   4) If "!new.inuse || !prior" is false, no node list manipulation is
      needed; simply chain the object onto page->freelist. */
		if ((!new.inuse || !prior) && !was_frozen) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * Slab was on no list before and will be partially empty
				 * We can defer the list move and instead freeze it.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

	                        n = get_node(s, page_to_nid(page));
				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed then we may
				 * drop the list_lock without any processing.
				 *
				 * Otherwise the list_lock will synchronize with
				 * other processors updating the list of slabs.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		head, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
		 */
                if (was_frozen)
                        stat(s, FREE_FROZEN);
                return;
        }
/* If the slab is entirely free after this release, consider discarding it:
   kmem_cache_node keeps only partial and full lists, there is no free list.
   It is actually discarded only when the node already holds more partial
   slabs than the s->min_partial threshold; in that case goto slab_empty. */
	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
		goto slab_empty;

	/*
	 * Objects left in the slab. If it was not on the partial list before
	 * then add it.
	 */
/* prior == NULL means the slab had no free object before this release,
   i.e. it sat on the full list: move it onto the partial list. */
	if (kmem_cache_debug(s) && unlikely(!prior)) {
		remove_full(s, page);
		add_partial(n, page, DEACTIVATE_TO_TAIL);
		stat(s, FREE_ADD_PARTIAL);
	}
	spin_unlock_irqrestore(&n->list_lock, flags);	/* done with the node lists, release the node lock */
	return;

slab_empty:
	if (prior) {
		/*
		 * Slab on the partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* Slab must be on the full list */
		/*
		 * The slab held exactly one object; once it was allocated the
		 * slab lived on the full list, so freeing that object requires
		 * unlinking the slab from the full list first.
		 */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}

discard_slab函數:

/*
 * Retire a completely free slab: drop it from the owning node's slab and
 * object statistics, then release its backing pages.
 */
static void discard_slab(struct kmem_cache *s, struct page *page)
{
	int node = page_to_nid(page);

	dec_slabs_node(s, node, page->objects);
	free_slab(s, page);
}

dec_slabs_node函數:

/*
 * Account for the removal of one slab (holding @objects objects) from
 * node @node of cache @s.
 */
static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
	struct kmem_cache_node *node_info = get_node(s, node);

	/* independent atomic counters; update order does not matter */
	atomic_long_sub(objects, &node_info->total_objects);
	atomic_long_dec(&node_info->nr_slabs);
}

free_slab函數:

/*
 * Free a slab's pages, deferring via call_rcu for SLAB_DESTROY_BY_RCU
 * caches so the release happens only after an RCU grace period.
 */
static void free_slab(struct kmem_cache *s, struct page *page)
{
	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
		struct rcu_head *head;

		if (need_reserve_slab_rcu) {
			/* rcu_head lives in reserved space at the very end of the slab's pages */
			int order = compound_order(page);
			int offset = (PAGE_SIZE << order) - s->reserved;
			VM_BUG_ON(s->reserved != sizeof(*head));
			head = page_address(page) + offset;
		} else {
			/* reuse page->lru's storage as the rcu_head for the callback */
			head = (void *)&page->lru;
		}
		call_rcu(head, rcu_free_slab);
	} else
		__free_slab(s, page);
}

__free_slab函數:

/*
 * Tear down a slab's page state and hand its pages back to the page
 * allocator.
 */
static void __free_slab(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page);	/* page-count order recorded in the slab's head page */
	int pages = 1 << order;			/* number of physical pages backing the slab */

	if (kmem_cache_debug(s)) {
		void *p;
		/* debug caches: validate slab padding and every object's red zone before release */
		slab_pad_check(s, page);
		for_each_object(p, s, page_address(page),
						page->objects)
			check_object(s, page, p, SLUB_RED_INACTIVE);
	}

	kmemcheck_free_shadow(page, compound_order(page));
	/* subtract these pages from the zone's slab page accounting */
	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		-pages);

	__ClearPageSlab(page);
	reset_page_mapcount(page);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += pages;	/* credit pages to the ongoing reclaim, if any */
	__free_pages(page, order);	/* actually return the physical pages to the buddy allocator */
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章