1. The kmem_cache_free function
/* Release object x back to the slab cache it was allocated from. */
void kmem_cache_free(struct kmem_cache *s, void *x)//x is the object being freed
{
s = cache_from_obj(s, x);//resolve the cache that actually owns x; it may differ from the caller-supplied cache, and NULL means a mismatch was detected
if (!s)
return;
slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);//virt_to_head_page maps object x to the head page of the slab containing it; tail=NULL/cnt=1 means a single-object (non-bulk) free
trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
The virt_to_head_page function:
/* Map a kernel virtual address to the head page of its (possibly compound) page. */
static inline struct page *virt_to_head_page(const void *x)
{
struct page *page = virt_to_page(x);//convert the kernel virtual address to its struct page
//if page is a tail page, compound_head returns page->first_page; otherwise it returns page itself
return compound_head(page);
}
The compound_head function:
/* Return the head page of a compound page (or the page itself if not a tail). */
static inline struct page *compound_head(struct page *page)
{
if (unlikely(PageTail(page)))
return page->first_page;//when a slab spans several contiguous physical pages, every page after the first is a tail page whose struct page has first_page pointing at the head page
return page;
}
2. The slab_free function
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
* can perform fastpath freeing without additional function calls.
*
* The fastpath is only possible if we are freeing to the current cpu slab
* of this processor. This typically the case if we have just allocated
* the item before.
*
* If fastpath is not possible then fall back to __slab_free where we deal
* with all sorts of special processing.
*
* Bulk free of a freelist with several objects (all pointing to the
* same page) possible by specifying head and tail ptr, plus objects
* count (cnt). Bulk free indicated by tail pointer being set.
*/
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
void *head, void *tail, int cnt,
unsigned long addr)
{
void *tail_obj = tail ? : head;//for a single-object free tail is NULL, so the tail object is head itself
struct kmem_cache_cpu *c;
unsigned long tid;
slab_free_freelist_hook(s, head, tail);//debug-related hooks (poisoning, tracing) run on each object of the freelist
redo:
/*
 * Determine the currently cpus per cpu slab.
 * The cpu may change afterward. However that does not matter since
 * data is retrieved via this pointer. If we are on the same cpu
 * during the cmpxchg then the free will succeed.
 */
preempt_disable();
c = __this_cpu_ptr(s->cpu_slab);//get this cpu's kmem_cache_cpu
tid = c->tid;//transaction id of the current kmem_cache_cpu; used below to detect concurrent changes
preempt_enable();
//simplest free case: the slab holding the object is the one currently cached by this cpu
if (likely(page == c->page)) {//equal means this slab is the current cpu's active slab
set_freepointer(s, tail_obj, c->freelist);//equivalent to object->next = c->freelist
if (unlikely(!this_cpu_cmpxchg_double(//atomically make the freed object(s) the head of this cpu's freelist, bumping tid
s->cpu_slab->freelist, s->cpu_slab->tid,
c->freelist, tid,
head, next_tid(tid)))) {
note_cmpxchg_failure("slab_free", s, tid);
goto redo;
}
stat(s, FREE_FASTPATH);
} else//not the current cpu's active slab: fall back to the slow path, which may move the slab onto a cpu or node partial list
__slab_free(s, page, head, tail_obj, cnt, addr);
}
3. The __slab_free function
/*
* Slow patch handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
*
* So we still attempt to reduce cache line usage. Just take the slab
* lock and free the item. If there is no additional partial page
* handling required then we can return immediately.
*/
static void __slab_free(struct kmem_cache *s, struct page *page,
void *head, void *tail, int cnt,
unsigned long addr)
{
void *prior;
int was_frozen;
struct page new;//scratch copy: only freelist/counters of the real page are manipulated through it
unsigned long counters;
struct kmem_cache_node *n = NULL;
unsigned long uninitialized_var(flags);
stat(s, FREE_SLOWPATH);
if (kmem_cache_debug(s) &&
!(n = free_debug_processing(s, page, head, tail, cnt,
addr, &flags)))
return;
do {
if (unlikely(n)) {//a previous loop iteration speculatively took the node lock but the cmpxchg failed: drop it before retrying
spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;//first free object of the slab the object belongs to, before this free
counters = page->counters;
set_freepointer(s, tail, prior);//link the freed chain in front of the old freelist: tail->next = prior
//we do not simply do new = *page: struct page is comparatively large, and only freelist+counters (one double word) are needed for the cmpxchg below
new.counters = counters; //counters lives in a union inside struct page, so assigning it also sets inuse, objects and frozen in one go
was_frozen = new.frozen;//frozen==1 means some cpu has this slab cached
new.inuse -= cnt;//inuse is the number of objects currently allocated from this slab
/* 1) (!new.inuse || !prior) is true when either this free returns the last allocated object(s), making the whole slab free, or the slab's freelist was NULL beforehand, i.e. the slab sat on the node's full list.
 2) If, in addition, !was_frozen is true (no cpu has the slab cached), the node's partial/full lists must be touched, so get_node and take the node's list_lock.
 3) If !was_frozen is false (some cpu — possibly this one, since slab_free's page==c->page check is only probabilistic and nothing here holds the page locked — has the slab cached), just push the object onto page->freelist.
 4) If (!new.inuse || !prior) is false, no node list needs updating either; just push the object onto page->freelist. */
if ((!new.inuse || !prior) && !was_frozen) {
if (!kmem_cache_debug(s) && !prior)
/*
 * Slab was on no list before and will be partially empty
 * We can defer the list move and instead freeze it.
 */
new.frozen = 1;
else { /* Needs to be taken off a list */
n = get_node(s, page_to_nid(page));
/*
 * Speculatively acquire the list_lock.
 * If the cmpxchg does not succeed then we may
 * drop the list_lock without any processing.
 *
 * Otherwise the list_lock will synchronize with
 * other processors updating the list of slabs.
 */
spin_lock_irqsave(&n->list_lock, flags);
}
}
} while (!cmpxchg_double_slab(s, page,
prior, counters,
head, new.counters,
"__slab_free"));
if (likely(!n)) {
/*
 * If we just froze the page then put it onto the
 * per cpu partial list.
 */
if (new.frozen && !was_frozen) {
put_cpu_partial(s, page, 1);
stat(s, CPU_PARTIAL_FREE);
}
/*
 * The list lock was not taken therefore no list
 * activity can be necessary.
 */
if (was_frozen)
stat(s, FREE_FROZEN);
return;
}
/* If after this free every object in the slab is free, consider releasing the whole
 slab — kmem_cache_node keeps only partial and full lists, no free list. It is
 actually released only when the node's partial list already holds more slabs than
 the s->min_partial threshold; in that case goto slab_empty. */
if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
goto slab_empty;
/*
 * Objects left in the slab. If it was not on the partial list before
 * then add it.
 */
//prior == NULL means the slab had no free objects before this free, i.e. it was on the full list
if (kmem_cache_debug(s) && unlikely(!prior)) {
remove_full(s, page);
add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
spin_unlock_irqrestore(&n->list_lock, flags);//done touching the kmem_cache_node lists, release the node lock
return;
slab_empty:
if (prior) {
/*
 * Slab on the partial list.
 */
remove_partial(n, page);
stat(s, FREE_REMOVE_PARTIAL);
} else
/* Slab must be on the full list */
/* This is the case of a slab holding exactly one object: once that object was
 allocated the slab went onto the full list, so freeing it requires taking the
 slab off the full list first. */
remove_full(s, page);
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
}
The discard_slab function:
/* Account for and free a completely empty slab. */
static void discard_slab(struct kmem_cache *s, struct page *page)
{
//decrement the slab count and total object count on the owning kmem_cache_node
dec_slabs_node(s, page_to_nid(page), page->objects);
free_slab(s, page);
}
The dec_slabs_node function:
/* Drop one slab (and its @objects objects) from node @node's accounting. */
static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
struct kmem_cache_node *n = get_node(s, node);
atomic_long_dec(&n->nr_slabs);
atomic_long_sub(objects, &n->total_objects);
}
The free_slab function:
/* Free a slab, deferring via RCU when the cache requires it. */
static void free_slab(struct kmem_cache *s, struct page *page)
{
if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
struct rcu_head *head;
if (need_reserve_slab_rcu) {
//the rcu_head lives in space reserved at the end of the slab itself
int order = compound_order(page);
int offset = (PAGE_SIZE << order) - s->reserved;
VM_BUG_ON(s->reserved != sizeof(*head));
head = page_address(page) + offset;
} else {
//otherwise reuse the page's lru field as the rcu_head
head = (void *)&page->lru;
}
call_rcu(head, rcu_free_slab);//actual freeing happens after a grace period
} else
__free_slab(s, page);
}
The __free_slab function:
/* Return a slab's pages to the page allocator, after debug checks and accounting. */
static void __free_slab(struct kmem_cache *s, struct page *page)
{
int order = compound_order(page);//order of the compound allocation, read from the slab's head page
int pages = 1 << order;//number of physical pages in the slab
if (kmem_cache_debug(s)) {
void *p;
slab_pad_check(s, page);//verify the padding between/after objects was not overwritten
for_each_object(p, s, page_address(page),
page->objects)
check_object(s, page, p, SLUB_RED_INACTIVE);//every object must be in the freed (red-inactive) state
}
kmemcheck_free_shadow(page, compound_order(page));
//adjust the zone's slab page counters (reclaimable vs unreclaimable)
mod_zone_page_state(page_zone(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages);
__ClearPageSlab(page);
reset_page_mapcount(page);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;//credit memory reclaim with the pages we are giving back
__free_pages(page, order);//actually release the physical pages
}