B-樹實現2

/**
 *  BTree.h
 *
 *  採用網上流傳已久的插入刪除算法——回溯方式
 *  與《算法導論》中的不一樣,但是效率更高
 *  
 *  [email protected]
 *  2012/3/5
 *
 **/

#ifndef BTREE_H
#define BTREE_H

#include <cstddef>
#include <cstring>
#include <iostream>
#include <vector>
using std::vector;
using std::cout;
using std::endl;

/**
 *  Knuth's definition, a B-tree of order m:
 *   Every node has at most m children.
 *   Every node (except root) has at least ceil(m/2) children.
 *   The root has at least two children if it is not a leaf node.
 *   All leaves appear in the same level, and carry information.
 *   A non-leaf node with k children contains k-1 keys.
 *
 *  BTreeNode is an implementation detail and ideally should not be exposed to users.
 **/
template< typename FileName, typename DiskAddr, int m >
class BTree;

/**
 *  One node of a B-tree of order m, mapping FileName keys to DiskAddr values.
 *
 *  The key/value arrays hold m slots and the child array m+1 slots, i.e. one
 *  more than MaxKeyCnt keys and m children, so that a node can temporarily
 *  overflow before it is split.
 *  All state is private; BTree<FileName, DiskAddr, m> is a friend and is the
 *  only code that modifies a node.
 **/
template< typename FileName, typename DiskAddr, int m >
class BTreeNode
{
public:
	static const int MaxKeyCnt = m-1;              // most keys a node may keep
	static const int MinKeyCnt = ((m+1)>>1)-1;     // ceil(m/2)-1

private:
	FileName filename[m];    // one spare slot: insert first, split afterwards
	DiskAddr fileaddr[m];

	// number of subtrees: ceil(m/2) ~ m (plus one spare slot)
	BTreeNode *child[m+1];
	BTreeNode *parent;

	int keycnt;
	bool isleaf;

public:
	// Initializers are listed in declaration order (parent, keycnt, isleaf).
	BTreeNode(): parent(NULL), keycnt(0), isleaf(false)
	{
		// every child pointer starts out null
		for ( int i = 0; i < m+1; ++ i )
			child[i] = NULL;
	}
	~BTreeNode() {}

	/** True when the node holds exactly MaxKeyCnt keys. */
	bool IsFull() const { return ( keycnt == MaxKeyCnt ); }

	/**
	 *  Key stored at position index.
	 *  Throws index (an int) when index is not in [0, keycnt).
	 **/
	FileName const GetKey(int index) {
		CheckIndex( index );
		return filename[index];
	}
	FileName const& GetKey(int index) const {
		CheckIndex( index );
		return filename[index];
	}

	/**
	 *  Value stored at position index.
	 *  Throws index (an int) when index is not in [0, keycnt).
	 **/
	DiskAddr const GetValue(int index){
		CheckIndex( index );
		return fileaddr[index];
	}
	DiskAddr const& GetValue(int index) const{
		CheckIndex( index );
		return fileaddr[index];
	}

	friend class BTree<FileName, DiskAddr, m>;

private:
	// Shared bounds check: only slots below keycnt hold live entries.
	void CheckIndex(int index) const {
		if ( index < 0 || index >= keycnt )
			throw index;
	}
};

//===================================================================================================
/**
 *  B-tree of order m that maps FileName keys to DiskAddr values.
 *  FileName must at least provide operator< (used by the in-node search).
 **/
template< typename FileName, typename DiskAddr, int m >
class BTree
{
public:
	typedef BTreeNode<FileName, DiskAddr, m> Node;
	typedef Node& RNode;
	typedef Node* PNode;
	typedef Node const * PCNode;

private:
	PNode root;		// NULL while the tree is empty

public:
	BTree(): root(NULL) {}
	~BTree();
	// Looks name up; returns the node holding it and sets index to its slot,
	// or returns NULL with index == -1 when name is absent.
	PCNode Search( FileName name, int& index );
	// Adds (name, addr); returns false when name is already present.
	bool Insert( FileName name, DiskAddr addr );
	// Removes name; returns false when name is not present.
	bool Delete( FileName name );

	// test helper: prints the whole tree to cout, one level per line
	void PrintTree();
	// test helper: prints the entries of a single node
	void PrintTree(PNode pnode);

private:
	PNode Allocate( bool isleaf=false );
	void Clear( PNode pnode );

	// search helpers: whole subtree / inside one node
	PNode SubTreeSearch( PNode pnode, FileName name, int& index );
	bool NodeSearch( PNode pnode, FileName name, int& index );

	// insertion helpers
	int NodeInsert( PNode pnode, FileName name, DiskAddr addr );
	void SplitNode( PNode pnode );

	// deletion / rebalancing helpers
	PNode FindMinNode( PNode pnode );
	PNode FindMaxNode( PNode pnode );
	void Update( PNode pnode );
	void FromLeftSibling( PNode child, PNode parent, int chind );
	void FromRightSibling( PNode child, PNode parent, int chind );
	void MergeChild( PNode pnode, int index );
};

//=======================================================================================
//public:
/**
 *  Destructor: hands the root node, if there is one, to Clear().
 **/
template< typename FileName, typename DiskAddr, int m >
inline BTree<FileName, DiskAddr, m>::~BTree()
{
	if ( root == NULL )
		return;		// empty tree, nothing to hand over
	Clear(root);
}

/**
 *  Looks name up in the whole tree.
 *  Found:     returns a pointer-to-const to the node holding the key and
 *             stores the key's position inside that node in index.
 *  Not found: returns NULL and sets index to -1 (also for an empty tree).
 **/
template< typename FileName, typename DiskAddr, int m >
typename BTree<FileName, DiskAddr, m>::PCNode 
	BTree<FileName, DiskAddr, m>::Search(FileName name, int& index)
{
	// An empty tree cannot contain anything.
	if ( root == NULL )
	{
		index = -1;
		return NULL;
	}

	PNode found = SubTreeSearch( root, name, index );
	// On a miss SubTreeSearch still reports the node it stopped at; callers
	// of Search must not see that node.
	if ( index == -1 )
		return NULL;
	return found;
}

/**
 *  Inserts the pair (name, addr).
 *
 *  Insertion outline:
 *	1. Locate the node where the key belongs (always a leaf) and insert it there.
 *	2. If that leaf now holds more than m-1 keys, split it.
 *
 *  Split outline (see SplitNode):
 *	1. Split the node, producing one new node.
 *	2. Move the middle key up into the parent.
 *	3. If the parent now holds more than m-1 keys, split it as well;
 *	   otherwise stop.
 *
 *  Returns false when name is already stored, true otherwise.
 **/
template< typename FileName, typename DiskAddr, int m >
bool BTree<FileName, DiskAddr, m>::Insert( FileName name, DiskAddr addr )
{
	// First key ever: create a leaf root that holds just this entry.
	if ( root == NULL )
	{
		root = Allocate(true);
		root->filename[0] = name;
		root->fileaddr[0] = addr;
		root->keycnt = 1;
		return true;
	}

	int slot;
	PNode leaf = SubTreeSearch( root, name, slot );	// on a miss this is a leaf
	if ( slot != -1 )				// key already present
		return false;

	NodeInsert( leaf, name, addr );
	if ( leaf->keycnt > Node::MaxKeyCnt )
		SplitNode( leaf );
	return true;
}

/**
 *  Removes the key name.
 *
 *  Deletion outline:
 *	1. Find the node that holds the key.
 *	2. If it is an internal node:
 *		a. take the leaf holding the smallest key of the subtree to the
 *		   right of the key (FindMinNode on child[index+1]);
 *		b. overwrite the key being deleted with that leaf's first entry,
 *		   then remove that first entry from the leaf;
 *		c. rebalance the leaf.
 *	   If it is a leaf:
 *		a. remove the key and shift the following entries forward;
 *		b. rebalance the leaf.
 *
 *  Rebalancing outline (see Update):
 *		a. nothing to do if the node still holds enough keys;
 *		b. otherwise borrow one key from a sibling that can spare one;
 *		c. otherwise merge with a sibling and rebalance the parent.
 *
 *  Returns false when the tree is empty or name is not stored, true otherwise.
 **/
template< typename FileName, typename DiskAddr, int m >
bool BTree<FileName, DiskAddr, m>::Delete( FileName name )
{
	if ( root == NULL )
		return false;

	int slot;
	PNode holder = SubTreeSearch( root, name, slot );
	if ( slot == -1 )
		return false;

	// The entry is always physically removed from a leaf.
	PNode victim = holder;
	if ( !holder->isleaf )
	{
		victim = FindMinNode( holder->child[slot+1] );
		holder->filename[slot] = victim->filename[0];
		holder->fileaddr[slot] = victim->fileaddr[0];
		slot = 0;		// the leaf entry that was copied up is the one to drop
	}

	// Close the gap left by the removed entry.
	const int remaining = victim->keycnt - 1;
	for ( int k = slot; k < remaining; ++ k )
	{
		victim->filename[k] = victim->filename[k+1];
		victim->fileaddr[k] = victim->fileaddr[k+1];
	}
	victim->keycnt = remaining;

	if ( victim->keycnt < Node::MinKeyCnt )
		Update( victim );
	return true;
}

/**
 *  Test helper: prints the tree to cout level by level.
 *  Each output line is "level: N " followed by every node of that level as
 *  printed by PrintTree(PNode). Prints nothing for an empty tree.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::PrintTree()
{
	if ( NULL == root )
		return;

	// vector<PNode>::iterator is a dependent type, hence the typename.
	typedef typename vector<PNode>::iterator NodeIter;

	vector<PNode> current, next;	// nodes of this level / of the level below
	int level = 0;

	current.push_back( root );
	while ( !current.empty() )
	{
		cout << "level: " << level << " ";
		for ( NodeIter it = current.begin(); it != current.end(); ++ it )
			PrintTree( *it );
		cout << endl;

		// All leaves sit on the same level, so checking the first node is
		// enough to know whether there is another level to visit.
		if ( false == current.front()->isleaf )
		{
			for ( NodeIter it = current.begin(); it != current.end(); ++ it )
			{
				PNode pnode = *it;
				for ( int j = 0; j < pnode->keycnt+1; ++ j )
					next.push_back( pnode->child[j] );
			}
		}
		current.clear();
		current.swap( next );	// next is empty again after the swap
		++ level;
	}
}
/**
 *  Test helper: prints one node to cout as "[ key(value) key(value) ... ] ".
 *  Does nothing when pnode is NULL.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::PrintTree(PNode pnode)
{
	if ( pnode == NULL )
		return;

	cout << "[ ";
	int k = 0;
	while ( k < pnode->keycnt )
	{
		cout << pnode->filename[k] << "(" <<pnode->fileaddr[k] << ") ";
		++ k;
	}
	cout << "] ";
}

//=======================================================================================
//private:
/**
 *  Creates a new node on the heap and marks it as leaf / non-leaf.
 *  Returns the new node.
 **/
template< typename FileName, typename DiskAddr, int m >
typename BTree<FileName, DiskAddr, m>::PNode BTree<FileName, DiskAddr, m>::Allocate( bool isleaf )
{
	PNode created = new Node();
	created->isleaf = isleaf;	// the constructor defaults this flag to false
	return created;
}

/**
 *  Frees the subtree rooted at pnode, children first and pnode itself last.
 *  pnode must not be used after the call. A NULL pnode is ignored.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::Clear(PNode pnode)
{
	if ( NULL == pnode )
		return;

	// A non-leaf node with keycnt keys owns keycnt+1 children, so the last
	// child index is keycnt itself. Slots beyond that may hold stale
	// pointers left over from splits and must not be followed.
	if ( false == pnode->isleaf )
	{
		for ( int i = 0; i <= pnode->keycnt; ++ i )
			Clear( pnode->child[i] );
	}
	delete pnode;
}

/**
 *  Searches the subtree rooted at pnode for the key name.
 *  Found:     returns the node holding the key; index is the key's position
 *             inside that node.
 *  Not found: index is set to -1, but the node where the search stopped is
 *             still returned (Insert relies on this).
 *             NULL is returned only if a NULL node is reached.
 **/
template< typename FileName, typename DiskAddr, int m >
typename BTree<FileName, DiskAddr, m>::PNode 
	BTree<FileName, DiskAddr, m>::SubTreeSearch( PNode pnode, FileName name, int& index )
{
	// Walk down from pnode one level per iteration.
	for ( PNode cur = pnode; cur != NULL; )
	{
		int pos;
		if ( NodeSearch( cur, name, pos ) )
		{
			index = pos;		// pos is the key's slot
			return cur;
		}
		if ( cur->isleaf )
		{
			index = -1;		// nowhere left to descend
			return cur;
		}
		cur = cur->child[pos];		// pos is the child to continue in
	}

	index = -1;
	return NULL;
}

/**
 *  Binary search among the keys of a single node.
 *  Match:    returns true, index is the position of the key.
 *  No match: returns false, index is the number of the child pointer to
 *            follow (equally, the position where the key would be inserted).
 **/
template< typename FileName, typename DiskAddr, int m >
bool BTree<FileName, DiskAddr, m>::NodeSearch( PNode pnode, FileName name, int& index )
{
	int lo = 0;
	int hi = pnode->keycnt - 1;

	while ( lo <= hi )
	{
		const int mid = (lo + hi) / 2;
		FileName& probe = pnode->filename[mid];

		// FileName only has to provide operator<
		if ( probe < name )
		{
			lo = mid + 1;
			continue;
		}
		if ( name < probe )
		{
			hi = mid - 1;
			continue;
		}

		// neither smaller nor larger: this is the key
		index = mid;
		return true;
	}

	index = lo;
	return false;
}

/**
 *  Inserts (name, addr) directly into pnode, which need not be a leaf.
 *  Entries and child pointers from the insert position onward move one slot
 *  to the right, and the child pointer at the insert position is set to NULL.
 *  Returns the position at which the key was stored.
 **/
template< typename FileName, typename DiskAddr, int m >
int BTree<FileName, DiskAddr, m>::NodeInsert( PNode pnode, FileName name, DiskAddr addr )
{
	int slot;
	NodeSearch( pnode, name, slot );

	const int oldcnt = pnode->keycnt;

	// Open a gap for the new entry.
	for ( int k = oldcnt; k > slot; -- k )
	{
		pnode->filename[k] = pnode->filename[k-1];
		pnode->fileaddr[k] = pnode->fileaddr[k-1];
	}
	// Child pointers slot..oldcnt all move right by one
	// (harmless when pnode is a freshly created, empty root).
	for ( int c = oldcnt; c >= slot; -- c )
		pnode->child[c+1] = pnode->child[c];

	pnode->filename[slot] = name;
	pnode->fileaddr[slot] = addr;
	pnode->child[slot] = NULL;
	pnode->keycnt = oldcnt + 1;

	return slot;
}

/**
 *  Splits the over-full node pnode into two nodes and moves the middle key up
 *  into the parent. A new root is created when pnode has no parent, and the
 *  parent is split in turn if it overflows.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::SplitNode( PNode pnode )
{
	PNode sibling = Allocate( pnode->isleaf );

	// pnode keeps leftCnt keys, the key at position leftCnt moves up, and
	// the rightCnt keys after it go to the new sibling.
	const int leftCnt = (Node::MaxKeyCnt)>>1;
	const int rightCnt = Node::MaxKeyCnt-leftCnt;
	const int from = leftCnt + 1;		// first slot that moves to sibling

	// Element-wise assignment so FileName/DiskAddr copy semantics are honoured.
	for ( int k = 0; k < rightCnt; ++ k )
	{
		sibling->filename[k] = pnode->filename[from+k];
		sibling->fileaddr[k] = pnode->fileaddr[from+k];
	}
	if ( !pnode->isleaf )
	{
		// rightCnt keys come with rightCnt+1 children, which must be re-parented
		for ( int c = 0; c <= rightCnt; ++ c )
		{
			sibling->child[c] = pnode->child[from+c];
			sibling->child[c]->parent = sibling;
		}
	}
	sibling->keycnt = rightCnt;
	pnode->keycnt = leftCnt;

	// Move the middle key up.
	if ( pnode->parent == NULL )		// pnode is the root
	{
		PNode fresh = Allocate();
		pnode->parent = fresh;
		root = fresh;
	}
	PNode up = pnode->parent;
	const int at = NodeInsert( up, pnode->filename[leftCnt], pnode->fileaddr[leftCnt] );

	// NodeInsert nulled child[at]; hook both halves in around the new key.
	up->child[at] = pnode;
	up->child[at+1] = sibling;
	sibling->parent = up;

	if ( up->keycnt > Node::MaxKeyCnt )
		SplitNode( up );
}

/**
 *  Returns the leaf reached from pnode by always following the first child,
 *  i.e. the node that holds the smallest key of the subtree rooted at pnode.
 **/
template< typename FileName, typename DiskAddr, int m >
inline typename BTree<FileName, DiskAddr, m>::PNode 
	BTree<FileName, DiskAddr, m>::FindMinNode( PNode pnode )
{
	PNode cur = pnode;
	for ( ; !cur->isleaf; cur = cur->child[0] )
	{
		// keep descending along the leftmost edge
	}
	return cur;
}

/**
 *  Returns the leaf reached from pnode by always following the last child,
 *  i.e. the node that holds the largest key of the subtree rooted at pnode.
 **/
template< typename FileName, typename DiskAddr, int m >
inline typename BTree<FileName, DiskAddr, m>::PNode 
	BTree<FileName, DiskAddr, m>::FindMaxNode( PNode pnode )
{
	PNode cur = pnode;
	for ( ; !cur->isleaf; cur = cur->child[ cur->keycnt ] )
	{
		// keep descending along the rightmost edge
	}
	return cur;
}

/**
 *  Rebalances pnode after a deletion so the tree satisfies the B-tree rules.
 *
 *   - Nothing happens if pnode holds at least MinKeyCnt keys, or if it is a
 *     non-empty root.
 *   - An empty root is freed and replaced by its first child; the tree
 *     becomes empty (root == NULL) when that root was a leaf.
 *   - Otherwise a key is borrowed from the left or right sibling if that
 *     sibling holds more than MinKeyCnt keys; failing that, pnode is merged
 *     with a sibling and the parent is rebalanced in turn.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::Update( PNode pnode )
{
	if ( pnode->keycnt >= Node::MinKeyCnt || ( pnode == root && pnode->keycnt > 0 ) )
		return;
	if ( pnode == root && 0 == pnode->keycnt )
	{
		// The tree shrinks by one level. The old root is always released,
		// including the case where it was a leaf and the tree is now empty.
		PNode oldroot = root;
		root = oldroot->isleaf ? static_cast<PNode>(NULL) : oldroot->child[0];
		if ( NULL != root )
			root->parent = NULL;
		delete oldroot;
		return;
	}

	PNode parent;
	int index;

	// Find which child of parent pnode is.
	parent = pnode->parent;
	if ( pnode->keycnt > 0 )
		NodeSearch( parent, pnode->filename[0], index );
	else{
		// pnode has no key to search by (happens for m = 3), so scan the
		// parent's child pointers instead.
		for ( index = 0; index < parent->keycnt+1; ++ index )
			if ( parent->child[index] == pnode )
				break;
	}

	if ( index > 0 && parent->child[index-1]->keycnt > Node::MinKeyCnt )
	{
		FromLeftSibling( pnode, parent, index );
	}
	else if ( index < parent->keycnt && parent->child[index+1]->keycnt > Node::MinKeyCnt )
	{
		FromRightSibling( pnode, parent, index );
	}
	else{		// merge
		if ( index == parent->keycnt )    // pnode is the last child: merge with its left sibling
			-- index;
		MergeChild( parent, index );
		if ( parent->keycnt < Node::MinKeyCnt )
			Update( parent );
	}
}

/*
 *  Borrows one key through the parent from the left sibling:
 *  rightmost key of the left sibling --> key chind-1 of parent --> leftmost slot of child.
 *  The left sibling's last child pointer moves over to child as well.
 */
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::FromLeftSibling( PNode child, PNode parent, int chind )
{
	const int sep = chind - 1;		// separator key between the two siblings
	PNode donor = parent->child[sep];

	// Shift everything in child one slot to the right.
	for ( int k = child->keycnt; k > 0; -- k )
	{
		child->filename[k] = child->filename[k-1];
		child->fileaddr[k] = child->fileaddr[k-1];
	}
	for ( int c = child->keycnt; c >= 0; -- c )
		child->child[c+1] = child->child[c];

	// The donor's last subtree becomes child's first subtree.
	child->child[0] = donor->child[ donor->keycnt ];
	if ( !child->isleaf )
		child->child[0]->parent = child;

	// Separator moves down from parent into child.
	child->filename[0] = parent->filename[sep];
	child->fileaddr[0] = parent->fileaddr[sep];
	++ child->keycnt;

	// Donor's last key moves up into parent.
	-- donor->keycnt;
	parent->filename[sep] = donor->filename[ donor->keycnt ];
	parent->fileaddr[sep] = donor->fileaddr[ donor->keycnt ];
}

/*
 *  Borrows one key through the parent from the right sibling:
 *  leftmost key of the right sibling --> key chind of parent --> rightmost slot of child.
 *  The right sibling's first child pointer moves over to child as well.
 */
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::FromRightSibling( PNode child, PNode parent, int chind )
{
	PNode donor = parent->child[chind+1];
	const int tail = child->keycnt;		// first free key slot in child

	// Separator moves down from parent to the end of child, together with
	// the donor's first subtree.
	child->filename[tail] = parent->filename[ chind ];
	child->fileaddr[tail] = parent->fileaddr[ chind ];
	child->child[tail+1] = donor->child[0];
	if ( !child->isleaf )
		child->child[tail+1]->parent = child;
	child->keycnt = tail + 1;

	// Donor's first key moves up into parent.
	parent->filename[ chind ] = donor->filename[0];
	parent->fileaddr[ chind ] = donor->fileaddr[0];

	// Close the gap at the front of the donor.
	-- donor->keycnt;
	const int left = donor->keycnt;
	for ( int k = 0; k < left; ++ k )
	{
		donor->filename[k] = donor->filename[k+1];
		donor->fileaddr[k] = donor->fileaddr[k+1];
	}
	for ( int c = 0; c <= left; ++ c )
		donor->child[c] = donor->child[c+1];
}

/*
 *  Merges pnode->child[keyind], the key pnode->filename[keyind] and
 *  pnode->child[keyind+1] into the left of the two children, deletes the
 *  right child and removes the key from pnode. Does nothing when pnode is NULL.
 */
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::MergeChild( PNode pnode, int keyind )
{
	if ( pnode == NULL )
		return;

	PNode left = pnode->child[keyind];
	PNode right = pnode->child[keyind+1];

	const int sepSlot = left->keycnt;	// where the separator from pnode lands
	const int base = sepSlot + 1;		// where right's content starts in left
	const int moved = right->keycnt;

	// Append right's entries, leaving one slot free for the separator.
	for ( int k = 0; k < moved; ++ k )
	{
		left->filename[base+k] = right->filename[k];
		left->fileaddr[base+k] = right->fileaddr[k];
	}
	if ( !left->isleaf )
	{
		// right's moved+1 subtrees now belong to left
		for ( int c = 0; c <= moved; ++ c )
		{
			left->child[base+c] = right->child[c];
			left->child[base+c]->parent = left;
		}
	}

	// Separator moves down.
	left->filename[sepSlot] = pnode->filename[ keyind ];
	left->fileaddr[sepSlot] = pnode->fileaddr[ keyind ];
	left->keycnt = base + moved;
	delete right;
	right = NULL;

	// Remove the separator and the pointer to the deleted node from pnode.
	const int last = pnode->keycnt - 1;
	for ( int k = keyind; k < last; ++ k )
	{
		pnode->filename[k] = pnode->filename[k+1];
		pnode->fileaddr[k] = pnode->fileaddr[k+1];

		pnode->child[k+1] = pnode->child[k+2];
	}
	pnode->keycnt = last;
}
//=======================================================================================

#endif

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章