在準備開題的時候查閱碩士論文,發現了一個算法SCAN(Structural Clustering Algorithm for Networks) ,時間複雜度很好,理論上是線性的,在實際結果(論文中的結果,我寫了程序還沒有做實驗)中也是線性的。劃分結果粗略看來也還不錯。
SNAP,全稱Stanford Network Analysis Project,是斯坦福大學提供的一個功能非常強大的開源工具。SNAP官網
僞代碼見SCAN論文及我的代碼實現。不好意思,因爲我也經常下資源,所以下載資源需要1分。
核心的代碼貼到下面,沒有分數的童鞋可以自己調整一下,應該難度不大:
// Label of the most recently created cluster; the first cluster gets label 1.
int CurClusterNum = 0;
/*
 * Visit every node and grow a cluster from each unclassified core.
 * Meaning of the per-node data value:
 *    0  unclassified
 *   >0  cluster label
 *   -1  non-member
 *   -2  hub
 *   -3  outlier
 */
for (TNodeEDatNet<TInt, TInt>::TNodeI NI = net->BegNI(); NI < net->EndNI(); NI++) {
    const int SeedId = NI.GetId();
    const int SeedLabel = net->GetNDat(SeedId);
    // Only unclassified nodes can seed a cluster. A node already marked
    // non-member failed IsCore earlier and would fail it again.
    if (SeedLabel != 0) {
        continue;
    }
    if (!staticScan::IsCore(net, NI, u, e)) {
        // Not a core: mark as non-member for now. A cluster grown later may
        // still absorb it as a border node.
        net->SetNDat(SeedId, -1);
        continue;
    }

    // Open a new cluster around this core.
    CurClusterNum++;
    TIntV community;
    net->SetNDat(SeedId, CurClusterNum);
    community.Add(SeedId);

    // Q holds ids of nodes that already belong to the current cluster but
    // whose neighbourhood has not been expanded yet.
    std::vector<int> Q;
    Q.push_back(SeedId);
    while (!Q.empty()) {
        // Take the last element of Q.
        const int CurId = Q.back();
        Q.pop_back();
        TNodeEDatNet<TInt, TInt>::TNodeI NodeI = net->GetNI(CurId);
        // Only cores extend the cluster. A non-core node keeps the cluster
        // label it was given when it was reached; it just does not expand.
        // The seed was already tested above, so skip the repeated check.
        if (CurId != SeedId && !staticScan::IsCore(net, NodeI, u, e)) {
            continue;
        }
        for (int j = 0; j < NodeI.GetDeg(); j++) {
            const int Nid = NodeI.GetNbrNId(j);
            const int Label = net->GetNDat(Nid);
            if (Label > 0) {
                // Already belongs to a cluster.
                continue;
            }
            // Same strict threshold as IsCore: only neighbours whose
            // similarity exceeds e are reachable from this core.
            if (staticScan::ComputeSim(NodeI, net->GetNI(Nid)) <= e) {
                continue;
            }
            // Decide on queueing BEFORE overwriting the label: previously
            // unclassified nodes must be expanded in turn, while nodes
            // already known to be non-core (-1) only join as members.
            if (Label == 0) {
                Q.push_back(Nid);
            }
            net->SetNDat(Nid, CurClusterNum);
            community.Add(Nid);
        }
    }
    Communities.Add(community);
}
TIntV Hub, Outlier;
// Split the remaining non-member nodes (-1) into hubs and outliers:
// a node whose neighbours carry at least two different cluster labels becomes
// a hub (-2); every other non-member becomes an outlier (-3).
for (TNodeEDatNet<TInt, TInt>::TNodeI NI = net->BegNI(); NI < net->EndNI(); NI++) {
    if (NI.GetDat() == -1) {
        // First positive cluster label seen among the neighbours (0 = none yet).
        int FirstCluster = 0;
        bool BridgesClusters = false;
        for (int i = 0; i < NI.GetDeg() && !BridgesClusters; i++) {
            const int NbrLabel = NI.GetNbrNDat(i);
            if (NbrLabel <= 0) {
                // Neighbour is not in any cluster (unclassified, non-member,
                // or already tagged hub/outlier earlier in this loop).
                continue;
            }
            if (FirstCluster == 0) {
                FirstCluster = NbrLabel;
            } else if (NbrLabel != FirstCluster) {
                BridgesClusters = true;
            }
        }
        if (BridgesClusters) {
            net->SetNDat(NI.GetId(), -2);
            Hub.Add(NI.GetId());
        } else {
            net->SetNDat(NI.GetId(), -3);
            Outlier.Add(NI.GetId());
        }
    }
}
// Hubs and outliers are appended after the clusters, in that order.
Communities.Add(Hub);
Communities.Add(Outlier);
//顯示聚類結果
for (int i = 0; i < Communities.Len(); i++) {
for (int j = 0; j < Communities[i].Len(); j++) {
std::cout<<Communities[i][j]<<"\t";
}
std::cout<<std::endl;
}
/**
 * Structural similarity between two nodes:
 *   (2 + number of shared neighbour ids) / sqrt((deg(I) + 1) * (deg(J) + 1)).
 *
 * The "+ 1" terms and the constant 2 presumably count the nodes themselves as
 * part of their own neighbourhoods, which is only meaningful when NodeI and
 * NodeJ are adjacent (IsCore calls this with a node and one of its
 * neighbours) -- confirm before using it on non-adjacent pairs.
 *
 * @param NodeI first node iterator
 * @param NodeJ second node iterator
 * @return the similarity value as a double
 */
double staticScan::ComputeSim(TNodeEDatNet<TInt, TInt>::TNodeI NodeI,
                              TNodeEDatNet<TInt, TInt>::TNodeI NodeJ) {
    const int ni = NodeI.GetDeg() + 1;
    const int nj = NodeJ.GetDeg() + 1;
    // Count neighbour ids that appear in both adjacency lists.
    int count = 0;
    for (int i = 0; i < NodeI.GetDeg(); i++) {
        for (int j = 0; j < NodeJ.GetDeg(); j++) {
            if (NodeI.GetNbrNId(i) == NodeJ.GetNbrNId(j)) {
                count++;
            }
        }
    }
    // Divide by the square root of the PRODUCT of the neighbourhood sizes.
    // Writing "/ sqrt(ni) * sqrt(nj)" would divide by sqrt(ni) and then
    // multiply by sqrt(nj) because / and * associate left to right.
    return (2 + count) / sqrt(static_cast<double>(ni) * static_cast<double>(nj));
}
bool staticScan::IsCore(const Net& net, TNodeEDatNet<TInt, TInt>::TNodeI NodeI,
int u, double e){
int Count = 0;
for (int i = 0; i < NodeI.GetDeg(); i++) {
//獲取到鄰居節點
TNodeEDatNet<TInt, TInt>::TNodeI NeighborNode = net->GetNI(NodeI.GetNbrNId(i));
double Sim = staticScan::ComputeSim(NodeI, NeighborNode);
// net->SetEDat(NeighborNode.GetId(), NodeI.GetId(), Sim);
// net->SetEDat(NodeI.GetId(), NeighborNode.GetId(), Sim);
if (Sim > e) {
Count++;
}
}
if (Count > u) {
return true;
} else {
return false;
}
}