Weighted PageRank算法實現

PageRank裏面的邊是沒有權重的,就是說每一個點對另一個點的影響都是一樣的。但是在某些情況下,一個點對另一個點的影響大小並不一致,這就需要用到給邊加權重的方法,於是就有了Weighted PageRank(WPR)這個算法。WPR算法的理論資料相對於PR算法少很多,可以在Google Scholar上找到這篇論文《Weighted PageRank Algorithm》,還可以到一些學校的網站找到這個算法的講解,就差不多了。

我的實現方法是:先構建一個沒有權重的關係矩陣,然後根據已經獲得的矩陣,按照WPR中入度權重和出度權重的計算方法算出權重矩陣,最後用WPR的算法迭代計算。這裏實現的只是一個簡單的實例,具體到實際的數據時還需要做修改。

代碼如下:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
 * Small demonstration of the Weighted PageRank (WPR) algorithm.
 *
 * <p>Usage order: construct with the number of nodes, then call
 * {@link #InitialMatrix()}, {@link #inLinks()}, {@link #outLinks()},
 * {@link #weightMatrix()} and finally {@link #CalculateResult()}.
 *
 * <p>NOTE: all graph data is kept in {@code static} fields, so constructing a
 * second instance overwrites the state used by the first one. The class is not
 * thread-safe.
 */
public class WeightPageRank {
    /** Iteration stops once no rank changes by more than this between rounds. */
    private static final double EPSILON = 1e-10;
    /** Hard upper bound on rounds so {@link #CalculateResult()} always terminates. */
    private static final int MAX_ITERATIONS = 10000;

    private static int count;            // dimension of the matrices (number of nodes)
    static int[][] matrix;               // adjacency matrix: matrix[i][j] == 1 means edge i -> j
    static double[][] weightMatrix;      // weightMatrix[i][j] is the WPR weight of edge i -> j
    static double[] outLinks;            // out-degree of every node
    static double[] inLinks;             // in-degree of every node
    static double[] pageRank;            // current rank of every node
    static double d = 0.85;              // damping factor
    boolean flag = true;                 // true while the iteration has not converged yet

    /**
     * Creates the calculator and gives every node the same initial rank
     * {@code 1.0 / count}.
     *
     * @param count dimension of the matrices, i.e. the number of nodes
     */
    public WeightPageRank(int count)
    {
        WeightPageRank.count = count;
        pageRank = new double[count];
        for (int i = 0; i < count; i++) {
            // 1.0 (not 1) forces floating-point division; 1/count would be 0.
            pageRank[i] = 1.0 / count;
        }
    }

    /**
     * Builds the adjacency matrix from the file {@code graph.txt} in the
     * working directory.
     *
     * <p>Each line has the form {@code A-B,C}: the node before the dash links
     * to every comma-separated node after it. Nodes are labelled with upper-case
     * letters starting at {@code 'A'} (node 0). Blank lines and lines without
     * any target are skipped. If the file does not exist a message is printed
     * and the JVM exits with status 1.
     *
     * @throws IOException if the file cannot be read or a label does not map to
     *                     a node index in {@code [0, count)}
     */
    public void InitialMatrix() throws IOException
    {
        matrix = new int[count][count];
        File file = new File("graph.txt");
        if (!file.exists()) {
            System.out.println("文件不存在!");
            System.exit(1);                // non-zero status: this is a failure
        }

        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            String line;
            // readLine() returning null is the reliable end-of-file signal.
            while ((line = br.readLine()) != null) {
                parseLine(line);
            }
        } finally {
            br.close();                    // also closes the wrapped FileReader
        }
    }

    /** Parses one "FROM-TO1,TO2,..." line and marks its edges in the adjacency matrix. */
    private static void parseLine(String line) throws IOException
    {
        String[] words = line.trim().split("-");
        if (words.length < 2 || words[0].trim().length() == 0) {
            return;                        // blank line, or a node without targets
        }
        int row = nodeIndex(words[0].trim(), line);
        String[] toNodes = words[1].split(",");
        for (int i = 0; i < toNodes.length; i++) {
            String target = toNodes[i].trim();
            if (target.length() == 0) {
                continue;                  // tolerate stray commas
            }
            matrix[row][nodeIndex(target, line)] = 1;
        }
    }

    /** Maps a label whose first character is 'A', 'B', ... to the index 0, 1, ... */
    private static int nodeIndex(String label, String line) throws IOException
    {
        int index = label.charAt(0) - 'A';
        if (index < 0 || index >= count) {
            throw new IOException("Node label '" + label + "' is outside the "
                    + count + "-node graph (line: " + line + ")");
        }
        return index;
    }

    /**
     * Computes the in-degree of every node, i.e. the sum of each column of the
     * adjacency matrix. Requires the adjacency matrix to be populated.
     */
    public void inLinks()
    {
        inLinks = new double[count];
        for (int i = 0; i < count; i++) {
            for (int j = 0; j < count; j++) {
                inLinks[i] += matrix[j][i];
            }
        }
    }

    /**
     * Computes the out-degree of every node, i.e. the sum of each row of the
     * adjacency matrix. Requires the adjacency matrix to be populated.
     */
    public void outLinks()
    {
        outLinks = new double[count];
        for (int i = 0; i < count; i++) {
            for (int j = 0; j < count; j++) {
                outLinks[i] += matrix[i][j];
            }
        }
    }

    /**
     * Builds the edge-weight matrix. Must be called after {@link #inLinks()}
     * and {@link #outLinks()}.
     *
     * <p>For an edge i -> j the weight is the product of
     * <ul>
     *   <li>the in-weight: in-degree of j divided by the summed in-degrees of
     *       all nodes i points to, and</li>
     *   <li>the out-weight: out-degree of j divided by the summed out-degrees
     *       of all nodes i points to.</li>
     * </ul>
     * When a denominator is zero (all targets of i have no out-links) the
     * corresponding factor is taken as 0 instead of producing NaN.
     */
    public void weightMatrix()
    {
        weightMatrix = new double[count][count];
        double[] sumIn = new double[count];
        double[] sumOut = new double[count];
        for (int i = 0; i < count; i++) {
            for (int j = 0; j < count; j++) {
                if (matrix[i][j] == 1) {
                    sumIn[i] += inLinks[j];     // total in-degree of the targets of i
                    sumOut[i] += outLinks[j];   // total out-degree of the targets of i
                }
            }
        }

        for (int i = 0; i < count; i++) {
            for (int j = 0; j < count; j++) {
                if (matrix[i][j] == 1) {
                    double inWeight = sumIn[i] > 0 ? inLinks[j] / sumIn[i] : 0;
                    double outWeight = sumOut[i] > 0 ? outLinks[j] / sumOut[i] : 0;
                    weightMatrix[i][j] = inWeight * outWeight;
                }
            }
        }
    }

    /**
     * Performs one WPR round.
     *
     * <p>For every node j the new rank is
     * {@code (1 - d) / count + d * sum(pagerank[k] * weightMatrix[k][j])} over
     * all nodes k that link to j. All new ranks are computed from the ranks
     * passed in, then normalised so they sum to 1, printed separated by
     * {@code ':'}, and stored in the shared rank array. {@code flag} is set to
     * {@code false} once no rank moved by {@code 1e-10} or more.
     *
     * @param pagerank ranks of the previous round; needs at least
     *                 {@code count} elements
     * @return the shared rank array holding the normalised ranks of this round
     */
    public double[] CalculatePR(double[] pagerank)
    {
        double[] next = new double[count];
        double total = 0;
        for (int j = 0; j < count; j++) {
            double sum = 0;
            for (int k = 0; k < count; k++) {
                if (matrix[k][j] == 1) {
                    // Edge k -> j: its weight lives at weightMatrix[k][j].
                    sum += pagerank[k] * weightMatrix[k][j];
                }
            }
            next[j] = (1 - d) / count + d * sum;
            total += next[j];
        }

        double maxDelta = 0;
        for (int i = 0; i < count; i++) {
            if (total != 0) {
                next[i] = next[i] / total;      // normalise so the ranks sum to 1
            }
            maxDelta = Math.max(maxDelta, Math.abs(next[i] - pagerank[i]));
            System.out.print(next[i] + ":");
        }
        flag = maxDelta >= EPSILON;

        // Copy last so that passing the shared array itself as input stays safe.
        System.arraycopy(next, 0, pageRank, 0, count);
        return pageRank;
    }

    /**
     * Iterates {@link #CalculatePR(double[])} until convergence (or until the
     * iteration cap is reached) and prints the final rank of every node.
     */
    public void CalculateResult()
    {
        double[] pageRanks = pageRank;
        int i = 0;
        while (flag && i < MAX_ITERATIONS) {
            System.out.println("第"+(i+1)+"輪迭代:");
            pageRanks = CalculatePR(pageRanks);
            System.out.println();
            i++;
        }

        System.out.println("最終的結果爲:");
        for (int j = 0; j < count; j++) {
            System.out.println((j+1)+"-----"+pageRanks[j]);
        }
    }

    /**
     * Runs the demo on an 8-node graph read from {@code graph.txt}: prints the
     * adjacency matrix, the weight matrix and then the iteration log.
     */
    public static void main(String[] args) throws IOException
    {
        WeightPageRank pg = new WeightPageRank(8);   // n x n matrix for n nodes
        pg.InitialMatrix();
        pg.inLinks();
        pg.outLinks();
        pg.weightMatrix();
        for (int i = 0; i < count; i++) {
            for (int j = 0; j < count; j++) {
                System.out.print(matrix[i][j]+"  ");
            }
            System.out.println();
        }

        for (int i = 0; i < count; i++) {
            for (int j = 0; j < count; j++) {
                System.out.print(weightMatrix[i][j]+"  ");
            }
            System.out.println();
        }

        pg.CalculateResult();
    }

}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章