串的模式匹配

還是算法有意思,感覺就是不一樣,今天搞了串的匹配這一節。

 

串的匹配算法主要有兩種:1.Brute-Force算法

2. KMP算法 D.E.Knuth  J.H.Morris  V.R.Pratt以及其改進的形式

 

實現代碼如下:

 

#include <cstring>

#include <iostream>

 

#define MaxSize 100

 

typedef std::string SqString;

 

//Brute-Force算法

int index(SqString s,SqString t)

{

         unsigned int i=0,j,k;

         while(i<(int)s.length())

         {

                   for(j=i,k=0;j<s.length() && k<t.length() && s.at(j)==t.at(k);j++,k++);

                   if(k==t.length())

                            return i;

                   i++;

         }

         return -1;

}

 

//Brute-Force算法另一種實現

int index1(SqString s,SqString t)

{

         int i=0,j=0,k;

         while(i<(int)s.length() && j<(int)t.length())

         {

                   if(s[i]==t[j])

                   {

                            i++;

                            j++;

                   }

                   else

                   {

                            i=i-j+1;

                            j=0;

                   }

         }

 

         if(j>=(int)t.length())

                   k=i-t.length();

         else

                   k=-1;

         return k;

}

 

//KMP算法 D.E.Knuth J.H.Morris V.R.Pratt

void GetNext(SqString t,int next[])

{

         int j,k;

         j=0;k=-1;next[0]=-1;

         while(j<(int)t.length()-1)

         {

                   if(k==-1 || t[j]==t[k])

                   {

                            j++;k++;

                       next[j]=k;

                   }

                   else k=next[k];

         }

}

 

int KMPIndex(SqString s,SqString t)

{

         int next[MaxSize],i=0,j=0,v;

         GetNext(t,next);

         while(i<(int)s.length() && j<(int)t.length())

         {

                   if(j==-1 || s[i]==t[j])

                   { i++;j++;}

                   else j=next[j];                   //i不變,j後退

         }

         if (j>=t.length())

                   v=i-t.length();

         else

                   v=-1;

         return v;

}

 

 

//改進的KMP算法

void GetNextval(SqString t,int nextval[])

{

         int j=0,k=-1;

         nextval[0]=-1;

         while(j<(int)t.length())

         {

                   if(k==-1 || t[j]==t[k])

                   {

                            j++;k++;

                            if(t[j]!=t[k]) nextval[j]=k;

                            else nextval[j]=nextval[k];

                   }

                   else k=nextval[k];

         }

}

 

int KMPIndex1(SqString s,SqString t)

{

         int nextval[MaxSize],i=0,j=0,v;

         GetNextval(t,nextval);

         while(i<(int)s.length() && j<(int)t.length())

         {

                   if(j==-1 || s[i]==t[j])

                   {

                            i++;j++;

                   }

                   else j=nextval[j];

         }

         if(j>=t.length()) v=i-t.length();

         else v=-1;

         return v;

}

 

//在串str中查找子串substr最後一次出現的位置(不適用任何字符串標準函數)

int indexpos(SqString str,SqString substr)

{

         int i,j,k,idx=-1;

         for(i=0;i<(int)str.length();i++)

         {

                   for(j=i,k=0;str[j]==substr[k];j++,k++);//Brute-Force算法部分

                   if(k==(int)substr.length())

                            idx=i;

         }

         return idx;

}

 

//設計一個算法int like(SqString str,SqString pattern),實現以下四種匹配

int like(SqString str,SqString pattern)

{

         int i,j,k;

         for(i=0;i<str.length();i++)

         {

                   j=i;k=0;

                   while(1)

                   {

                            if(str[j]==pattern[k] || pattern[k]=='?'

                                     || pattern[k]=='_')

                            {

                                     j++;k++;

                            }

                            else if(pattern[k]=='//' && str[j]==pattern[k+1])  // '/'非法的字符

                            {

                                     j++;k=k+2;

                            }

                            else break;

                            if(k==pattern.length())

                                     return i;

                   }

         }

         return -1;

}

 

//求串s中出現的第一個最長重複子串的下標和長度

SqString smaxsubstr(SqString s)

{

         int index=0,length=0,length1,i=0,j,k;

         SqString t;

         while(i<(int)s.length())

         {

                   j=i+1;

                   while(j<(int)s.length())

                   {

                            if(s[i]==s[j])

                            {

                                     length1=1;

                                     for(k=1;s[i+k]==s[j+k];k++)

                                               length1++;

                                     if(length1>length)

                                     {

                                               index=i;

                                               length=length1;

                                     }

                                     //j+=length1;                    //沒有考慮到幾個字符相同的情況

                                     j++;

                            }

                            else

                                     j++;

                   }

                   i++;

         }

 

         //測試部分

         std::cout<<"position: "<<index<<" length: "<<length<<std::endl;

         t.resize(length);

         for(i=0;i<length;i++)

                   t[i]=s[index+i];

         return t;

}

 

//我自己題意理解錯了,認爲是具有相同字符纔算是重複子串,得到的算法如下:

void maxsubstr(SqString s,int& lastpos,int& lastlen)

{

         int pos=0;

         int len=1;

         lastpos=pos;lastlen=len;

        

         for(int i=1; i<(int)s.length();i++)

         {

                   if(s[i]==s[i-1] && i<(int)s.length())

                            len++;

                   else

                   {

                            if(len>lastlen)

                            {

                                     lastlen=len;

                                     lastpos=pos;

                            }

                            pos=i;

                            len=1;

                   }

         }

}

 

//兩個字符串st的長度分別爲m,n,求這兩個字符串最大公共子串

//算法的時間複雜度T(m,n)=O(m*n)

//估算最優的T(m,n)=O(m+n),採用KMP進行模式匹配

SqString maxcomstr(SqString s,SqString t)

{

         SqString str;

         int midx=0,mlen=0,tlen,i=0,j,k;

         while(i<(int)s.length())

         {

                   j=0;

                   while(j<(int)t.length())

                   {

                            if(s[i]==t[j])

                            {

                                     tlen=1;

                                     for(k=1;(i+k)<(int)s.length() && (j+k)<(int)t.length()

                                               && (s[i+k]==t[j+k]);k++)

                                               tlen++;

                                     if(tlen>mlen)

                                     {

                                               midx=i;mlen=tlen;

                                     }

                                     //j+=tlen;

                                     j++;

                            }

                            else j++;

                   }

                   i++;

         }

 

         for(i=0;i<mlen;i++)

                   //str[i]=s[midx+i];

                   std::cout<<s[midx+i];

         std::cout<<std::endl;

         str.resize(mlen);

         return str;

}

 

int main()

{

         //SqString main="aababddfabhenidfhdshfjshfjkhsdkj";

         //SqString pattern="abdd";

 

         //SqString main="abcaabbcaaabababaabca";

         //SqString pattern="babab";

 

         //SqString main="a nice string";

         //SqString pattern="?nice?str_ng";

 

         SqString main="aaaabaaaaab";

 

         SqString main1="hellaaabaaabss";

         SqString qq;

 

         int pos,len;

   

         qq=maxcomstr(main,main1);

 

         //maxsubstr(main,pos,len);

         //std::cout<<"position: "<<pos<<" length: "<<len<<std::endl;

 

         //int match;

         //int intenger=-1;

         //unsigned int unintenger=9;

         //std::cout<<(intenger<unintenger)<<std::endl;            注意int unsigned int的比較

         //pos=index(main,pattern);

         //pos=index1(main,pattern);

         //pos=KMPIndex(main,pattern);

         //pos=KMPIndex1(main,pattern);

         //std::cout<<pos<<std::endl;

 

         //pos=indexpos(main,pattern);

 

    //qq=smaxsubstr(main);

         /*match=like(main,pattern);

         if(match==-1)

                   printf("fail!/n");

         else

                   printf("successful!/n");*/

 

         /*

         if(pos>0)

                   printf("the position is: %d/n",pos);

         else

                   printf("fail!/n");*/

         return 1;

}

 

PS:數據結構那本書上的例子代碼有的有問題,我都改過來了,並且註明了。

還是算法有意思,感覺就是不一樣,今天搞了串的匹配這一節。

 

串的匹配算法主要有兩種:1.Brute-Force算法

2. KMP算法 D.E.Knuth  J.H.Morris  V.R.Pratt以及其改進的形式

 

實現代碼如下:

 

#include <cstring>

#include <iostream>

 

#define MaxSize 100

 

typedef std::string SqString;

 

//Brute-Force算法

int index(SqString s,SqString t)

{

         unsigned int i=0,j,k;

         while(i<(int)s.length())

         {

                   for(j=i,k=0;j<s.length() && k<t.length() && s.at(j)==t.at(k);j++,k++);

                   if(k==t.length())

                            return i;

                   i++;

         }

         return -1;

}

 

//Brute-Force算法另一種實現

int index1(SqString s,SqString t)

{

         int i=0,j=0,k;

         while(i<(int)s.length() && j<(int)t.length())

         {

                   if(s[i]==t[j])

                   {

                            i++;

                            j++;

                   }

                   else

                   {

                            i=i-j+1;

                            j=0;

                   }

         }

 

         if(j>=(int)t.length())

                   k=i-t.length();

         else

                   k=-1;

         return k;

}

 

//KMP算法 D.E.Knuth J.H.Morris V.R.Pratt

void GetNext(SqString t,int next[])

{

         int j,k;

         j=0;k=-1;next[0]=-1;

         while(j<(int)t.length()-1)

         {

                   if(k==-1 || t[j]==t[k])

                   {

                            j++;k++;

                       next[j]=k;

                   }

                   else k=next[k];

         }

}

 

int KMPIndex(SqString s,SqString t)

{

         int next[MaxSize],i=0,j=0,v;

         GetNext(t,next);

         while(i<(int)s.length() && j<(int)t.length())

         {

                   if(j==-1 || s[i]==t[j])

                   { i++;j++;}

                   else j=next[j];                   //i不變,j後退

         }

         if (j>=t.length())

                   v=i-t.length();

         else

                   v=-1;

         return v;

}

 

 

//改進的KMP算法

void GetNextval(SqString t,int nextval[])

{

         int j=0,k=-1;

         nextval[0]=-1;

         while(j<(int)t.length())

         {

                   if(k==-1 || t[j]==t[k])

                   {

                            j++;k++;

                            if(t[j]!=t[k]) nextval[j]=k;

                            else nextval[j]=nextval[k];

                   }

                   else k=nextval[k];

         }

}

 

int KMPIndex1(SqString s,SqString t)

{

         int nextval[MaxSize],i=0,j=0,v;

         GetNextval(t,nextval);

         while(i<(int)s.length() && j<(int)t.length())

         {

                   if(j==-1 || s[i]==t[j])

                   {

                            i++;j++;

                   }

                   else j=nextval[j];

         }

         if(j>=t.length()) v=i-t.length();

         else v=-1;

         return v;

}

 

//在串str中查找子串substr最後一次出現的位置(不適用任何字符串標準函數)

int indexpos(SqString str,SqString substr)

{

         int i,j,k,idx=-1;

         for(i=0;i<(int)str.length();i++)

         {

                   for(j=i,k=0;str[j]==substr[k];j++,k++);//Brute-Force算法部分

                   if(k==(int)substr.length())

                            idx=i;

         }

         return idx;

}

 

//設計一個算法int like(SqString str,SqString pattern),實現以下四種匹配

int like(SqString str,SqString pattern)

{

         int i,j,k;

         for(i=0;i<str.length();i++)

         {

                   j=i;k=0;

                   while(1)

                   {

                            if(str[j]==pattern[k] || pattern[k]=='?'

                                     || pattern[k]=='_')

                            {

                                     j++;k++;

                            }

                            else if(pattern[k]=='//' && str[j]==pattern[k+1])  // '/'非法的字符

                            {

                                     j++;k=k+2;

                            }

                            else break;

                            if(k==pattern.length())

                                     return i;

                   }

         }

         return -1;

}

 

//求串s中出現的第一個最長重複子串的下標和長度

SqString smaxsubstr(SqString s)

{

         int index=0,length=0,length1,i=0,j,k;

         SqString t;

         while(i<(int)s.length())

         {

                   j=i+1;

                   while(j<(int)s.length())

                   {

                            if(s[i]==s[j])

                            {

                                     length1=1;

                                     for(k=1;s[i+k]==s[j+k];k++)

                                               length1++;

                                     if(length1>length)

                                     {

                                               index=i;

                                               length=length1;

                                     }

                                     //j+=length1;                    //沒有考慮到幾個字符相同的情況

                                     j++;

                            }

                            else

                                     j++;

                   }

                   i++;

         }

 

         //測試部分

         std::cout<<"position: "<<index<<" length: "<<length<<std::endl;

         t.resize(length);

         for(i=0;i<length;i++)

                   t[i]=s[index+i];

         return t;

}

 

//我自己題意理解錯了,認爲是具有相同字符纔算是重複子串,得到的算法如下:

void maxsubstr(SqString s,int& lastpos,int& lastlen)

{

         int pos=0;

         int len=1;

         lastpos=pos;lastlen=len;

        

         for(int i=1; i<(int)s.length();i++)

         {

                   if(s[i]==s[i-1] && i<(int)s.length())

                            len++;

                   else

                   {

                            if(len>lastlen)

                            {

                                     lastlen=len;

                                     lastpos=pos;

                            }

                            pos=i;

                            len=1;

                   }

         }

}

 

//兩個字符串st的長度分別爲m,n,求這兩個字符串最大公共子串

//算法的時間複雜度T(m,n)=O(m*n)

//估算最優的T(m,n)=O(m+n),採用KMP進行模式匹配

SqString maxcomstr(SqString s,SqString t)

{

         SqString str;

         int midx=0,mlen=0,tlen,i=0,j,k;

         while(i<(int)s.length())

         {

                   j=0;

                   while(j<(int)t.length())

                   {

                            if(s[i]==t[j])

                            {

                                     tlen=1;

                                     for(k=1;(i+k)<(int)s.length() && (j+k)<(int)t.length()

                                               && (s[i+k]==t[j+k]);k++)

                                               tlen++;

                                     if(tlen>mlen)

                                     {

                                               midx=i;mlen=tlen;

                                     }

                                     //j+=tlen;

                                     j++;

                            }

                            else j++;

                   }

                   i++;

         }

 

         for(i=0;i<mlen;i++)

                   //str[i]=s[midx+i];

                   std::cout<<s[midx+i];

         std::cout<<std::endl;

         str.resize(mlen);

         return str;

}

 

int main()

{

         //SqString main="aababddfabhenidfhdshfjshfjkhsdkj";

         //SqString pattern="abdd";

 

         //SqString main="abcaabbcaaabababaabca";

         //SqString pattern="babab";

 

         //SqString main="a nice string";

         //SqString pattern="?nice?str_ng";

 

         SqString main="aaaabaaaaab";

 

         SqString main1="hellaaabaaabss";

         SqString qq;

 

         int pos,len;

   

         qq=maxcomstr(main,main1);

 

         //maxsubstr(main,pos,len);

         //std::cout<<"position: "<<pos<<" length: "<<len<<std::endl;

 

         //int match;

         //int intenger=-1;

         //unsigned int unintenger=9;

         //std::cout<<(intenger<unintenger)<<std::endl;            注意int unsigned int的比較

         //pos=index(main,pattern);

         //pos=index1(main,pattern);

         //pos=KMPIndex(main,pattern);

         //pos=KMPIndex1(main,pattern);

         //std::cout<<pos<<std::endl;

 

         //pos=indexpos(main,pattern);

 

    //qq=smaxsubstr(main);

         /*match=like(main,pattern);

         if(match==-1)

                   printf("fail!/n");

         else

                   printf("successful!/n");*/

 

         /*

         if(pos>0)

                   printf("the position is: %d/n",pos);

         else

                   printf("fail!/n");*/

         return 1;

}

 

PS:數據結構那本書上的例子代碼有的有問題,我都改過來了,並且註明了。

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章