DNA Sequence(POJ-2778) (AC自動機+矩陣快速冪)

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Each of the next m lines contains a DNA genetic disease segment; the length of each segment is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

題意:給你m個字符串(病毒串),問長度爲n、且不包含這m個串中任何一個作爲子串的字符串共有多少個(結果對100000取模)。

思路:這道題看完題之後,不難發現需要用AC自動機來做,但一開始不得其解。看了一些dalao的博客之後,感覺這道題真的很神奇,是fail數組的另一種用法。我們用所有病毒串構造一個AC自動機,這個AC自動機可以看作一張有向圖,圖上的每個頂點就是Trie樹上的結點,每個結點都可以看作是某個病毒串的前綴,Trie樹的根則是空字符串。然後根據fail數組的構造,如果某個結點的fail指向的結點被標記了(即它的某個後綴是病毒串),那麼這個結點也是不允許走的。這樣,一張只包含合法結點的轉移圖就建立出來了,剩下的就是矩陣部分:把題目簡化成從結點0出發、走n步到達任意允許結點的方案總數,也就是轉移矩陣的n次冪中第0行各元素之和。

AC代碼:

#include <stdio.h>
#include <string>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <math.h>
#include <vector>
#include <map>
#include <queue>
#include <stack>
// Shared state for the automaton and the matrix code below.
typedef long long ll;
const int maxx=110; // size of the per-node arrays and of each matrix dimension
const int maxn=4; // outgoing edges per trie node, one per letter code
const int inf=0x3f3f3f3f; // not referenced by the code visible in this file
const int mod=100000; // every count is reduced modulo this value
using namespace std;
int Trie[maxx][maxn]; // Trie[p][x]: node reached from node p on letter code x; 0 is the root
int fail[maxx]; // failure link of each node, filled in by build()
int cntword[maxx]; // non-zero marks a node that must not be visited
char s[20]; // input buffer for one pattern read in main()
char c['Z'+1]; // maps an uppercase letter to its code 0..3; set in init()
ll n,m; // read in main(): m patterns first, then the exponent n
int k; // index of the last trie node created; nodes are 0..k
// Reset the automaton to a lone root node and fill the letter-to-code table
// that insert() uses to pick a trie edge.
void init()
{
    // The four letters map onto trie edge indices 0..3.
    c['G']=3;
    c['C']=2;
    c['T']=1;
    c['A']=0;

    // Clear the node markers, the edges and the failure links.
    memset(cntword,0,sizeof cntword);
    memset(Trie,0,sizeof Trie);
    memset(fail,0,sizeof fail);

    // Only node 0 (the root) exists.
    k=0;
}
// Add the NUL-terminated pattern `s` to the trie, creating nodes as needed,
// and mark the node where the pattern ends.
void insert(char *s)
{
    int node=0;
    for(const char *cur=s; *cur!='\0'; ++cur)
    {
        // Slot holding the child reached on this letter; 0 means "no child yet".
        int &next=Trie[node][c[*cur]];
        if(next==0)
            next=++k;
        node=next;
    }
    cntword[node]=1;
}
// Breadth-first pass over the trie that fills in the failure links, replaces
// every missing edge with the edge of the failure node, and propagates the
// "forbidden" mark along failure links.
void build()
{
    std::queue<int> pending;

    // Children of the root fail back to the root.
    for(int ch=0; ch<4; ++ch)
    {
        const int child=Trie[0][ch];
        if(child==0)
            continue;
        fail[child]=0;
        pending.push(child);
    }

    while(!pending.empty())
    {
        const int cur=pending.front();
        pending.pop();
        const int fallback=fail[cur];

        // If the node this one falls back to is forbidden, this node is
        // forbidden as well.
        if(cntword[fallback])
            cntword[cur]=1;

        for(int ch=0; ch<4; ++ch)
        {
            const int viaFail=Trie[fallback][ch];
            const int child=Trie[cur][ch];
            if(child==0)
            {
                // No real child: borrow the transition of the failure node.
                Trie[cur][ch]=viaFail;
                continue;
            }
            fail[child]=viaFail;
            pending.push(child);
        }
    }
}
struct matrix
{
    ll mat[maxx][maxx];
    matrix()
    {
        memset(mat,0,sizeof(mat));
    }
} ans,cnt;
// Return the product x*y restricted to indices 0..k, with every entry
// reduced modulo `mod`. Entries outside that range stay zero.
matrix mul(matrix x,matrix y)
{
    matrix product;
    for(int row=0; row<=k; ++row)
    {
        for(int col=0; col<=k; ++col)
        {
            ll acc=0;
            // Reduce after every term, so the running sum stays small.
            for(int mid=0; mid<=k; ++mid)
                acc=(acc+x.mat[row][mid]*y.mat[mid][col])%mod;
            product.mat[row][col]=acc;
        }
    }
    return product;
}
// Raise `x` to the power `y` by repeated squaring over indices 0..k, using
// mul() for every product.
//
// x: base matrix, taken by value so the caller's copy is never modified.
// y: exponent; a value <= 0 yields the identity matrix.
// Returns x^y with entries as produced by mul().
//
// The earlier version ignored `x` and instead read and overwrote the global
// `ans`, so it only gave the right answer when called as matrixpow(ans, ...)
// and left that global changed afterwards.
matrix matrixpow(matrix x,ll y)
{
    matrix result;
    for(int i=0; i<=k; i++)
        result.mat[i][i]=1;
    while(y>0)
    {
        if(y&1)
            result=mul(result,x);
        y>>=1;
        // Square only while bits remain; the final squaring is never used.
        if(y>0)
            x=mul(x,x);
    }
    return result;
}
// Build the transition matrix: entry [i][j] counts the letters that lead
// from node i to node j, skipping every node marked in cntword.
matrix build_mat()
{
    matrix trans;
    for(int from=0; from<=k; ++from)
    {
        if(!cntword[from])
        {
            for(int ch=0; ch<4; ++ch)
            {
                const int to=Trie[from][ch];
                if(!cntword[to])
                    trans.mat[from][to]+=1;
            }
        }
    }
    return trans;
}
int main()
{
    init();
    scanf("%lld%lld",&m,&n);
    for(int i=1; i<=m; i++)
    {
        scanf("%s",s);
        insert(s);
    }
    build();
    ans=build_mat();
    cnt=matrixpow(ans,n);
    ll x=0;
    for(int i=0; i<=k; i++)
    {
        x+=cnt.mat[0][i];
        x%=mod;
    }
    printf("%lld\n",x);
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章