It is well known that a DNA sequence consists only of the characters A, C, T and G, and that analyzing segments of a DNA sequence is very useful. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. Scientists have so far found several such segments; the problem is to determine how many DNA sequences of a species do not contain any of those segments.
Suppose that the DNA sequences of a species are sequences consisting of A, C, T and G, and that the length of the sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.
Output
A single integer: the number of DNA sequences of length n that contain none of the given segments, modulo 100000.
Sample Input
4 3 AT AC AG AA
Sample Output
36
題意:給你m個字符串(病毒串),問長度爲n、只由A、C、T、G組成且不含有這m個串中任何一個作爲子串的字符串的數量,結果對100000取模。
思路:這道題的話,看題之後,不難發現需要用AC自動機來做,但是不得其解。看了一些dalao的博客之後,感覺這道題真的很神奇,是fail數組的另一種用法。我們讓所有病毒串構造一個AC自動機,這個AC自動機可以看作一張有向圖,圖上的每個頂點就是Trie樹上的結點,每個結點都可以看作是某個病毒串的前綴,Trie樹的根則是空字符串。然後根據fail數組的構造,如果某個結點fail指向的結點被標記了,那麼這個結點也是不允許走的,這樣,一個符合條件的Trie樹就建立出來了,剩下的就是矩陣部分。把題目簡化成:從結點0出發,只經過允許的結點走n步,到達任意結點的方案總數。設A[i][j]爲從結點i走一步到結點j的方案數,則A的n次冪的第0行之和就是答案,用矩陣快速冪計算即可。
AC代碼:
#include <stdio.h>
#include <string>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <math.h>
#include <vector>
#include <map>
#include <queue>
#include <stack>
// File-level types, constants and shared state for the automaton + matrix solution.
typedef long long ll;
// Capacity bound: sizes the node arrays (Trie, fail, cntword) and the matrix dimension.
const int maxx=110;
// Alphabet size (second dimension of Trie): one slot per symbol A, T, C, G.
const int maxn=4;
// Not referenced anywhere in the visible code.
const int inf=0x3f3f3f3f;
// Modulus applied to matrix entries and to the final answer.
const int mod=100000;
using namespace std;
// Trie[p][x]: node reached from node p on symbol x. While inserting, 0 means
// "no child"; build() later fills the missing entries via the fail links.
int Trie[maxx][maxn];
// fail[p]: failure link of node p (node 0 is the root).
int fail[maxx];
// cntword[p]: non-zero when node p is forbidden, i.e. a segment ends at p or
// (after build()) at a node reachable from p through fail links.
int cntword[maxx];
// Input buffer for one segment read in main().
char s[20];
// Lookup table from an upper-case letter to its symbol index; filled in init().
char c['Z'+1];
// n and m are read in main(): m is the number of segments, n the sequence length.
ll n,m;
// Index of the most recently allocated trie node (nodes are 0..k).
int k;
// Resets the automaton state (node counter, transition table, failure links,
// forbidden-node flags) and fills the letter -> symbol-index lookup table.
void init()
{
    // Letter encoding used as the second index of Trie.
    c['G'] = 3;
    c['C'] = 2;
    c['T'] = 1;
    c['A'] = 0;

    // Clear every per-node table; only the root (node 0) exists afterwards.
    memset(cntword, 0, sizeof(cntword));
    memset(Trie, 0, sizeof(Trie));
    memset(fail, 0, sizeof(fail));
    k = 0;
}
void insert(char *s)
{
int p=0;
int len=strlen(s);
for(int i=0; i<len; i++)
{
int x=c[s[i]];
if(!Trie[p][x])
Trie[p][x]=++k;
p=Trie[p][x];
}
cntword[p]=1;
}
// Computes the failure links breadth-first and completes the transition table:
// every missing edge of a node is redirected to the target its failure node
// has for the same symbol. A node whose failure node is forbidden becomes
// forbidden as well.
void build()
{
    std::queue<int> pending;

    // Children of the root fail back to the root and seed the traversal.
    for (int sym = 0; sym < 4; ++sym)
    {
        const int child = Trie[0][sym];
        if (child == 0)
            continue;
        fail[child] = 0;
        pending.push(child);
    }

    while (!pending.empty())
    {
        const int node = pending.front();
        pending.pop();
        const int fallback = fail[node];

        // If the node this one falls back to is not allowed, neither is this one.
        if (cntword[fallback])
            cntword[node] = 1;

        for (int sym = 0; sym < 4; ++sym)
        {
            int &next = Trie[node][sym];
            if (next == 0)
            {
                // No real child: reuse the failure node's transition.
                next = Trie[fallback][sym];
            }
            else
            {
                fail[next] = Trie[fallback][sym];
                pending.push(next);
            }
        }
    }
}
struct matrix
{
ll mat[maxx][maxx];
matrix()
{
memset(mat,0,sizeof(mat));
}
} ans,cnt;
// Returns the product of x and y restricted to rows/columns 0..k, with every
// entry reduced modulo `mod`; entries outside that range stay zero.
//
// The operands are taken by const reference: a matrix holds maxx * maxx
// 64-bit entries, so pass-by-value copied both operands on every call. The
// result is built in a separate local, so mul(m, m) remains safe.
matrix mul(const matrix &x, const matrix &y)
{
    matrix product;
    for (int row = 0; row <= k; row++)
    {
        for (int col = 0; col <= k; col++)
        {
            for (int mid = 0; mid <= k; mid++)
            {
                // Reduce after every term so the running sum stays below `mod`.
                product.mat[row][col] =
                    (product.mat[row][col] + x.mat[row][mid] * y.mat[mid][col]) % mod;
            }
        }
    }
    return product;
}
// Returns x raised to the power y (over rows/columns 0..k, entries reduced by
// mul) using binary exponentiation. For y <= 0 the identity matrix is returned.
//
// The base is the parameter `x`, which is a by-value copy, so the caller's
// matrix is left untouched. The previous version ignored `x` and instead
// squared the global `ans` in place.
matrix matrixpow(matrix x, ll y)
{
    matrix result;
    for (int i = 0; i <= k; i++)
        result.mat[i][i] = 1; // identity on the used sub-matrix

    while (y > 0)
    {
        if (y & 1)
            result = mul(result, x); // this bit of the exponent is set
        x = mul(x, x);               // advance to the next power of two
        y >>= 1;
    }
    return result;
}
// Builds the one-step transition matrix of the automaton: entry [i][j] counts
// the symbols that lead from allowed node i to allowed node j. Rows and
// columns of forbidden nodes stay zero.
matrix build_mat()
{
    matrix trans;
    for (int from = 0; from <= k; ++from)
    {
        if (!cntword[from])
        {
            for (int sym = 0; sym < 4; ++sym)
            {
                const int to = Trie[from][sym];
                // Several symbols may lead to the same node, hence a count.
                if (!cntword[to])
                    trans.mat[from][to] += 1;
            }
        }
    }
    return trans;
}
int main()
{
init();
scanf("%lld%lld",&m,&n);
for(int i=1; i<=m; i++)
{
scanf("%s",s);
insert(s);
}
build();
ans=build_mat();
cnt=matrixpow(ans,n);
ll x=0;
for(int i=0; i<=k; i++)
{
x+=cnt.mat[0][i];
x%=mod;
}
printf("%lld\n",x);
}