bzoj3998 後綴數組複習

對於一個給定的長度爲N的字符串,求它的第K小子串是什麼。輸入還會給出T:T=0表示不同位置的相同子串只算作一個(不重複),T=1表示不同位置的相同子串分別計數(可重複);如果子串總數不足K個,則輸出-1。


我只會後綴數組

sa做法
當T=0時:按sa[1..n]的順序枚舉後綴。後綴sa[i]共有n-sa[i]+1個前綴(即子串),其中最短的height[i]個與sa[i-1]的前綴重複,所以它貢獻n+1-sa[i]-height[i]個新的不同子串。我們用一個變量累加這個值,一旦累加值達到或超過k,答案就是從sa[i]開始、長度爲k-pre+height[i]的串(pre爲加入sa[i]之前的累加值)。爲什麼是k-pre+height[i]呢?我個人理解是:k-pre表示答案是sa[i]貢獻的新子串中的第k-pre個,然而sa[i]的前height[i]個前綴和上一個後綴重複、已經算過了,所以長度還要加上height[i]。
T=1:二分答案在本質不同的子串中排第幾小,然後統計有多少個子串(可重複的,不同位置分別計數)不大於它,判斷這個數量是否達到k
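假設當前的串是SA上排第i的後綴sa[i]的前綴,長度爲len(len>height[i]):排名在它之前的i−1個後綴的所有前綴都比它小,可以用前綴和直接加上;sa[i]自身貢獻len個前綴;排名在它之後的後綴sa[j],不大於該串的前綴個數是min(len, LCP(sa[i],sa[j])),而這個LCP就是height[i+1..j]的最小值。實現時首先在不同子串個數的前綴和上二分出一個位置i,滿足這個串被sa[i]的後綴包含,然後向後for一遍,邊取min邊加上去,直到min變成0爲止。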

#include<cmath>
#include<cstdio>
#include<cstring>
#include<iostream>
#include<algorithm>
using namespace std;
// Capacity of every per-position array below. The string is stored 1-based,
// so position n + 1 (the terminating NUL / zero key) must also be in range.
const int MAXN = 500005;
// n      : length of the input string.
// k      : the query rank K that check() compares its running count against
//          (main may also use it as scratch before the query is read).
// ty     : query type read from input (0 = distinct substrings, non-zero = with repeats).
// sa     : suffix array, sa[r] = 1-based start of the suffix with sorted rank r.
// rank   : rank of the suffix starting at each position once sorting finishes;
//          also used as scratch during sorting.
//          NOTE(review): with `using namespace std;` an unqualified use of this
//          name can be ambiguous with std::rank from <type_traits>.
// height : height[r] = length of the common prefix of suffixes sa[r-1] and sa[r].
// w      : counting-sort buckets.
// x      : current sort key of each position during suffix-array construction.
// m, i, j, tot : general-purpose scratch counters.
int n, m, i, j, k, sa[MAXN], rank[MAXN], height[MAXN], w[MAXN], x[MAXN], tot, ty;
// sum[r]  : number of distinct substrings contributed by suffixes sa[1..r].
// sum1[r] : number of substrings (counted with repeats) of suffixes sa[1..r].
long long sum[MAXN], sum1[MAXN];
// Input string; characters live in c[1..n], c[0] and c[n+1] stay 0.
char c[MAXN];
/**
 * Tests whether the p-th smallest *distinct* substring is at or beyond the
 * K-th smallest substring when equal substrings at different positions are
 * counted separately.
 *
 * Reads the globals n, k, sum[], sum1[] and height[]; they must already be
 * filled in for the current string and query.
 *
 * @param p 1-based rank among distinct substrings. Values larger than the
 *          number of distinct substrings (sum[n]) are clamped to it, so the
 *          predicate stays monotone in p and never reports more substrings
 *          than actually exist.
 * @return true if at least k substrings (with repeats) compare <= the p-th
 *         distinct substring, false otherwise (also false when n < 1 or p < 1).
 */
inline bool check(int p)
{
    if (n < 1 || p < 1) return false;

    // Clamp: past the last distinct substring there is nothing more to count.
    long long target = p;
    if (target > sum[n]) target = sum[n];

    // First suffix-array slot whose cumulative distinct count reaches target;
    // the target substring is a prefix of the suffix stored in that slot.
    const int slot = static_cast<int>(std::lower_bound(sum + 1, sum + n + 1, target) - sum);

    // Its length: skip the height[slot] prefixes already counted for the
    // previous suffix, then take the (target - sum[slot-1])-th new one.
    int len = static_cast<int>(target - sum[slot - 1]) + height[slot];

    // Every prefix of every earlier suffix is smaller; this suffix adds `len`.
    long long covered = sum1[slot - 1] + len;
    if (covered >= k) return true;

    // A later suffix contributes one substring per prefix it shares with the
    // target, i.e. min(len, running minimum of height) prefixes.
    for (int r = slot + 1; r <= n; ++r)
    {
        len = std::min(len, height[r]);
        if (len == 0) break;  // nothing further down can share a prefix
        covered += len;
        if (covered >= k) return true;
    }
    return false;
}
int main()
{
	//freopen("string.in", "r", stdin);
	//freopen("string.out", "w", stdout);
    scanf("%s", c + 1);
    n = strlen(c + 1);
    for(i = 1; i <= n; i ++)
        ++w[x[i] = c[i] - 96];
    for(i = 2; i <= 30; i ++)
        w[i] += w[i - 1];
    for(i = n; i >= 1; i --)
        sa[w[x[i]]--] = i;
    for(k = 1; k <= n; k <<= 1)
    {
        int t = 0;
        for(i = n; i >= n - k + 1; i --)
            rank[++t] = i;
        for(i = 1; i <= n; i ++)
            if (sa[i] - k > 0) rank[++t] = sa[i] - k;
        memset(w, 0, sizeof(w));
        for(i = 1; i <= n; i ++)
            w[x[i]] ++;
        for(i = 2; i <= n; i ++)
            w[i] += w[i - 1];
        for(i = n; i >= 1; i --)
            sa[w[x[rank[i]]]--] = rank[i];
        int m = 0;
        for(i = 1; i <= n; i ++)
        {
            int u = sa[i], v = sa[i - 1];
            if (x[u] != x[v] || x[u + k] != x[v + k]) rank[u] = ++m;
            else rank[u] = m;
        }
        if (m == n) break;
        for(i = 1; i <= n; i ++)
            swap(rank[i], x[i]);
    }
    tot = 0;
    for(i = 1; i <= n; i ++)
    {
        if (tot) tot --;
        int j = sa[rank[i] - 1];
        while (c[i + tot] == c[j + tot]) tot ++;
        height[rank[i]] = tot;
    }
    cin >> ty >> k;
    if (!ty)
    {
        int tot = 0;
        for(i = 1; i <= n; i ++)
        {
            int pre = tot;
            tot += n + 1 - sa[i] - height[i];
            if (tot >= k)
            {
            	 cout << i << endl;
                int j = k - pre + height[i];
                for(k = sa[i]; k < sa[i] + j; k ++)
                    printf("%c", c[k]);
                break;
            }
        }
    }
    else
    {
        for(i = 1; i <= n; i ++)
            sum[i] = sum[i - 1] + n + 1 - sa[i] - height[i], sum1[i] = sum1[i - 1] + n + 1 - sa[i];
        int l = 1, r = k;
        while (l != r - 1)
        {
            int mid = (l + r) >> 1;
            if (check(mid)) r = mid;
            else l = mid;
        }
        int ans = 0;
        if (check(l)) ans = l;
        else if (check(r)) ans = r;
        else {cout << -1; return 0;}
        l = 1, r = n;
        while (l != r - 1)
        {
            int mid = (l + r) >> 1;
            if (sum[mid] >= (long long)ans) r = mid;
            else l = mid;
        }
        int begi = 0, end = 0;
        if (sum[l] >= (long long)ans) begi = l, end = ans - sum[l - 1] + height[l];
        else begi = r, end = ans - sum[r - 1] + height[r];
        int jj = sa[begi] + end;
        for(i = sa[begi]; i < jj; i ++)
            printf("%c", c[i]);
    }
}


發佈了44 篇原創文章 · 獲贊 11 · 訪問量 1萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章