對於一個給定的長度爲N的字符串,求它的第K小子串是什麼。輸入的T決定計數方式:T=0時,位置不同但內容相同的子串只算一個;T=1時,位置不同但內容相同的子串算作多個。
我只會後綴數組
sa做法
當T=0時,按sa的順序枚舉後綴:從sa[i]開始的後綴共有n-sa[i]+1個前綴(即子串),其中最短的height[i]個已經在sa[i-1]處出現過,所以它新貢獻的不同子串有n+1-sa[i]-height[i]個。用一個變量累加這個數,pre表示加上sa[i]的貢獻之前的累計值;一旦累計值達到或超過k,答案就是從sa[i]開始、長度爲k-pre+height[i]的串。爲什麼是k-pre+height[i]呢?sa[i]新貢獻的子串長度依次是height[i]+1, height[i]+2, …,答案是其中的第k-pre個;而前height[i]個前綴和上一個後綴重複、沒有被計數,所以長度還要加上height[i]。
T=1:二分答案在不同子串(不計重複)中是第幾小的串,然後判斷在計重複的情況下有多少個子串不大於它,看這個數量是否達到k
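假設當前的串是sa[i]這個後綴的長度爲len的前綴。排名在sa[i]之前的i−1個後綴的所有前綴都比它小,可以用前綴和直接全部加上;sa[i]自身長度不超過len的len個前綴也要計入;對於排名在它之後的後綴,只有落在與當前串的最長公共前綴範圍內的那些前綴不大於它,即每個後綴貢獻min(len, 最長公共前綴)個。具體做法:首先在「不同子串個數」的前綴和上二分出一個位置i,滿足這個串被sa[i]的後綴包含,並算出len;之前的後綴直接加,之後的後綴用for循環不斷對height取min並累加,一旦min變成0就可以停止。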
#include<cmath>
#include<cstdio>
#include<cstring>
#include<iostream>
#include<algorithm>
using namespace std;
// Capacity of every per-position array; positions and suffix ranks are 1-based.
const int MAXN = 500005;
// n      : length of the input string.
// k, ty  : hold the query rank K and the query type T once the query is read.
// sa[r]  : start position of the r-th smallest suffix (sa[0] is never written and stays 0).
// rank[] : scratch ordering while the suffix array is built; after construction
//          rank[pos] is the index of suffix `pos` inside sa.
//          NOTE(review): under `using namespace std` the unqualified name `rank`
//          can collide with std::rank (C++11 <type_traits>); `::rank` is unambiguous.
// height[r] : length of the longest common prefix of suffixes sa[r] and sa[r-1].
// w      : counting-sort buckets.   x : current sort key of each position.
// i, j, m, tot : scratch scalars.
int n, m, i, j, k, sa[MAXN], rank[MAXN], height[MAXN], w[MAXN], x[MAXN], tot, ty;
// sum[r]  : number of distinct substrings first contributed by suffixes sa[1..r].
// sum1[r] : number of substrings (repeats included) starting at suffixes sa[1..r].
// Both are prefix sums filled only on the ty != 0 path.
long long sum[MAXN], sum1[MAXN];
// Input string, stored from index 1.
char c[MAXN];
inline bool check(int p)
{
int l = 1, r = n;
while (l != r - 1)
{
if (l == r) break;
int mid = (l + r) >> 1;
if (sum[mid] >= p) r = mid;
else l = mid;
}
int begi = 0, hi = 0;
long long tot = 0;
if (sum[l] >= p) begi = l + 1, hi = p - sum[l - 1] + height[l], tot = hi + sum1[l - 1];
else begi = r + 1, hi = p - sum[r - 1] + height[r], tot = hi + sum1[r - 1];
if (tot >= k) return 1;
for(int i = begi; i <= n; i ++)
{
hi = min(hi, height[i]);
if (!hi) break;
tot += hi;
if (tot >= k) return 1;
}
return 0;
}
int main()
{
//freopen("string.in", "r", stdin);
//freopen("string.out", "w", stdout);
scanf("%s", c + 1);
n = strlen(c + 1);
for(i = 1; i <= n; i ++)
++w[x[i] = c[i] - 96];
for(i = 2; i <= 30; i ++)
w[i] += w[i - 1];
for(i = n; i >= 1; i --)
sa[w[x[i]]--] = i;
for(k = 1; k <= n; k <<= 1)
{
int t = 0;
for(i = n; i >= n - k + 1; i --)
rank[++t] = i;
for(i = 1; i <= n; i ++)
if (sa[i] - k > 0) rank[++t] = sa[i] - k;
memset(w, 0, sizeof(w));
for(i = 1; i <= n; i ++)
w[x[i]] ++;
for(i = 2; i <= n; i ++)
w[i] += w[i - 1];
for(i = n; i >= 1; i --)
sa[w[x[rank[i]]]--] = rank[i];
int m = 0;
for(i = 1; i <= n; i ++)
{
int u = sa[i], v = sa[i - 1];
if (x[u] != x[v] || x[u + k] != x[v + k]) rank[u] = ++m;
else rank[u] = m;
}
if (m == n) break;
for(i = 1; i <= n; i ++)
swap(rank[i], x[i]);
}
tot = 0;
for(i = 1; i <= n; i ++)
{
if (tot) tot --;
int j = sa[rank[i] - 1];
while (c[i + tot] == c[j + tot]) tot ++;
height[rank[i]] = tot;
}
cin >> ty >> k;
if (!ty)
{
int tot = 0;
for(i = 1; i <= n; i ++)
{
int pre = tot;
tot += n + 1 - sa[i] - height[i];
if (tot >= k)
{
cout << i << endl;
int j = k - pre + height[i];
for(k = sa[i]; k < sa[i] + j; k ++)
printf("%c", c[k]);
break;
}
}
}
else
{
for(i = 1; i <= n; i ++)
sum[i] = sum[i - 1] + n + 1 - sa[i] - height[i], sum1[i] = sum1[i - 1] + n + 1 - sa[i];
int l = 1, r = k;
while (l != r - 1)
{
int mid = (l + r) >> 1;
if (check(mid)) r = mid;
else l = mid;
}
int ans = 0;
if (check(l)) ans = l;
else if (check(r)) ans = r;
else {cout << -1; return 0;}
l = 1, r = n;
while (l != r - 1)
{
int mid = (l + r) >> 1;
if (sum[mid] >= (long long)ans) r = mid;
else l = mid;
}
int begi = 0, end = 0;
if (sum[l] >= (long long)ans) begi = l, end = ans - sum[l - 1] + height[l];
else begi = r, end = ans - sum[r - 1] + height[r];
int jj = sa[begi] + end;
for(i = sa[begi]; i < jj; i ++)
printf("%c", c[i]);
}
}