Luogu P2408 不同子串个数

题目描述

https://www.luogu.com.cn/problem/P2408

简要题意:给定一个长度为 $n$ 的字符串,求其本质不同的子串个数

$n\le 10^5$

Solution

注意到子串一定是某个后缀的前缀,所以我们考虑对于所有后缀求其本质不同的前缀个数

我们不妨维护当前若干个后缀的本质不同的前缀个数,然后考虑加入一个后缀的贡献,重复的前缀显然是与集合中所有后缀的最大 $lcp$

我们按照字典序加入后缀,那么这个 $lcp$ 就是 $H[i]$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#include <iostream>
#include <cstring>
#define maxn 100010
#define ll long long
using namespace std;

int n, m;

char s[maxn];

int tax[maxn], tp[maxn], sa[maxn], rk[maxn], M = 255;
void rsort() {
for (int i = 0; i <= M; ++i) tax[i] = 0;
for (int i = 1; i <= n; ++i) ++tax[rk[i]];
for (int i = 1; i <= M; ++i) tax[i] += tax[i - 1];
for (int i = n; i; --i) sa[tax[rk[tp[i]]]--] = tp[i];
}

int H[maxn];
void init_sa() {
if (n == 1) return sa[1] = rk[1] = 1, void(); int cnt = 1;
for (int i = 1; i <= n; ++i) rk[i] = s[i], tp[i] = i; rsort();
for (int k = 1; k < n; k *= 2) {
if (cnt == n) break; M = cnt; cnt = 0;
for (int i = n - k + 1; i <= n; ++i) tp[++cnt] = i;
for (int i = 1; i <= n; ++i) if (sa[i] > k) tp[++cnt] = sa[i] - k;
rsort(); swap(rk, tp); rk[sa[1]] = cnt = 1;
for (int i = 2; i <= n; ++i) {
if (tp[sa[i - 1]] != tp[sa[i]] || tp[sa[i - 1] + k] != tp[sa[i] + k]) ++cnt;
rk[sa[i]] = cnt;
}
} int lcp = 0;
for (int i = 1; i <= n; ++i) {
if (lcp) --lcp;
int j = sa[rk[i] - 1];
while (s[j + lcp] == s[i + lcp]) ++lcp;
H[rk[i]] = lcp;
}
}

int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr); cout.tie(nullptr);

cin >> n >> s + 1; init_sa(); ll ans = 0;
for (int i = 1; i <= n; ++i) ans += n - sa[i] + 1 - H[i];
cout << ans << "\n";
return 0;
}