The 2021 CCPC Guilin Onsite (Grand Prix of EDG) J Suffix Automaton

题目描述

简要题意:给定一个串 $S$,有 $m$ 次询问,每次询问所有本质不同的子串中第 $k$ 小的子串。这里的大小比较规则是:长度不同时,长度小的更小;长度相同时,从前向后逐位比较

$|S|,m\le 10^6$

Solution

首先我们将问题转换为求长度为定值的第 $k$ 小子串:统计每种长度的本质不同子串个数并求前缀和,二分即可确定答案的长度以及它在该长度的子串中的排名

我们考虑建出原串的后缀树,然后在 DFS 的时候按照出边首字符从小到大的顺序访问儿子,这样每个点的 DFS 序就对应字典序

我们考虑将询问按长度离线,后缀树上每个点代表长度为一个区间的子串,那么我们可以做一个差分,在长度左端点加入,在长度右端点删除,这里我们用权值线段树维护第 $k$ 小的字典序即可

时间复杂度 $O(L\log L)$,其中 $L$ 与 $|S|,m$ 同阶;我的代码中 `vector` 用得有点多,被卡空间了

代码如下:
#include <algorithm>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>
// Capacity of the arrays indexed by a string position or a substring length
// (s, d, SAM::tax, A, B, ans, Q).
#define maxn 1000010
// Capacity of the arrays indexed by an automaton state (G, SAM::T, SAM::f, SAM::tp, id, bl).
#define Maxn 2000010
#define ll long long
using namespace std;

// n: length of the input string; m: number of queries (both set in main).
int n, m;
// The input string, stored 1-indexed; main reverses it before building the automaton.
char s[maxn];
// Indexed by substring length: main fills it as a difference array and then
// prefix-sums it twice, ending with cumulative counts used to locate a query's length.
ll d[maxn];
// G[u]: children of state u in the parent tree as (first edge letter - 'a', child),
// filled by SAM::rsort and sorted per node in dfs.
vector<pair<int, int>> G[Maxn];

namespace SAM {
// One automaton state.
//   l, L : shortest / longest length of the strings the state represents
//          (l is only filled in by rsort).
//   pos  : an end position in s of the state's strings; rsort raises it to the
//          maximum over the state's subtree in the parent tree.
//   nxt  : outgoing transitions, keyed by letter index.
struct node {
    int l, L, pos;
    std::map<int, int> nxt;
};
node T[Maxn];
// f[u]: suffix link (parent) of u; top: number of states in use;
// last: state of the whole current prefix; rt: root state.
int f[Maxn], top, last, rt;

// Resets every state used so far and leaves an automaton holding only the root (state 1).
void init() {
    for (int v = 1; v <= top; ++v) {
        T[v].l = 0;
        T[v].L = 0;
        T[v].pos = 0;
        f[v] = 0;
        T[v].nxt.clear();
    }
    top = 1;
    last = 1;
    rt = 1;
    f[rt] = 0;
    T[rt].pos = 0;
    T[rt].L = 0;
    T[rt].l = 0;
}

// Appends letter index `ch` to the automaton.
void extend(int ch) {
    const int cur = ++top;
    int p = last;
    last = cur;
    T[cur].L = T[p].L + 1;
    T[cur].pos = T[cur].L;

    // Walk the suffix links, adding the missing transition on `ch`.
    for (; p != 0 && T[p].nxt.count(ch) == 0; p = f[p]) T[p].nxt[ch] = cur;
    if (p == 0) {
        f[cur] = rt;
        return;
    }

    const int q = T[p].nxt[ch];
    if (T[q].L == T[p].L + 1) {
        f[cur] = q;
        return;
    }

    // q is too long to be the link target: split off a clone of length T[p].L + 1.
    const int clone = ++top;
    T[clone].L = T[p].L + 1;
    f[clone] = f[q];
    T[clone].nxt = T[q].nxt;
    for (; p != 0 && T[p].nxt[ch] == q; p = f[p]) T[p].nxt[ch] = clone;
    f[q] = clone;
    f[cur] = clone;
}

// tax: counting-sort buckets indexed by length; tp[i]: i-th state in ascending order of L.
int tax[maxn], tp[Maxn];

// Counting-sorts the states by L (lengths are at most `n`), then walks them from
// longest to shortest to push `pos` up to the parent, fix `l`, and append each
// state to its parent's child list in G together with the first letter of its edge.
void rsort(int n) {
    std::fill(tax + 1, tax + n + 1, 0);
    for (int v = 1; v <= top; ++v) ++tax[T[v].L];
    for (int len = 1; len <= n; ++len) tax[len] += tax[len - 1];
    for (int v = 1; v <= top; ++v) {
        tp[tax[T[v].L]] = v;
        --tax[T[v].L];
    }
    // tp[1] is the root, which has no parent, so stop before it.
    for (int rank = top; rank > 1; --rank) {
        const int u = tp[rank];
        const int par = f[u];
        if (T[par].pos < T[u].pos) T[par].pos = T[u].pos;
        T[u].l = T[par].L + 1;
        G[par].emplace_back(s[T[u].pos - T[u].l + 1] - 'a', u);
    }
}
}

// A[len]: states whose length interval starts at len (T[u].l == len);
// B[len]: states whose length interval ended at len - 1 (T[u].L + 1 == len).
vector<int> A[maxn], B[maxn];
// id[u]: preorder index that dfs gives to state u; bl[i]: state holding preorder index i;
// cnt: number of preorder indices handed out so far.
int id[Maxn], bl[Maxn], cnt;
// Preorder traversal of the parent tree rooted at `u`.
//
// Each node's children in G are sorted (by first edge letter, then child id) when
// the node is first reached, and the node gets the next preorder index
// (id[x] / bl[index]). Every node other than SAM::rt is also registered in
// A[l] and B[L + 1], the lengths at which it enters and leaves the active set.
//
// The traversal uses an explicit stack rather than recursion: the tree can be a
// chain as deep as the string is long (e.g. a string of one repeated letter),
// which would overflow the call stack.
void dfs(int u) {
    // Each entry is (node, index into G[node] of the next child to visit).
    std::vector<std::pair<int, int>> pending;

    auto enter = [&](int x) {
        std::sort(G[x].begin(), G[x].end());
        id[x] = ++cnt;
        bl[cnt] = x;
        if (x != SAM::rt) {
            A[SAM::T[x].l].push_back(x);
            B[SAM::T[x].L + 1].push_back(x);
        }
        pending.emplace_back(x, 0);
    };

    enter(u);
    while (!pending.empty()) {
        const int x = pending.back().first;
        const int next = pending.back().second;
        if (next < static_cast<int>(G[x].size())) {
            // Advance the cursor before enter() pushes, since the push may reallocate.
            ++pending.back().second;
            enter(G[x][next].second);
        } else {
            pending.pop_back();
        }
    }
}

namespace Seg {
#define lc i << 1
#define rc i << 1 | 1
// T[i]: sum of the leaf values in the range covered by tree node i.
int T[Maxn * 4];

// Recomputes node i from its two children.
inline void maintain(int i) { T[i] = T[lc] + T[rc]; };

// Sets the leaf at position k to v inside the subtree of node i covering [l, r],
// then refreshes the sums on the path back up to that node.
void update(int i, int l, int r, int k, int v) {
    const int start = i;
    while (l != r) {
        const int mid = (l + r) >> 1;
        if (k <= mid) {
            i = lc;
            r = mid;
        } else {
            i = rc;
            l = mid + 1;
        }
    }
    T[i] = v;
    while (i != start) {
        i >>= 1;
        maintain(i);
    }
}

// Descends from node i (covering [l, r]) to the position where the running sum
// of leaf values reaches k, and returns bl[] at that position.
int query(int i, int l, int r, int k) {
    while (l != r) {
        const int mid = (l + r) >> 1;
        const int leftSum = T[lc];
        if (k <= leftSum) {
            i = lc;
            r = mid;
        } else {
            k -= leftSum;
            i = rc;
            l = mid + 1;
        }
    }
    return bl[l];
}
}

// ans[i]: the pair printed for the i-th query; (-1, -1) when k is larger than the
// total count accumulated in d.
pair<int, int> ans[maxn];
// Q[len]: queries resolved to substring length len, stored as (rank within that length, query index).
vector<pair<int, int>> Q[maxn];

int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr); cout.tie(nullptr);

cin >> s + 1; n = strlen(s + 1); reverse(s + 1, s + n + 1); SAM::init();
for (int i = 1; i <= n; ++i) SAM::extend(s[i] - 'a'); SAM::rsort(n);
for (int i = 2; i <= SAM::top; ++i) d[SAM::T[i].l]++, d[SAM::T[i].L + 1]--;
for (int i = 1; i <= n; ++i) d[i] += d[i - 1];
for (int i = 1; i <= n; ++i) d[i] += d[i - 1];
dfs(SAM::rt); cin >> m;
for (int i = 1; i <= m; ++i) {
ll k; cin >> k;
int p = lower_bound(d + 1, d + n + 1, k) - d;
if (p == n + 1) ans[i] = make_pair(-1, -1);
else k -= d[p - 1], Q[p].emplace_back(k, i);
}
for (int i = 1; i <= n; ++i) {
for (auto t : A[i]) Seg::update(1, 1, cnt, id[t], 1);
for (auto t : B[i]) Seg::update(1, 1, cnt, id[t], 0);
for (auto [k, id] : Q[i]) {
int u = Seg::query(1, 1, cnt, k);
ans[id] = make_pair(n - SAM::T[u].pos + 1, n - (SAM::T[u].pos - i + 1) + 1);
}
}
for (int i = 1; i <= m; ++i) cout << ans[i].first << " " << ans[i].second << "\n";
return 0;
}