CloudySky

纵使世界万般残酷

总有温暖值得守护

P4248 [AHOI2013]差异

题目描述

给定一个字符串,求两两后缀的 lcplcp 之和。

题目正解

后缀数组 SASA / 后缀自动机 SAMSAM \to 后缀树

题目思路 - SASA

这个题对于 SASA 来说略有思维难度。首先考虑任意两个后缀 i,ji, jlcplcp 是不是就是 mink=i+1jheighti\min_{k = i + 1}^j height_i 那么要求任意两个的话,直接枚举复杂度肯定是过不去的。可以考虑每个点作为最小值对答案造成贡献的区间。这个东西是可以用单调栈来求的,或者说叫笛卡尔树。

时间复杂度:O(n)O(n) 相对比较优秀。

题目代码 - SASA

代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// Code By CloudySky
#include <bits/stdc++.h>
// #define int long long
namespace IO {
inline int read() {
int x = 0, f = 1; char c = getchar();
while (c < '0' || c > '9') {if (c == '-') f = -1; c = getchar();}
while (c >= '0' && c <= '9') {x = x * 10 + (c ^ 48); c = getchar();}
return x * f;
}
void print_n(long long x) {
if (x > 9) print_n(x / 10);
putchar(x % 10 + '0');
}
inline void print(long long x, char s = '\n') {
if (x < 0) putchar('-'), x = -x;
print_n(x), putchar(s);
}
} // namespace IO
using namespace IO;
const int Maxn = 2e6 + 10;
using namespace std;

char s[Maxn];
int sa[Maxn], h[Maxn], rk[Maxn << 1], trk[Maxn << 1];

bool cmp(int x, int y, int w) {return trk[x] == trk[y] && trk[x + w] == trk[y + w];}

void SA(int n) {
static int cnt[Maxn], id[Maxn];
int m = 300, p; //
for (int i = 1; i <= n; ++i) ++cnt[rk[i] = s[i]]; //
for (int i = 1; i <= m; ++i) cnt[i] += cnt[i - 1];
for (int i = n; i; --i) sa[cnt[rk[i]]--] = i;
for (int w = 1; p != n; w <<= 1, m = p) {
p = 0;
for (int i = n; i > n - w; --i) id[++p] = i;
for (int i = 1; i <= n; ++i) if (sa[i] > w) id[++p] = sa[i] - w;
memset(cnt, 0, sizeof(cnt));
for (int i = 1; i <= n; ++i) ++cnt[rk[id[i]]]; //
for (int i = 1; i <= m; ++i) cnt[i] += cnt[i - 1];
for (int i = n; i; --i) sa[cnt[rk[id[i]]]--] = id[i];
memcpy(trk, rk, sizeof(trk));
p = 0; //
for (int i = 1; i <= n; ++i)
rk[sa[i]] = cmp(sa[i], sa[i - 1], w) ? p : ++p;
if (p == n) for (int i = 1; i <= n; ++i) sa[rk[i]] = i;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k) --k;
while (s[i + k] == s[sa[rk[i] - 1] + k]) ++k; //
h[rk[i]] = k;
}
}

int stc[Maxn], top, l[Maxn], r[Maxn];

signed main() {
scanf("%s", s + 1);
int n = strlen(s + 1); SA(n);
stc[top = 1] = 1;
for (int i = 2; i <= n; ++i) {
while (top && h[stc[top]] > h[i]) r[stc[top--]] = i;
l[i] = stc[top], stc[++top] = i;
} while (top) r[stc[top--]] = n + 1;
long long ans = 1ll * (n - 1) * n * (n + 1) / 2;
for (int i = 2; i <= n; ++i) ans -= 2ll * (r[i] - i) * (i - l[i]) * h[i];
print(ans);
return 0;
}

题目思路 - SAMSAM

这个知道后缀树的直接无脑冲就完事了。

同样是求两两后缀之间的 lcplcp,在后缀树上体现为两个叶子节点的 lcalca。直接跑个树形 DPDPlcalca 处数点就行。

至于如何用 SAMSAM 去构建后缀树,倒着把字符串插进去得到的 failfail 树就是了。

题目代码 - SAMSAM

代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Code By CloudySky
#include <bits/stdc++.h>
// #define int long long
namespace IO {
inline int read() {
int x = 0, f = 1; char c = getchar();
while (c < '0' || c > '9') {if (c == '-') f = -1; c = getchar();}
while (c >= '0' && c <= '9') x = x * 10 + (c ^ 48), c = getchar();
return x * f;
}
void print_n(int x) {
if (x > 9) print_n(x / 10);
putchar(x % 10 + '0');
}
inline void print(int x, char s = '\n') {
if (x < 0) putchar('-'), x = -x;
print_n(x), putchar(s);
}
} // namespace IO
using namespace IO;
void openfile();
const int Maxn = 1e6 + 10;
using namespace std;

int sam[Maxn][26], siz[Maxn], fail[Maxn], len[Maxn], lst, tot;

void insert(char c) {
int p = lst, np = ++tot, ch = c - 'a';
lst = np, len[np] = len[p] + 1, siz[np] = 1;
for (; p != -1 && !sam[p][ch]; p = fail[p]) sam[p][ch] = np;
if (p == -1) return fail[np] = 0, void();
int q = sam[p][ch];
if (len[q] == len[p] + 1) return fail[np] = q, void();
int nq = ++tot;
fail[nq] = fail[q], fail[q] = fail[np] = nq;
memcpy(sam[nq], sam[q], sizeof(sam[nq]));
len[nq] = len[p] + 1;
for (; p != -1 && sam[p][ch] == q; p = fail[p]) sam[p][ch] = nq;
}

char s[Maxn];

void SAM() {
fail[0] = -1, lst = tot = 0;
int n = strlen(s);
for (int i = n - 1; i >= 0; --i) insert(s[i]);
}

long long ans;
vector <int> g[Maxn];

void dfs(int x) {
int st = siz[x];
for (int y : g[x]) dfs(y), siz[x] += siz[y];
for (int y : g[x]) ans += 1ll * siz[y] * (siz[x] - siz[y]) * len[x];
ans += 1ll * st * (siz[x] - st) * len[x];
}

signed main() {
openfile();
scanf("%s", s), SAM(); int n = strlen(s);
for (int i = 1; i <= tot; ++i) g[fail[i]].push_back(i);
dfs(0);
printf("%lld\n", 1ll * (n - 1) * n * (n + 1) / 2 - ans);
return 0;
}

void openfile() {
#ifndef ONLINE_JUDGE
freopen("in.in", "r", stdin);
freopen("out.out", "w", stdout);
#endif
}

总结

难得一见的好题,一道题牵扯了 33 个后缀数据结构。

加深了对笛卡尔树的印象(笛卡尔树的作用以及建树过程)。对 SAMSAM 有了更深入的了解(反串 failfail 树为后缀树),初识了后缀树(两个叶子节点 lcalca 即为两个后缀 lcplcp)。

本文作者:CloudySky
写作时间: 2022-03-12
最后更新时间: 2022-03-12
遵循协议: BY-NC-SA

Top