当前找出所有最短的重复串, 删去之后, 不会再出现小于等于当前长度的重复串了.
那么重复串的长度最多有 $O(\sqrt n)$ 种, 删去就用后缀数组实现, 枚举当前长度的分割点, 求公共前缀长度和公共后缀长度, 就是当前重复的长度了, 然后就打标记删去即可.
- #include <bits/stdc++.h>
- const int N = 1e5 + 7;
- namespace SA {
- int sa[N], rk[N], fir[N], sec[N], c[N], height[N];
- char s[N]; // 有时候可以用 int 数组防止爆 char
- int mn[N][20], lg[N];
- inline int MIN(int a, int b) {
- return height[a] <height[b] ? a : b;
- }
- void init(int len, char *str, int num = 122) {
- register int i, j, k;
- s[len + 1] = 0;
- for (i = 1; i <= len; i++) s[i] = s[2 * len + 1 - i + 1] = str[i];
- len = 2 * len + 1;
- s[len + 1] = 0;
- for (i = 1; i <= num; i++) c[i] = 0;
- for (i = 1; i <= len; i++) ++c[fir[i] = s[i]];
- for (i = 1; i <= num; i++) c[i] += c[i - 1];
- for (i = len; i>= 1; i--) sa[c[fir[i]]--] = i;
- for (k = 1; k <= len; k <<= 1) {
- int cnt = 0;
- for (i = len - k + 1; i <= len; i++) sec[++cnt] = i;
- for (i = 1; i <= len; i++) if (sa[i]> k) sec[++cnt] = sa[i] - k;
- for (i = 1; i <= num; i++) c[i] = 0;
- for (i = 1; i <= len; i++) ++c[fir[i]];
- for (i = 1; i <= num; i++) c[i] += c[i - 1];
- for (i = len; i>= 1; i--) sa[c[fir[sec[i]]]--] = sec[i], sec[i] = 0;
- std::swap(fir, sec);
- fir[sa[1]] = 1; cnt = 1;
- for (i = 2; i <= len; i++)
- fir[sa[i]] = (sec[sa[i]] == sec[sa[i - 1]] && sec[sa[i] + k] == sec[sa[i - 1] + k]) ? cnt : ++cnt;
- if (cnt == len) break;
- num = cnt;
- }
- k = 0;
- for (i = 1; i <= len; i++) rk[sa[i]] = i;
- for (i = 1; i <= len; i++) {
- if (rk[i] == 1) continue;
- if (k) k--;
- j = sa[rk[i] - 1];
- while (j + k <= len && i + k <= len && s[i + k] == s[j + k]) k++;
- height[rk[i]] = k;
- }
- for (i = 1, j = 0; i <= len; i++) {
- lg[i] = (1 <<(j + 1)) == i ? ++j : j;
- mn[i][0] = i;
- }
- for (int j = 1; j < 20; j++)
- for (int i = 1; i + (1 << j) - 1 <= len; i++)
- mn[i][j] = MIN(mn[i][j - 1], mn[i + (1 << (j - 1))][j - 1]);
- }
- inline int RMQ(int a, int b) {
- int log = lg[b - a + 1];
- b -= (1 << log) - 1;
- return MIN(mn[a][log], mn[b][log]);
- }
- inline int LCP(int i, int j) {
- i = rk[i], j = rk[j];
- if (i> j) std::swap(i, j);
- return height[RMQ(i + 1, j)];
- }
- }
- int cur, n, tag[N];
- char s[N];
- bool solve() {
- while (++cur <= n) {
- int last = 0;
- for (int i = cur, j = 2 * cur; j <= n; i += cur, j += cur) {
- int pre = SA::LCP(2 * n + 2 - i, 2 * n + 2 - j);
- int suf = SA::LCP(i, j);
- if (pre> cur) pre = cur;
- if (suf> cur) suf = cur;
- if (pre + suf - 1>= cur) {
- int from = i - pre + 1, to = i + suf - cur;
- if (to> last) {
- int pos = std::max(last + 1, from);
- last = pos + cur - 1;
- tag[last] = cur;
- }
- }
- }
- if (last) {
- for (int i = n; i>= 1; i--)
- if (tag[i]>= 2) tag[i - 1] = tag[i] - 1;
- int curn = 0;
- for (int i = 1; i <= n; i++)
- if (!tag[i]) s[++curn] = s[i];
- else tag[i] = 0;
- n = curn;
- s[n + 1] = 0;
- return 1;
- }
- }
- return 0;
- }
- int main() {
- scanf("%s", s + 1);
- n = strlen(s + 1);
- SA::init(n, s);
- while (solve()) SA::init(n, s);
- puts(s + 1);
- return 0;
- }
- View Code
来源: http://www.bubuko.com/infodetail-3402766.html