读书人

ac自动机以及它下面的DFA

发布时间: 2012-12-20 09:53:21 作者: rapoo

ac自动机以及它上面的DFA

???????? AC自动机(Aho-Corasick)主要用于多模式串的匹配问题,是前缀匹配搜索的一种方法,和KMP算法的思想类似。

??????? 总所周至,KMP算法中的关键next指针利用了模式串本身的性质,在失配时给出了重新匹配的位置。AC自动机中的fail指针也是一样,fail指针的构造关键是找到即是当点匹配串的后缀,又是trie中一个模式串的前缀的最长的字符串。

????????DFA是确定性有穷自动机,用于正则表达式的匹配,AC自动机可以用来构造DFA,将trie树中的节点看成是DFA中的状态,字符的匹配看成DFA的转化关系,再做一定的处理即可完成

??????? AC自动机本质上来说是一种基于trie树的kmp算法。下面是AC自动机和DFA实现的代码:

#include <queue>#include <cstdio>#include <cstring>#include <cassert>#include <algorithm>#include <functional>using namespace std;struct AhoCorasick {static const int UNDEF = 0;//非终结标识符static const int MAXN = 2048;static const int CHARSET = 2;int end;//节点尾int tag[MAXN];//是否为终结节点1.是0.不是int fail[MAXN];//失败指针int trie[MAXN][CHARSET];//trie树void init() {tag[0] = UNDEF;fill(trie[0], trie[0] + CHARSET, -1);end = 1;}//插入void add(int m, const int* s, int t) {int p = 0;for (int i = 0; i < m; ++i) {if (trie[p][*s] == -1) {tag[end] = UNDEF;fill(trie[end], trie[end] + CHARSET, -1);trie[p][*s] = end++;}p = trie[p][*s];++s;}tag[p] = t;}        //构造失败指针以及trie上的DFAvoid build() {queue<int> bfs;fail[0] = 0;for (int i = 0; i < CHARSET; ++i) {if (trie[0][i] != -1) {fail[trie[0][i]] = 0;bfs.push(trie[0][i]);} else {trie[0][i] = 0;}}while (!bfs.empty()) {int p = bfs.front();tag[p] |= tag[fail[p]];//bfs.pop();for (int i = 0; i < CHARSET; ++i) {if (trie[p][i] != -1) {fail[trie[p][i]] = trie[fail[p]][i];bfs.push(trie[p][i]);} else {trie[p][i] = trie[fail[p]][i];}}}}} ac;const int MAXN = 218;const long long MOD = 1000000009LL;int next[AhoCorasick::MAXN][10];long long dp[MAXN][AhoCorasick::MAXN];char str[MAXN];int num[MAXN];int _next(int p, int x) {for (int i = 3; i >= 0; --i) {p = ac.trie[p][(x & (1 << i)) ? 1 : 0];if (ac.tag[p] != AhoCorasick::UNDEF) {return -1;}}return p;}long long gao(int m, const int* s, int p) {if (p == -1) {return 0LL;} else if (m == 0) {return 1LL;} else {long long ret = 0LL;for (int i = 0; i < *s; ++i) {int q = next[p][i];if (q != -1) {ret += dp[m - 1][q];}}ret += gao(m - 1, s + 1, next[p][*s]);return ret % MOD;}}long long gao(int m, const int* s) {long long ret = 1LL;if (m > 0) {ret += gao(m - 1, s + 1, next[0][*s]);for (int i = 0; i < m; ++i) {for (int j = 1; j < (i == m - 1 ? *s : 10); ++j) {int p = next[0][j];if (p != -1) {ret += dp[i][p];}}}ret %= MOD;}return ret;}int main() {int re, n, m;long long ans;freopen("e:\\zoj\\zoj_3494.txt","r",stdin);scanf("%d", &re);for (int ri = 1; ri <= re; ++ri) {scanf("%d", &n);ac.init();for (int i = 0; i < n; ++i) {scanf("%s", str);m = strlen(str);transform(str, str + m, num, bind2nd(minus<char>(), '0'));ac.add(m, num, 1);}ac.build();for (int i = 0; i < ac.end; ++i) {for (int j = 0; j < 10; ++j) {next[i][j] = ac.tag[i] == AhoCorasick::UNDEF ? _next(i, j) : -1;}}for (int j = 0; j < ac.end; ++j) {dp[0][j] = ac.tag[j] != AhoCorasick::UNDEF ? 0 : 1;}for (int i = 1; i < MAXN; ++i) {for (int j = 0; j < ac.end; ++j) {dp[i][j] = 0;if (ac.tag[j] == AhoCorasick::UNDEF) {for (int k = 0; k < 10; ++k) {int jj = next[j][k];if (jj != -1) {dp[i][j] += dp[i - 1][jj];}}dp[i][j] %= MOD;}}}scanf("%s", str);m = strlen(str);transform(str, str + m, num, bind2nd(minus<char>(), '0'));for (int i = m - 1; i >= 0; --i) {if (num[i] == 0) {num[i] = 9;} else {--num[i];break;}}if (num[0] == 0) {ans = -gao(m - 1, num + 1);} else {ans = -gao(m, num);}scanf("%s", str);m = strlen(str);transform(str, str + m, num, bind2nd(minus<char>(), '0'));ans += gao(m, num);ans = (ans % MOD + MOD) % MOD;printf("%lld\n", ans);}assert(scanf("%d", &re) == EOF);return 0;}

?

读书人网 >编程

热点推荐