Written by nodejs-style
on 2021-11-14

[C++] 문자열

문자열

KMP 알고리즘

https://bowbowbow.tistory.com/6

https://www.acmicpc.net/problem/1786

#include #include #include using namespace std; vector kmp(string T, string P) { vector answer; int n = T.size(), m = P.size(); vector pi(m, 0); // '접두사 == 접미사'의 길이 for (int i = 1, j = 0; i < m; i++) { while (j > 0 && P[i] != P[j]) j = pi[j - 1]; if (P[i] == P[j]) pi[i] = ++j; } for (int i = 0, j = 0; i < n; i++) { while (j > 0 && T[i] != P[j]) j = pi[j - 1]; if (T[i] == P[j]) { if (j == m - 1) { answer.push_back(i - m + 2); j = pi[j]; } else { j++; } } } return answer; } int main() { string T, P; getline(cin, T); getline(cin, P); vector answer = kmp(T, P); cout << answer.size() << "

"; for (auto i : answer) cout << i << "

"; }

맨버-마이어스 알고리즘

https://hellogaon.tistory.com/18

https://www.acmicpc.net/problem/9248

#include #include #include using namespace std; int suffix[500000], lcp[500000]; int g[500000], tg[500000]; int tmp[500000]; string s; int n, t; bool compare(int a, int b) { if (g[a] == g[b]) return g[a + t] < g[b + t]; return g[a] < g[b]; } void SuffixArray() { t = 1; n = s.length(); for (int i = 0; i < n; i++) { suffix[i] = i; g[i] = s[i] - 'a'; } while (t <= n) { g[n] = -1; sort(suffix, suffix + n, compare); tg[suffix[0]] = 0; for (int i = 1; i < n; i++) { if (compare(suffix[i - 1], suffix[i])) tg[suffix[i]] = tg[suffix[i - 1]] + 1; else tg[suffix[i]] = tg[suffix[i - 1]]; } for (int i = 0; i < n; i++) g[i] = tg[i]; t *= 2; } } void LCP() { for (int i = 0; i < n; i++) tmp[suffix[i]] = i; int len = 0; for (int i = 0; i < n; i++) { if (tmp[i]) { int j = suffix[tmp[i] - 1]; while (s[j + len] == s[i + len]) len++; lcp[tmp[i]] = len; if (len) len--; } } } int main() { cin >> s; SuffixArray(); LCP(); for (int i = 0; i < n; i++) cout << suffix[i] + 1 << " "; cout << "

x "; for (int i = 1; i < n; i++) cout << lcp[i] << " "; }

아호 코라식 알고리즘(Aho-Corasick Algorithm)

https://m.blog.naver.com/kks227/220992598966

https://www.acmicpc.net/problem/9250

#include #include #include #include using namespace std; struct Trie { Trie* go[26]; // 현재 노드에서 해당 문자를 받으면 가는 노드 Trie* fail; // 현재 노드에서 해당 문자의 go 목적지가 없을 때 가는 노드 bool output; // 현재 노드에 도달하면 찾는 문자열 집합 있는지 판단 Trie() { fill(go, go + 26, nullptr); output = false; } ~Trie() { for (int i = 0; i < 26; i++) if (go[i]) delete go[i]; } void insert(string key) { if (key == "\0") { output = true; return; } int next = key[0] - 'a'; if (!go[next]) go[next] = new Trie; // 맨 앞 문자는 제외하고 전달한다. go[next]->insert(key.substr(1, key.size() - 1)); } }; int main() { int N, M; string str; Trie* root = new Trie; cin >> N; for (int i = 0; i < N; i++) { cin >> str; // 트라이에 S의 원소들을 모두 집어넣는다. root->insert(str); } // 트라이 노드를 방문하며 fail 함수를 만든다. queue q; root->fail = root; q.push(root); while (!q.empty()) { Trie* current = q.front(); q.pop(); // 26개의 input 각각에 대해 처리한다. for (int i = 0; i < 26; i++) { Trie* next = current->go[i]; if (!next) continue; // 루트의 fail은 루트다. if (current == root) next->fail = root; else { Trie* dest = current->fail; // fail을 참조할 가장 가까운 조상을 찾아간다. while (dest != root && !dest->go[i]) dest = dest->fail; // fail(px) = go(fail(p), x) if (dest->go[i]) dest = dest->go[i]; next->fail = dest; } // fail(x) = y일 때, output(y) ⊂ output(x) if (next->fail->output) next->output = true; // 큐에 다음 노드 push q.push(next); } } cin >> M; for (int i = 0; i < M; i++) { cin >> str; Trie* current = root; bool result = false; for (int j = 0; str[j]; j++) { int next = str[j] - 'a'; // 현재 노드에서 갈 수 없으면 fail을 계속 따라감 while (current != root && !current->go[next]) current = current->fail; // go 함수가 존재하면 이동. 루트면 이게 false일 수도 있다 if (current->go[next]) current = current->go[next]; // 현재 노드에 output이 있으면 찾은 것이다. if (current->output) { result = true; break; } } cout << (result ? "YES" : "NO") << endl; } }

라빈 카프 알고리즘(Rabin-Karp Algorithm)

https://m.blog.naver.com/kks227/220927272165

https://www.acmicpc.net/problem/3033

#include #include #include using namespace std; const int HASH_SIZE = 100003; int Mod(long long x) { if (x >= 0) return x % HASH_SIZE; return ((-x / HASH_SIZE + 1) * HASH_SIZE + x) % HASH_SIZE; } int main() { int L; string str; cin >> L >> str; int start = 0, end = L; while (start + 1 < end) { vector hash[HASH_SIZE]; int mid = (start + end) / 2, h = 0, po = 1; bool found = false; // 해시값(h) 만들기 for (int i = 0; i <= L - mid; i++) { if (i == 0) { for (int j = 0; j < mid; j++) { h = Mod(h + 1LL * str[mid - j - 1] * po); if (j < mid - 1) po = Mod(po * 2); } } else h = Mod(2 * (h - 1LL * str[i - 1] * po) + str[i + mid - 1]); if (!hash[h].empty()) { // 해시 충돌일 때 str의 [i, i+m-1]와 [p, p+mid-1]의 문자열이 같은지 비교 for (int p : hash[h]) { for (int j = 0; j < mid; j++) { if (str[i + j] != str[p + j]) break; if (j == mid - 1) found = true; } } } if (!found) hash[h].push_back(i); } if (found) start = mid; // 찾았으면 더 높은 길이 탐색 else end = mid; // 없으면 더 낮은 길이 탐색 } cout << start; }

from http://tech-interview.tistory.com/172 by ccl(A) rewrite - 2021-11-14 20:27:02

Top