c++ string匹配有哪些性能优化技巧-乐工具技术知识

在 C++ 中，使用 std::string 进行字符串匹配时，可以采用以下性能优化技巧：

KMP 算法（Knuth-Morris-Pratt）：这是一种高效的字符串匹配算法，通过预处理模式串来避免不必要的比较。时间复杂度为 O(m+n)，其中 m 是模式串长度，n 是文本串长度。

#include 
#include 
#include 

void compute_lps(const std::string& pattern, std::vector& lps) {
    int m = pattern.length();
    int len = 0;
    lps[0] = 0;

    int i = 1;
    while (i < m) {
        if (pattern[i] == pattern[len]) {
            len++;
            lps[i] = len;
            i++;
        } else {
            if (len != 0) {
                len = lps[len - 1];
            } else {
                lps[i] = 0;
                i++;
            }
        }
    }
}

int kmp_search(const std::string& text, const std::string& pattern) {
    int n = text.length();
    int m = pattern.length();
    std::vector lps(m, 0);
    compute_lps(pattern, lps);

    int i = 0;
    int j = 0;
    while (i < n) {
        if (text[i] == pattern[j]) {
            i++;
            j++;
        }

        if (j == m) {
            std::cout << "Pattern found at index: " << i - j << std::endl;
            j = lps[j - 1];
        } else if (i < n && text[i] != pattern[j]) {
            if (j != 0) {
                j = lps[j - 1];
            } else {
                i++;
            }
        }
    }

    return -1;
}

int main() {
    std::string text = "ABABDABACDABABCABAB";
    std::string pattern = "ABABCABAB";
    int result = kmp_search(text, pattern);
    return 0;
}

Boyer-Moore 算法：这是一种高效的字符串搜索算法，通过跳过部分字符来减少比较次数。时间复杂度为 O(n+m)，其中 n 是文本串长度，m 是模式串长度。

#include 
#include 

int bad_character_heuristic(const std::string& text, const std::string& pattern) {
    int n = text.length();
    int m = pattern.length();
    std::vector last_occurrence(256, -1);

    for (int i = 0; i < m; i++) {
        last_occurrence[pattern[i]] = i;
    }

    int i = 0;
    int j = 0;
    while (i < n) {
        if (text[i] == pattern[j]) {
            i++;
            j++;
        } else {
            i += std::max(1, j - last_occurrence[text[i]]);
        }
    }

    return j == m ? i - j : -1;
}

int main() {
    std::string text = "ABABDABACDABABCABAB";
    std::string pattern = "ABABCABAB";
    int result = bad_character_heuristic(text, pattern);
    return 0;
}

Rabin-Karp 算法：这是一种基于哈希的字符串匹配算法，通过计算文本串和模式串的哈希值来快速比较它们。时间复杂度为 O(n+m)，其中 n 是文本串长度，m 是模式串长度。

#include 
#include 
#include 
#include 

uint64_t hash_function(const std::string& s, int seed = 131) {
    uint64_t hash_value = https://www.yisu.com/ask/0;"hljs">int rabin_karp_search(const std::string& text, const std::string& pattern) {
    int n = text.length();
    int m = pattern.length();
    if (m > n) {
        return -1;
    }

    uint64_t pattern_hash = hash_function(pattern);
    uint64_t text_hash = hash_function(text, pattern_hash);

    while (text_hash != pattern_hash) {
        text_hash = (text_hash - hash_function(text[0]) * pattern_hash) + hash_function(text[n - m + 1]);
        n--;
    }

    for (int i = 0; i <= n - m; i++) {
        if (text.substr(i, m) == pattern) {
            return i;
        }
    }

    return -1;
}

int main() {
    std::string text = "ABABDABACDABABCABAB";
    std::string pattern = "ABABCABAB";
    int result = rabin_karp_search(text, pattern);
    return 0;
}

根据具体应用场景和需求，可以选择合适的字符串匹配算法进行优化。