📌  相关文章
📜  字符串中子字符串的频率

📅  最后修改于: 2021-04-23 21:19:46             🧑  作者: Mango

给定一个输入字符串和一个子字符串。查找给定字符串中子字符串出现的频率。

例子:

Input : man (pattern)
        dhimanman (string)
Output : 2

Input : nn (pattern)
        Banana (String)
Output : 0

Input : man (pattern)
        dhimanman (string)
Output : 2

Input : aa (pattern)
        aaaaa (String)
Output : 4

一个简单的解决方案是一个一个地匹配字符。每当我们看到完全匹配时,我们都会增加计数。以下是基于朴素模式搜索的简单解决方案。

C++
// Simple C++ program to count occurrences
// of pat in txt.
#include
using namespace std;
  
int countFreq(string &pat, string &txt)
{
    int M = pat.length();
    int N = txt.length();
    int res = 0;
   
    /* A loop to slide pat[] one by one */
    for (int i = 0; i <= N - M; i++)
    {
        /* For current index i, check for
           pattern match */
        int j;
        for (j = 0; j < M; j++)
            if (txt[i+j] != pat[j])
                break;
  
        // if pat[0...M-1] = txt[i, i+1, ...i+M-1]
        if (j == M) 
        {
           res++;
           j = 0;
        }
    }
    return res;
}
   
/* Driver program to test above function */
int main()
{
   string txt = "dhimanman";
   string pat = "man";
   cout << countFreq(pat, txt);
   return 0;
}


Java
// Simple Java program to count occurrences
// of pat in txt.
 
class GFG {
     
    static int countFreq(String pat, String txt) {       
        int M = pat.length();       
        int N = txt.length();       
        int res = 0;
 
        /* A loop to slide pat[] one by one */
        for (int i = 0; i <= N - M; i++) {
            /* For current index i, check for
        pattern match */
            int j;           
            for (j = 0; j < M; j++) {
                if (txt.charAt(i + j) != pat.charAt(j)) {
                    break;
                }
            }
 
            // if pat[0...M-1] = txt[i, i+1, ...i+M-1]
            if (j == M) {               
                res++;               
                j = 0;               
            }           
        }       
        return res;       
    }
 
    /* Driver program to test above function */
    static public void main(String[] args) {
        String txt = "dhimanman";       
        String pat = "man";       
        System.out.println(countFreq(pat, txt));       
    }
}
 
// This code is contributed by 29AjayKumar


Python3
# Simple python program to count
# occurrences of pat in txt.
def countFreq(pat, txt):
    M = len(pat)
    N = len(txt)
    res = 0
     
    # A loop to slide pat[] one by one
    for i in range(N - M + 1):
         
        # For current index i, check
        # for pattern match
        j = 0
        while j < M:
            if (txt[i + j] != pat[j]):
                break
            j += 1
 
        if (j == M):
            res += 1
            j = 0
    return res
     
# Driver Code
if __name__ == '__main__':
    txt = "dhimanman"
    pat = "man"
    print(countFreq(pat, txt))
 
# This code is contributed
# by PrinciRaj1992


C#
// Simple C# program to count occurrences
// of pat in txt.
using System;
public class GFG {
      
    static int countFreq(String pat, String txt) {       
        int M = pat.Length;       
        int N = txt.Length;       
        int res = 0;
  
        /* A loop to slide pat[] one by one */
        for (int i = 0; i <= N - M; i++) {
            /* For current index i, check for
        pattern match */
            int j;           
            for (j = 0; j < M; j++) {
                if (txt[i + j] != pat[j]) {
                    break;
                }
            }
  
            // if pat[0...M-1] = txt[i, i+1, ...i+M-1]
            if (j == M) {               
                res++;               
                j = 0;               
            }           
        }       
        return res;       
    }
  
    /* Driver program to test above function */
    static public void Main() {
        String txt = "dhimanman";       
        String pat = "man";       
        Console.Write(countFreq(pat, txt));       
    }
}
  
// This code is contributed by 29AjayKumar


PHP


Javascript


Java
// C++ program to count occurrences
// of pattern in a text.
#include 
using namespace std;
 
void computeLPSArray(string pat, int M,
                                 int lps[])
{
     
    // Length of the previous longest
    // prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0; // lps[0] is always 0
 
    // The loop calculates lps[i] for
    // i = 1 to M-1
    while (i < M)
    {
        if (pat[i] == pat[len])
        {
            len++;
            lps[i] = len;
            i++;
        }
        else // (pat[i] != pat[len])
        {
             
            // This is tricky. Consider the example.
            // AAACAAAA and i = 7. The idea is similar
            // to search step.
            if (len != 0)
            {
                len = lps[len - 1];
 
                // Also, note that we do not
                // increment i here
            }
            else // if (len == 0)
            {
                lps[i] = len;
                i++;
            }
        }
    }
}
 
int KMPSearch(string pat, string txt)
{
    int M = pat.length();
    int N = txt.length();
 
    // Create lps[] that will hold the longest
    // prefix suffix values for pattern
    int lps[M];
    int j = 0; // index for pat[]
 
    // Preprocess the pattern (calculate lps[]
    // array)
    computeLPSArray(pat, M, lps);
 
    int i = 0; // index for txt[]
    int res = 0;
    int next_i = 0;
 
    while (i < N)
    {
        if (pat[j] == txt[i])
        {
            j++;
            i++;
        }
        if (j == M)
        {
             
            // When we find pattern first time,
            // we iterate again to check if there
            // exists more pattern
            j = lps[j - 1];
            res++;
        }
 
        // Mismatch after j matches
        else if (i < N && pat[j] != txt[i])
        {
             
            // Do not match lps[0..lps[j-1]]
            // characters, they will match anyway
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }
    return res;
}
 
// Driver code
int main()
{
    string txt = "geeksforgeeks";
    string pat = "eeks";
    int ans = KMPSearch(pat, txt);
     
    cout << ans;
     
    return 0;
}
 
// This code is contributed by akhilsaini


Java
// Java program to count occurrences of pattern
// in a text.
class KMP_String_Matching
{
    int KMPSearch(String pat, String txt)
    {
        int M = pat.length();
        int N = txt.length();
 
        // create lps[] that will hold the longest
        // prefix suffix values for pattern
        int lps[] = new int[M];
        int j = 0; // index for pat[]
 
        // Preprocess the pattern (calculate lps[]
        // array)
        computeLPSArray(pat,M,lps);
 
        int i = 0; // index for txt[]
        int res = 0;
        int next_i = 0;
         
        while (i < N)
        {
            if (pat.charAt(j) == txt.charAt(i))
            {
                j++;
                i++;
            }
            if (j == M)
            {
                // When we find pattern first time,
                // we iterate again to check if there
                // exists more pattern
                j = lps[j-1];
                res++;
 
                // We start i to check for more than once
                // appearance of pattern, we will reset i
                // to previous start+1
                if (lps[j]!=0)
                    i = ++next_i;
                j = 0;
            }
 
            // mismatch after j matches
            else if (i < N && pat.charAt(j) != txt.charAt(i))
            {
                // Do not match lps[0..lps[j-1]] characters,
                // they will match anyway
                if (j != 0)
                    j = lps[j-1];
                else
                    i = i+1;
            }
        }
        return res;
    }
 
    void computeLPSArray(String pat, int M, int lps[])
    {
        // length of the previous longest prefix suffix
        int len = 0;
        int i = 1;
        lps[0] = 0; // lps[0] is always 0
 
        // the loop calculates lps[i] for i = 1 to M-1
        while (i < M)
        {
            if (pat.charAt(i) == pat.charAt(len))
            {
                len++;
                lps[i] = len;
                i++;
            }
            else // (pat[i] != pat[len])
            {
                // This is tricky. Consider the example.
                // AAACAAAA and i = 7. The idea is similar
                // to search step.
                if (len != 0)
                {
                    len = lps[len-1];
 
                    // Also, note that we do not increment
                    // i here
                }
                else // if (len == 0)
                {
                    lps[i] = len;
                    i++;
                }
            }
        }
    }
 
    // Driver program to test above function
    public static void main(String args[])
    {
        String txt = "geeksforgeeks";
        String pat = "eeks";
        int ans = new KMP_String_Matching().KMPSearch(pat,txt);
        System.out.println(ans);
    }
}


Python3
# Python3 program to count occurrences of
# pattern in a text.
def KMPSearch(pat, txt):
     
    M = len(pat)
    N = len(txt)
 
    # Create lps[] that will hold the longest
    # prefix suffix values for pattern
    lps = [None] * M
    j = 0  # index for pat[]
 
    # Preprocess the pattern (calculate lps[]
    # array)
    computeLPSArray(pat, M, lps)
 
    i = 0  # index for txt[]
    res = 0
    next_i = 0
 
    while (i < N):
        if pat[j] == txt[i]:
            j = j + 1
            i = i + 1
        if j == M:
             
            # When we find pattern first time,
            # we iterate again to check if there
            # exists more pattern
            j = lps[j - 1]
            res = res + 1
 
            # We start i to check for more than once
            # appearance of pattern, we will reset i
            # to previous start+1
            if lps[j] != 0:
                next_i = next_i + 1
                i = next_i
                j = 0
 
        # Mismatch after j matches
        elif ((i < N) and (pat[j] != txt[i])):
             
            # Do not match lps[0..lps[j-1]]
            # characters, they will match anyway
            if (j != 0):
                j = lps[j - 1]
            else:
                i = i + 1
                 
    return res
     
def computeLPSArray(pat, M, lps):
     
    # Length of the previous longest
    # prefix suffix
    len = 0
    i = 1
    lps[0] = 0  # lps[0] is always 0
 
    # The loop calculates lps[i] for
    # i = 1 to M-1
    while (i < M):
        if pat[i] == pat[len]:
            len = len + 1
            lps[i] = len
            i = i + 1
             
        else:  # (pat[i] != pat[len])
         
            # This is tricky. Consider the example.
            # AAACAAAA and i = 7. The idea is similar
            # to search step.
            if len != 0:
                len = lps[len - 1]
                 
                # Also, note that we do not increment
                # i here
 
            else:  # if (len == 0)
                lps[i] = len
                i = i + 1
 
# Driver code
if __name__ == "__main__":
     
    txt = "geeksforgeeks"
    pat = "eeks"
    ans = KMPSearch(pat, txt)
     
    print(ans)
 
# This code is contributed by akhilsaini


C#
// C# program to count occurrences of pattern
// in a text.
using System;
     
public class KMP_String_Matching
{
    int KMPSearch(String pat, String txt)
    {
        int M = pat.Length;
        int N = txt.Length;
 
        // create lps[] that will hold the longest
        // prefix suffix values for pattern
        int []lps = new int[M];
        int j = 0; // index for pat[]
 
        // Preprocess the pattern (calculate lps[]
        // array)
        computeLPSArray(pat,M,lps);
 
        int i = 0; // index for txt[]
        int res = 0;
        int next_i = 0;
         
        while (i < N)
        {
            if (pat[j] == txt[i])
            {
                j++;
                i++;
            }
            if (j == M)
            {
                // When we find pattern first time,
                // we iterate again to check if there
                // exists more pattern
                j = lps[j-1];
                res++;
 
                // We start i to check for more than once
                // appearance of pattern, we will reset i
                // to previous start+1
                if (lps[j]!=0)
                    i = ++next_i;
                j = 0;
            }
 
            // mismatch after j matches
            else if (i < N && pat[j] != txt[i])
            {
                // Do not match lps[0..lps[j-1]] characters,
                // they will match anyway
                if (j != 0)
                    j = lps[j-1];
                else
                    i = i+1;
            }
        }
        return res;
    }
 
    void computeLPSArray(String pat, int M, int []lps)
    {
        // length of the previous longest prefix suffix
        int len = 0;
        int i = 1;
        lps[0] = 0; // lps[0] is always 0
 
        // the loop calculates lps[i] for i = 1 to M-1
        while (i < M)
        {
            if (pat[i] == pat[len])
            {
                len++;
                lps[i] = len;
                i++;
            }
            else // (pat[i] != pat[len])
            {
                // This is tricky. Consider the example.
                // AAACAAAA and i = 7. The idea is similar
                // to search step.
                if (len != 0)
                {
                    len = lps[len-1];
 
                    // Also, note that we do not increment
                    // i here
                }
                else // if (len == 0)
                {
                    lps[i] = len;
                    i++;
                }
            }
        }
    }
 
    // Driver code
    public static void Main(String []args)
    {
        String txt = "geeksforgeeks";
        String pat = "eeks";
        int ans = new KMP_String_Matching().KMPSearch(pat,txt);
        Console.WriteLine(ans);
    }
}
 
// This code is contributed by Princi Singh


输出 :

2

时间复杂度: O(M * N)

一个有效的解决方案是使用KMP算法。

Java

// C++ program to count occurrences
// of pattern in a text.
#include 
using namespace std;
 
void computeLPSArray(string pat, int M,
                                 int lps[])
{
     
    // Length of the previous longest
    // prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0; // lps[0] is always 0
 
    // The loop calculates lps[i] for
    // i = 1 to M-1
    while (i < M)
    {
        if (pat[i] == pat[len])
        {
            len++;
            lps[i] = len;
            i++;
        }
        else // (pat[i] != pat[len])
        {
             
            // This is tricky. Consider the example.
            // AAACAAAA and i = 7. The idea is similar
            // to search step.
            if (len != 0)
            {
                len = lps[len - 1];
 
                // Also, note that we do not
                // increment i here
            }
            else // if (len == 0)
            {
                lps[i] = len;
                i++;
            }
        }
    }
}
 
int KMPSearch(string pat, string txt)
{
    int M = pat.length();
    int N = txt.length();
 
    // Create lps[] that will hold the longest
    // prefix suffix values for pattern
    int lps[M];
    int j = 0; // index for pat[]
 
    // Preprocess the pattern (calculate lps[]
    // array)
    computeLPSArray(pat, M, lps);
 
    int i = 0; // index for txt[]
    int res = 0;
    int next_i = 0;
 
    while (i < N)
    {
        if (pat[j] == txt[i])
        {
            j++;
            i++;
        }
        if (j == M)
        {
             
            // When we find pattern first time,
            // we iterate again to check if there
            // exists more pattern
            j = lps[j - 1];
            res++;
        }
 
        // Mismatch after j matches
        else if (i < N && pat[j] != txt[i])
        {
             
            // Do not match lps[0..lps[j-1]]
            // characters, they will match anyway
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }
    return res;
}
 
// Driver code
int main()
{
    string txt = "geeksforgeeks";
    string pat = "eeks";
    int ans = KMPSearch(pat, txt);
     
    cout << ans;
     
    return 0;
}
 
// This code is contributed by akhilsaini

Java

// Java program to count occurrences of pattern
// in a text.
class KMP_String_Matching
{
    int KMPSearch(String pat, String txt)
    {
        int M = pat.length();
        int N = txt.length();
 
        // create lps[] that will hold the longest
        // prefix suffix values for pattern
        int lps[] = new int[M];
        int j = 0; // index for pat[]
 
        // Preprocess the pattern (calculate lps[]
        // array)
        computeLPSArray(pat,M,lps);
 
        int i = 0; // index for txt[]
        int res = 0;
        int next_i = 0;
         
        while (i < N)
        {
            if (pat.charAt(j) == txt.charAt(i))
            {
                j++;
                i++;
            }
            if (j == M)
            {
                // When we find pattern first time,
                // we iterate again to check if there
                // exists more pattern
                j = lps[j-1];
                res++;
 
                // We start i to check for more than once
                // appearance of pattern, we will reset i
                // to previous start+1
                if (lps[j]!=0)
                    i = ++next_i;
                j = 0;
            }
 
            // mismatch after j matches
            else if (i < N && pat.charAt(j) != txt.charAt(i))
            {
                // Do not match lps[0..lps[j-1]] characters,
                // they will match anyway
                if (j != 0)
                    j = lps[j-1];
                else
                    i = i+1;
            }
        }
        return res;
    }
 
    void computeLPSArray(String pat, int M, int lps[])
    {
        // length of the previous longest prefix suffix
        int len = 0;
        int i = 1;
        lps[0] = 0; // lps[0] is always 0
 
        // the loop calculates lps[i] for i = 1 to M-1
        while (i < M)
        {
            if (pat.charAt(i) == pat.charAt(len))
            {
                len++;
                lps[i] = len;
                i++;
            }
            else // (pat[i] != pat[len])
            {
                // This is tricky. Consider the example.
                // AAACAAAA and i = 7. The idea is similar
                // to search step.
                if (len != 0)
                {
                    len = lps[len-1];
 
                    // Also, note that we do not increment
                    // i here
                }
                else // if (len == 0)
                {
                    lps[i] = len;
                    i++;
                }
            }
        }
    }
 
    // Driver program to test above function
    public static void main(String args[])
    {
        String txt = "geeksforgeeks";
        String pat = "eeks";
        int ans = new KMP_String_Matching().KMPSearch(pat,txt);
        System.out.println(ans);
    }
}

Python3

# Python3 program to count occurrences of
# pattern in a text.
def KMPSearch(pat, txt):
     
    M = len(pat)
    N = len(txt)
 
    # Create lps[] that will hold the longest
    # prefix suffix values for pattern
    lps = [None] * M
    j = 0  # index for pat[]
 
    # Preprocess the pattern (calculate lps[]
    # array)
    computeLPSArray(pat, M, lps)
 
    i = 0  # index for txt[]
    res = 0
    next_i = 0
 
    while (i < N):
        if pat[j] == txt[i]:
            j = j + 1
            i = i + 1
        if j == M:
             
            # When we find pattern first time,
            # we iterate again to check if there
            # exists more pattern
            j = lps[j - 1]
            res = res + 1
 
            # We start i to check for more than once
            # appearance of pattern, we will reset i
            # to previous start+1
            if lps[j] != 0:
                next_i = next_i + 1
                i = next_i
                j = 0
 
        # Mismatch after j matches
        elif ((i < N) and (pat[j] != txt[i])):
             
            # Do not match lps[0..lps[j-1]]
            # characters, they will match anyway
            if (j != 0):
                j = lps[j - 1]
            else:
                i = i + 1
                 
    return res
     
def computeLPSArray(pat, M, lps):
     
    # Length of the previous longest
    # prefix suffix
    len = 0
    i = 1
    lps[0] = 0  # lps[0] is always 0
 
    # The loop calculates lps[i] for
    # i = 1 to M-1
    while (i < M):
        if pat[i] == pat[len]:
            len = len + 1
            lps[i] = len
            i = i + 1
             
        else:  # (pat[i] != pat[len])
         
            # This is tricky. Consider the example.
            # AAACAAAA and i = 7. The idea is similar
            # to search step.
            if len != 0:
                len = lps[len - 1]
                 
                # Also, note that we do not increment
                # i here
 
            else:  # if (len == 0)
                lps[i] = len
                i = i + 1
 
# Driver code
if __name__ == "__main__":
     
    txt = "geeksforgeeks"
    pat = "eeks"
    ans = KMPSearch(pat, txt)
     
    print(ans)
 
# This code is contributed by akhilsaini

C#

// C# program to count occurrences of pattern
// in a text.
using System;
     
public class KMP_String_Matching
{
    int KMPSearch(String pat, String txt)
    {
        int M = pat.Length;
        int N = txt.Length;
 
        // create lps[] that will hold the longest
        // prefix suffix values for pattern
        int []lps = new int[M];
        int j = 0; // index for pat[]
 
        // Preprocess the pattern (calculate lps[]
        // array)
        computeLPSArray(pat,M,lps);
 
        int i = 0; // index for txt[]
        int res = 0;
        int next_i = 0;
         
        while (i < N)
        {
            if (pat[j] == txt[i])
            {
                j++;
                i++;
            }
            if (j == M)
            {
                // When we find pattern first time,
                // we iterate again to check if there
                // exists more pattern
                j = lps[j-1];
                res++;
 
                // We start i to check for more than once
                // appearance of pattern, we will reset i
                // to previous start+1
                if (lps[j]!=0)
                    i = ++next_i;
                j = 0;
            }
 
            // mismatch after j matches
            else if (i < N && pat[j] != txt[i])
            {
                // Do not match lps[0..lps[j-1]] characters,
                // they will match anyway
                if (j != 0)
                    j = lps[j-1];
                else
                    i = i+1;
            }
        }
        return res;
    }
 
    void computeLPSArray(String pat, int M, int []lps)
    {
        // length of the previous longest prefix suffix
        int len = 0;
        int i = 1;
        lps[0] = 0; // lps[0] is always 0
 
        // the loop calculates lps[i] for i = 1 to M-1
        while (i < M)
        {
            if (pat[i] == pat[len])
            {
                len++;
                lps[i] = len;
                i++;
            }
            else // (pat[i] != pat[len])
            {
                // This is tricky. Consider the example.
                // AAACAAAA and i = 7. The idea is similar
                // to search step.
                if (len != 0)
                {
                    len = lps[len-1];
 
                    // Also, note that we do not increment
                    // i here
                }
                else // if (len == 0)
                {
                    lps[i] = len;
                    i++;
                }
            }
        }
    }
 
    // Driver code
    public static void Main(String []args)
    {
        String txt = "geeksforgeeks";
        String pat = "eeks";
        int ans = new KMP_String_Matching().KMPSearch(pat,txt);
        Console.WriteLine(ans);
    }
}
 
// This code is contributed by Princi Singh

输出:

2

时间复杂度: O(M + N)

https://youtu.be/agteFWnjJ