📌  相关文章
📜  计算一个字符串恰好发生K次的M长度子字符串

📅  最后修改于: 2021-04-17 18:31:25             🧑  作者: Mango

给定长度为N的字符串S和两个整数M和K,任务是计算在字符串S中恰好出现K次的、长度为M的不同子字符串的个数。

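例子:
输入:S = "abacaba", M = 3, K = 2
输出:1
解释:长度为3的不同子字符串有 "aba"、"bac"、"aca"、"cab",其中只有 "aba" 恰好出现2次(起始下标为0和4)。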

朴素方法:最简单的方法是生成所有长度为M的子字符串,并将每个子字符串在字符串S中出现的频率存储在Map中。然后遍历Map,如果某个子字符串的频率等于K,则将count加1。完成上述步骤后,打印count作为结果。
时间复杂度: O((N – M)* N * M)
辅助空间: O(N – M)

高效方法:可以使用KMP算法来统计每个子字符串在字符串S中出现的频率,从而优化上述方法。请按照以下步骤解决问题:

  • 初始化一个变量count为0,用于存储满足条件的子字符串的数量。
  • 从字符串S中生成所有长度为M的子字符串,去重后存入一个集合(或数组)中,例如arr []。
  • 遍历数组arr [] ,对于数组中的每个字符串,使用KMP算法计算其在字符串S中的频率。
  • 如果该字符串的频率等于K(在下面的代码中对应参数P),则将count增加1。
  • 完成上述步骤后,将count的值打印为子字符串的结果计数。

下面是上述方法的实现:

C++
// C++ program for the above approach
#include <iostream>
#include <set>
#include <string>
#include <vector>
using namespace std;
 
// Function to compute the LPS array of a pattern.
// lps[i] receives the length of the longest proper prefix of
// pat[0..i] that is also a suffix of pat[0..i].
//   pat - the pattern
//   M   - number of leading characters of pat to process
//   lps - caller-provided output buffer with room for at least M ints
// Nothing is written when M <= 0.
void computeLPSArray(const std::string& pat, int M,
                     int lps[])
{
    // An empty pattern has no lps[0] to write
    if (M <= 0)
        return;

    // Length of the previous
    // longest prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0;

    // Iterate from [1, M - 1] to find lps[i]
    while (i < M) {

        // The current border can be extended by one character
        if (pat[i] == pat[len]) {
            lps[i++] = ++len;
        }

        // Mismatch with a non-empty border: fall back to the
        // next shorter border; i is deliberately not advanced
        else if (len != 0) {
            len = lps[len - 1];
        }

        // Mismatch with no border left
        else {
            lps[i++] = 0;
        }
    }
}

// Function to find the frequency of pat in the string txt.
// Overlapping occurrences are counted separately, e.g. "aa"
// occurs 3 times in "aaaa".
// Returns 0 when pat is empty or longer than txt.
int KMPSearch(const std::string& pat, const std::string& txt)
{
    // Stores length of both strings
    const int M = static_cast<int>(pat.length());
    const int N = static_cast<int>(txt.length());

    // No occurrence is possible in these cases; this also keeps
    // the lps accesses below in bounds
    if (M == 0 || M > N)
        return 0;

    // Preprocess the pattern (calculate the lps array)
    std::vector<int> lps(M);
    computeLPSArray(pat, M, lps.data());

    int i = 0;   // index into txt
    int j = 0;   // index into pat
    int res = 0; // number of occurrences found

    while (i < N) {
        if (pat[j] == txt[i]) {
            ++i;
            ++j;

            // Full match: count it and continue from the longest
            // border so that overlapping occurrences are found
            // without rescanning txt
            if (j == M) {
                ++res;
                j = lps[j - 1];
            }
        }

        // Mismatch after j matches: the first lps[j - 1]
        // characters are known to match, so skip them
        else if (j != 0) {
            j = lps[j - 1];
        }

        // Mismatch at the first pattern character
        else {
            ++i;
        }
    }

    // Return the required frequency
    return res;
}
 
// Function to find count of substrings
// of length M occurring exactly P times
// in the string, S
void findCount(string& S, int M, int P)
{
 
    // Store all substrings of length M
    set vec;
 
    // Store the size of the string, S
    int n = S.length();
 
    // Pick starting point
    for (int i = 0; i < n; i++) {
 
        // Pick ending point
        for (int len = 1;
             len <= n - i; len++) {
 
            // If the substring is of
            // length M, insert it in vec
            string s = S.substr(i, len);
            if (s.length() == M) {
                vec.insert(s);
            }
        }
    }
 
    // Initialise count as 0 to store
    // the required count of substrings
    int count = 0;
 
    // Iterate through the set of
    // substrings
    for (auto it : vec) {
 
        // Store its frequency
        int ans = KMPSearch(it, S);
 
        // If frequency is equal to P
        if (ans == P) {
 
            // Increment count by 1
            count++;
        }
    }
 
    // Print the answer
    cout << count;
}
 
// Driver Code
int main()
{
    string S = "abacaba";
    int M = 3, P = 2;
 
    // Function Call
    findCount(S, M, P);
 
    return 0;
}


Java
// Java Program to implement
// the above approach
 
import java.io.*;
import java.util.*;
 
class GFG {

    // Function to compute the LPS array of a pattern.
    // lps[i] receives the length of the longest proper prefix of
    // pat[0..i] that is also a suffix of pat[0..i].
    //   pat - the pattern
    //   M   - number of leading characters of pat to process
    //   lps - caller-provided output array of length at least M
    // Nothing is written when M <= 0.
    static void computeLPSArray(String pat, int M,
                                int lps[])
    {
        // An empty pattern has no lps[0] to write
        if (M <= 0) {
            return;
        }

        // Length of the previous
        // longest prefix suffix
        int len = 0;
        int i = 1;
        lps[0] = 0;

        // Iterate from [1, M - 1] to find lps[i]
        while (i < M) {

            // The current border can be extended by one character
            if (pat.charAt(i) == pat.charAt(len)) {
                len++;
                lps[i] = len;
                i++;
            }

            // Mismatch with a non-empty border: fall back to the
            // next shorter border; i is deliberately not advanced
            else if (len != 0) {
                len = lps[len - 1];
            }

            // Mismatch with no border left
            else {
                lps[i] = 0;
                i++;
            }
        }
    }

    // Function to find the frequency of pat in the string txt.
    // Overlapping occurrences are counted separately, e.g. "aa"
    // occurs 3 times in "aaaa".
    // Returns 0 when pat is empty or longer than txt.
    static int KMPSearch(String pat, String txt)
    {
        // Stores length of both strings
        int M = pat.length();
        int N = txt.length();

        // No occurrence is possible in these cases; this also
        // keeps the lps accesses below in bounds
        if (M == 0 || M > N) {
            return 0;
        }

        // Preprocess the pattern (calculate lps[] array)
        int lps[] = new int[M];
        computeLPSArray(pat, M, lps);

        int i = 0;   // index into txt
        int j = 0;   // index into pat
        int res = 0; // number of occurrences found

        while (i < N) {
            if (pat.charAt(j) == txt.charAt(i)) {
                i++;
                j++;

                // Full match: count it and continue from the
                // longest border so that overlapping occurrences
                // are found without rescanning txt
                if (j == M) {
                    res++;
                    j = lps[j - 1];
                }
            }

            // Mismatch after j matches: the first lps[j - 1]
            // characters are known to match, so skip them
            else if (j != 0) {
                j = lps[j - 1];
            }

            // Mismatch at the first pattern character
            else {
                i++;
            }
        }

        // Return the required frequency
        return res;
    }

    // Function to find the count of distinct substrings of
    // length M occurring exactly P times in the string S.
    // The count is printed to standard output. When M is
    // non-positive or larger than S, 0 is printed.
    static void findCount(String S, int M, int P)
    {

        // Store all distinct substrings of length M
        TreeSet<String> vec = new TreeSet<>();

        // Store the size of the string, S
        int n = S.length();

        // A substring of length M starts at every index in
        // [0, n - M], so one pass over the starts is enough
        if (M > 0) {
            for (int i = 0; i + M <= n; i++) {
                vec.add(S.substring(i, i + M));
            }
        }

        // Initialise count as 0 to store
        // the required count of substrings
        int count = 0;

        // Count the substrings whose frequency in S is exactly P
        for (String it : vec) {
            if (KMPSearch(it, S) == P) {
                count++;
            }
        }

        // Print the answer
        System.out.println(count);
    }

    // Driver Code
    public static void main(String[] args)
    {

        String S = "abacaba";
        int M = 3, P = 2;

        // Function Call
        findCount(S, M, P);
    }
}
 
// This code is contributed by kingash.


Python3
# Python 3 program for the above approach
 
# Function to compute the LPS array
def computeLPSArray(pat, M, lps):
    """Fill lps with the prefix-function values of pat.

    lps[i] is set to the length of the longest proper prefix of
    pat[0..i] that is also a suffix of pat[0..i].

    pat -- the pattern
    M   -- number of leading characters of pat to process
    lps -- caller-provided list of length at least M (modified in place)

    Nothing is written when M <= 0.
    """
    # An empty pattern has no lps[0] to write
    if M <= 0:
        return

    # Length of the previous
    # longest prefix suffix
    len1 = 0
    i = 1
    lps[0] = 0

    # Iterate from [1, M - 1] to find lps[i]
    while i < M:

        # The current border can be extended by one character
        if pat[i] == pat[len1]:
            len1 += 1
            lps[i] = len1
            i += 1

        # Mismatch with a non-empty border: fall back to the next
        # shorter border; i is deliberately not advanced
        elif len1 != 0:
            len1 = lps[len1 - 1]

        # Mismatch with no border left
        else:
            lps[i] = 0
            i += 1

# Function to find the frequency of
# pat in the string txt
def KMPSearch(pat, txt):
    """Return how many times pat occurs in txt.

    Overlapping occurrences are counted separately, e.g. "aa"
    occurs 3 times in "aaaa". Returns 0 when pat is empty or
    longer than txt.
    """
    # Stores length of both strings
    M = len(pat)
    N = len(txt)

    # No occurrence is possible in these cases; this also keeps
    # the lps accesses below in bounds
    if M == 0 or M > N:
        return 0

    # Preprocess the pattern (calculate lps[] array)
    lps = [0] * M
    computeLPSArray(pat, M, lps)

    i = 0    # index into txt
    j = 0    # index into pat
    res = 0  # number of occurrences found

    while i < N:
        if pat[j] == txt[i]:
            i += 1
            j += 1

            # Full match: count it and continue from the longest
            # border so that overlapping occurrences are found
            # without rescanning txt
            if j == M:
                res += 1
                j = lps[j - 1]

        # Mismatch after j matches: the first lps[j - 1]
        # characters are known to match, so skip them
        elif j != 0:
            j = lps[j - 1]

        # Mismatch at the first pattern character
        else:
            i += 1

    # Return the required frequency
    return res
 
# Function to find count of substrings
# of length M occurring exactly P times
# in the string, S
def findCount(S, M, P):
    """Print the number of distinct length-M substrings of S that
    occur exactly P times in S.

    When M is non-positive or larger than len(S), no substring
    qualifies and 0 is printed.
    """
    # Store the size of the string, S
    n = len(S)

    # Store all distinct substrings of length M. A substring of
    # length M starts at every index in [0, n - M]; the range is
    # empty when M > n.
    vec = set()
    if M > 0:
        for i in range(n - M + 1):
            vec.add(S[i:i + M])

    # Initialise count as 0 to store
    # the required count of substrings
    count = 0

    # Count the substrings whose frequency in S is exactly P
    for it in vec:
        if KMPSearch(it, S) == P:
            count += 1

    # Print the answer
    print(count)
 
# Driver Code
if __name__ == '__main__':
    # Sample input: the text, the substring length
    # and the required number of occurrences
    S, M, P = "abacaba", 3, 2

    # Function Call
    findCount(S, M, P)

    # This code is contributed by ipg2016107.


C#
// C# program for the above approach
using System;
using System.Collections.Generic;
class GFG
{

  // Function to compute the LPS array of a pattern.
  // lps[i] receives the length of the longest proper prefix of
  // pat[0..i] that is also a suffix of pat[0..i].
  //   pat - the pattern
  //   M   - number of leading characters of pat to process
  //   lps - caller-provided output array of length at least M
  // Nothing is written when M <= 0.
  public static void computeLPSArray(string pat, int M, int[] lps)
  {

    // An empty pattern has no lps[0] to write
    if (M <= 0)
    {
      return;
    }

    // Length of the previous
    // longest prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0;

    // Iterate from [1, M - 1] to find lps[i]
    while (i < M)
    {

      // The current border can be extended by one character
      if (pat[i] == pat[len])
      {
        len++;
        lps[i] = len;
        i++;
      }

      // Mismatch with a non-empty border: fall back to the
      // next shorter border; i is deliberately not advanced
      else if (len != 0)
      {
        len = lps[len - 1];
      }

      // Mismatch with no border left
      else
      {
        lps[i] = 0;
        i++;
      }
    }
  }

  // Function to find the frequency of pat in the string txt.
  // Overlapping occurrences are counted separately, e.g. "aa"
  // occurs 3 times in "aaaa".
  // Returns 0 when pat is empty or longer than txt.
  public static int KMPSearch(string pat, string txt)
  {

    // Stores length of both strings
    int M = pat.Length;
    int N = txt.Length;

    // No occurrence is possible in these cases; this also
    // keeps the lps accesses below in bounds
    if (M == 0 || M > N)
    {
      return 0;
    }

    // Preprocess the pattern (calculate lps[] array)
    int[] lps = new int[M];
    computeLPSArray(pat, M, lps);

    int i = 0;   // index into txt
    int j = 0;   // index into pat
    int res = 0; // number of occurrences found

    while (i < N)
    {
      if (pat[j] == txt[i])
      {
        i++;
        j++;

        // Full match: count it and continue from the longest
        // border so that overlapping occurrences are found
        // without rescanning txt
        if (j == M)
        {
          res++;
          j = lps[j - 1];
        }
      }

      // Mismatch after j matches: the first lps[j - 1]
      // characters are known to match, so skip them
      else if (j != 0)
      {
        j = lps[j - 1];
      }

      // Mismatch at the first pattern character
      else
      {
        i++;
      }
    }

    // Return the required frequency
    return res;
  }

  // Function to find the count of distinct substrings of
  // length M occurring exactly P times in the string S.
  // The count is printed to standard output. When M is
  // non-positive or larger than S, 0 is printed.
  public static void findCount(string S, int M, int P)
  {

    // Store all distinct substrings of length M
    HashSet<string> vec = new HashSet<string>();

    // Store the size of the string, S
    int n = S.Length;

    // A substring of length M starts at every index in
    // [0, n - M], so one pass over the starts is enough
    if (M > 0)
    {
      for (int i = 0; i + M <= n; i++)
      {
        vec.Add(S.Substring(i, M));
      }
    }

    // Initialise count as 0 to store
    // the required count of substrings
    int count = 0;

    // Count the substrings whose frequency in S is exactly P
    foreach (string it in vec)
    {
      if (KMPSearch(it, S) == P)
      {
        count++;
      }
    }

    // Print the answer
    Console.WriteLine(count);
  }

  // Driver code
  static void Main() {
    string S = "abacaba";
    int M = 3, P = 2;

    // Function Call
    findCount(S, M, P);
  }
}
 
// This code is contributed by divyeshrabadiya07.


输出:
1

时间复杂度: $O\big((N \cdot M) + (N^2 - M^2)\big)$
辅助空间: O(N – M)