📜  使用改进的KMP算法计算字符串中每个前缀的出现次数

📅  最后修改于: 2021-04-28 18:28:18             🧑  作者: Mango

给定长度为 N 的字符串 S,任务是计算 S 的每个前缀在 S 中出现的次数。

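例子:

输入: S = "ABACABA"
输出: "A" 出现 4 次,"AB" 出现 2 次,"ABA" 出现 2 次,"ABAC"、"ABACA"、"ABACAB"、"ABACABA" 各出现 1 次。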

朴素方法:

  1. 遍历 S 的所有前缀(记这些前缀的集合为 P)。令 x 为当前前缀。
  2. 在 S 上使用大小为 |x| 的滑动窗口。
  3. 检查 S 上的当前滑动窗口是否等于 x。如果是,则将 count[x] 加 1。

时间复杂度: $O(N^3)$
辅助空间: O(N)

高效方法:
使用KMP算法中的LPS数组(也称为prefix_function )。
字符串的前缀函数定义为长度为 N 的数组 LPS,其中 LPS[i] 是子串 S[0…i] 的最长真前缀(即不等于该子串本身的前缀)的长度,并且该真前缀同时也是该子串的后缀。令 occ[i] 表示长度为 i 的前缀在 S 中出现的次数。

以下是实现此方法的步骤:

  1. 计算LPS数组prefix_function
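  2. 对于前缀函数的每个取值,先统计它在 LPS 数组中出现的次数,记入 occ 数组。
  3. 按长度从大到小处理:此时 occ[i] 是长度为 i 的前缀(不含其自身)的出现次数;它的每一次出现同时也包含一次“既是它的真前缀、又是它的后缀的最长串”(长度为 LPS[i-1])的出现,因此把 occ[i] 累加到 occ[LPS[i-1]] 上。
  4. 最后,将 occ 数组的所有值加 1,因为每个前缀自身(位于字符串开头)的那一次出现也应计入。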

例如:
LPS[i] 表示:在位置 i 处结束、同时也是 S 的前缀的最长子串(不含整个 S[0…i] 本身)的长度为 LPS[i]。这只是该位置上最长的一个;在同一位置结束的还可能有更短的前缀,它们同样需要被计入。
对于字符串 S = "AAAA",其前缀依次为 "A"、"AA"、"AAA"、"AAAA"。

初始时: occ = [0, 0, 0, 0, 0](occ[i] 对应长度为 i 的前缀)。

步骤1:计算该字符串的 LPS 数组,LPS[i] 表示 S[0…i] 中既是真前缀又是后缀的最长串的长度。对于 "AAAA",LPS = [0, 1, 2, 3]。

步骤2:对每个位置 i 执行 occ[LPS[i]]++,把这些作为后缀出现的前缀计入 occ[] 数组。对于 "AAAA",此时 occ = [1, 1, 1, 1, 0]。

步骤3:现在从前缀 "AAA" 开始,按长度递减的顺序遍历各前缀(完整字符串不是真前缀,LPS 的取值不可能等于 N,因此此时 occ[N] 始终为 0,无需处理)。

现在,字符串 "AA" 也包含 "A",但这部分尚未计入,因此执行 occ["A"] += occ["AA"],把 "AA" 的出现次数累加到 "A" 上(一般地,occ[LPS[i-1]] += occ[i])。对于 "AAAA",遍历结束后 occ[3] = 1,occ[2] = 2,occ[1] = 3。

步骤4:最后,将每个 occ[i] 加 1,把各前缀自身(位于字符串开头)的那一次出现也计入。对于 "AAAA",最终 "A"、"AA"、"AAA"、"AAAA" 分别出现 4、3、2、1 次。

下面是上述方法的实现:

C++
// C++ program for the above approach
#include <iostream>
#include <string>
#include <vector>
using namespace std;
 
// Prints every non-empty prefix of s, shortest first, together with its
// occurrence count, one prefix per line.
//
// occ: table indexed by prefix length; occ[i] is read for every i in
//      [1, s.size()], so it must hold at least s.size() + 1 entries.
// s:   the string whose prefixes are listed.
void print(std::vector<int>& occ, std::string& s)
{
    const int n = static_cast<int>(s.size());

    // i is the prefix length, so s.substr(0, i) is the prefix itself
    for (int i = 1; i <= n; i++) {
        std::cout << s.substr(0, i)
                  << " occurs "
                  << occ[i]
                  << " times."
                  << std::endl;
    }
}
 
// Computes the LPS array (KMP prefix function) of s.
//
// LPS[i] is the length of the longest proper prefix of s[0..i] that is
// also a suffix of s[0..i].
//
// Returns a vector of s.size() entries; an empty string yields an empty
// vector (nothing is written to it, so no out-of-range access occurs).
std::vector<int> prefix_function(std::string& s)
{
    const int n = static_cast<int>(s.size());

    // Zero-initialised, so LPS[0] == 0 by construction and every
    // "no border found" case needs no explicit assignment.
    std::vector<int> LPS(n, 0);

    for (int i = 1; i < n; i++) {

        // Start from the longest border of s[0..i-1] and try to
        // extend it by the character s[i].
        int j = LPS[i - 1];

        // On a mismatch fall back to the next shorter border of the
        // prefix of length j until one can be extended or none is left.
        while (j > 0 && s[i] != s[j]) {
            j = LPS[j - 1];
        }

        // If the candidate border can be extended by s[i], its new
        // length is j + 1; otherwise LPS[i] keeps its initial 0.
        if (s[i] == s[j]) {
            LPS[i] = j + 1;
        }
    }

    return LPS;
}
 
// Function to count the occurrence
// of all the prefix in the string S
void count_occurence(string& s)
{
    int n = s.size();
 
    // Call the prefix_function
    // to get LPS
    vector LPS
        = prefix_function(s);
 
    // To store the occurrence of
    // all the prefix
    vector occ(n + 1);
 
    // Count all the suffixes that
    // are also prefix
    for (int i = 0; i < n; i++) {
        occ[LPS[i]]++;
    }
 
    // Add the occurences of
    // i to smaller prefixes
    for (int i = n - 1;
         i > 0; i--) {
        occ[LPS[i - 1]] += occ[i];
    }
 
    // Adding 1 to all occ[i] for all
    // the orignal prefix
    for (int i = 0; i <= n; i++)
        occ[i]++;
 
    // Function Call to print the
    // occurence of all the prefix
    print(occ, s);
}
 
// Driver Code
int main()
{
    // Given String
    string A = "ABACABA";
 
    // Function Call
    count_occurence(A);
    return 0;
}


Java
// Java program for
// the above approach
import java.util.*;
class GFG{

/**
 * Prints every non-empty prefix of {@code s}, shortest first, followed
 * by its occurrence count, one prefix per line.
 *
 * @param occ table indexed by prefix length; {@code occ[i]} is read for
 *            every i in [1, s.length()]
 * @param s   string whose prefixes are printed
 */
static void print(int[] occ,
                  String s)
{
  // i is a prefix LENGTH, so it has to run up to and including
  // s.length(); stopping one short drops the line for the full string.
  for (int i = 1;
           i <= s.length(); i++)
  {
    System.out.print(s.substring(0, i) +
                     " occurs " + occ[i] +
                     " times." + "\n");
  }
}

/**
 * Computes the LPS array (KMP prefix function) of {@code s}.
 *
 * <p>{@code LPS[i]} is the length of the longest proper prefix of
 * {@code s[0..i]} that is also a suffix of {@code s[0..i]}.
 *
 * @param s input string; may be empty
 * @return array of length {@code s.length()} (empty for an empty string)
 */
static int[] prefix_function(String s)
{
  int n = s.length();

  // Java zero-fills new int arrays, so LPS[0] == 0 already holds and
  // nothing is written when the string is empty.
  int []LPS = new int[n];

  for (int i = 1; i < n; i++)
  {
    // Start from the longest border of s[0..i-1] and try to
    // extend it by the character s[i].
    int j = LPS[i - 1];

    // On a mismatch fall back to the next shorter border.
    while (j > 0 &&
           s.charAt(i) != s.charAt(j))
    {
      j = LPS[j - 1];
    }

    // If the candidate border can be extended, its new length is
    // j + 1; otherwise LPS[i] keeps its initial value 0.
    if (s.charAt(i) == s.charAt(j))
    {
      LPS[i] = j + 1;
    }
  }

  return LPS;
}

/**
 * Counts how many times each prefix of {@code s} occurs inside
 * {@code s} and prints the result through {@link #print}.
 *
 * @param s input string; an empty string prints nothing
 */
static void count_occurence(String s)
{
  int n = s.length();

  // LPS[i]: longest proper prefix of s[0..i] that is also its suffix
  int[] LPS = prefix_function(s);

  // occ[i] is the count for the prefix of length i; index 0 (the
  // empty prefix) only serves as an accumulator and is never printed.
  int []occ = new int[n + 1];

  // Every position i ends an occurrence of the prefix of
  // length LPS[i].
  for (int i = 0; i < n; i++)
  {
    occ[LPS[i]]++;
  }

  // Longest first: each occurrence of the prefix of length i also
  // contains an occurrence of its longest border (length LPS[i - 1]).
  for (int i = n - 1;
           i > 0; i--)
  {
    occ[LPS[i - 1]] += occ[i];
  }

  // Account for each prefix's own occurrence at the start of s.
  for (int i = 0; i <= n; i++)
  {
    occ[i]++;
  }

  print(occ, s);
}

// Driver Code
public static void main(String[] args)
{
  // Given String
  String A = "ABACABA";

  // Function Call
  count_occurence(A);
}
}
 
// This code is contributed by Princi Singh


Python3
# Python3 program for the above approach
 
# Function to print every non-empty prefix of s, shortest first,
# together with its occurrence count (one prefix per line).
#
# occ : sequence indexed by prefix length; occ[i] is read for
#       every i in 1..len(s)
# s   : the string whose prefixes are listed
def Print(occ, s):

    # length runs over the prefix lengths 1..len(s)
    for length in range(1, len(s) + 1):

        # "occurs" matches the documented output format
        # ("A occurs 4 times.")
        print(s[:length], "occurs", occ[length], "times.")
 
# Function to build the LPS array (KMP prefix function) of s:
# LPS[i] is the length of the longest proper prefix of s[0..i]
# that is also a suffix of s[0..i].
def prefix_function(s):

    size = len(s)

    # All entries start at 0, which already covers LPS[0]
    LPS = [0] * size

    for pos in range(1, size):

        # Longest border of s[0..pos-1]: the first candidate
        # to extend with the character s[pos]
        k = LPS[pos - 1]

        # Fall back to shorter borders until one can be
        # extended or none is left
        while k > 0 and s[pos] != s[k]:
            k = LPS[k - 1]

        # An extendable border grows by one character;
        # otherwise no prefix equals a suffix here
        LPS[pos] = k + 1 if s[pos] == s[k] else 0

    return LPS
 
# Function to count how often every prefix of s occurs in s
# and to print the result through Print()
def count_occurence(s):

    length = len(s)

    # borders[i]: longest proper prefix of s[0..i] that is
    # also its suffix
    borders = prefix_function(s)

    # tally[k] collects the count for the prefix of length k
    tally = [0] * (length + 1)

    # Each position ends an occurrence of the prefix whose
    # length is the border value at that position
    for border in borders:
        tally[border] += 1

    # Longest first: pass the count of the prefix of length k
    # on to its own longest border
    for k in range(length - 1, 0, -1):
        tally[borders[k - 1]] += tally[k]

    # Add 1 everywhere for each prefix's own occurrence at the
    # start of s, then print
    Print([count + 1 for count in tally], s)
 
# Driver Code: these statements run at module level, so the
# prefix counts for the sample string are printed on execution.
 
# Given String (sample input)
A = "ABACABA"
 
# Function Call: prints one line per prefix of A
count_occurence(A)
 
# This code is contributed by avanitrachhadiya2155


C#
// C# program for
// the above approach
using System;
class GFG{

// Prints every non-empty prefix of s, shortest first, followed
// by its occurrence count, one prefix per line.
//
// occ: table indexed by prefix length; occ[i] is read for every
//      i in [1, s.Length]
// s:   string whose prefixes are printed
static void print(int[] occ,
                  String s)
{
  // i is a prefix LENGTH, so it has to run up to and including
  // s.Length; stopping one short drops the line for the full string.
  for (int i = 1;
           i <= s.Length; i++)
  {
    Console.Write(s.Substring(0, i) +
                  " occurs " + occ[i] +
                  " times." + "\n");
  }
}

// Computes the LPS array (KMP prefix function) of s: LPS[i] is
// the length of the longest proper prefix of s[0..i] that is
// also a suffix of s[0..i].
//
// Returns an array of length s.Length (empty for an empty string).
static int[] prefix_function(String s)
{
  int n = s.Length;

  // C# zero-fills new int arrays, so LPS[0] == 0 already holds and
  // nothing is written when the string is empty.
  int []LPS = new int[n];

  for (int i = 1; i < n; i++)
  {
    // Start from the longest border of s[0..i-1] and try to
    // extend it by the character s[i].
    int j = LPS[i - 1];

    // On a mismatch fall back to the next shorter border.
    while (j > 0 && s[i] != s[j])
    {
      j = LPS[j - 1];
    }

    // If the candidate border can be extended, its new length is
    // j + 1; otherwise LPS[i] keeps its initial value 0.
    if (s[i] == s[j])
    {
      LPS[i] = j + 1;
    }
  }

  return LPS;
}

// Counts how many times each prefix of s occurs inside s and
// prints the result through print(). An empty string prints nothing.
static void count_occurence(String s)
{
  int n = s.Length;

  // LPS[i]: longest proper prefix of s[0..i] that is also its suffix
  int[] LPS = prefix_function(s);

  // occ[i] is the count for the prefix of length i; index 0 (the
  // empty prefix) only serves as an accumulator and is never printed.
  int []occ = new int[n + 1];

  // Every position i ends an occurrence of the prefix of
  // length LPS[i].
  for (int i = 0; i < n; i++)
  {
    occ[LPS[i]]++;
  }

  // Longest first: each occurrence of the prefix of length i also
  // contains an occurrence of its longest border (length LPS[i - 1]).
  for (int i = n - 1;
           i > 0; i--)
  {
    occ[LPS[i - 1]] += occ[i];
  }

  // Account for each prefix's own occurrence at the start of s.
  for (int i = 0; i <= n; i++)
  {
    occ[i]++;
  }

  print(occ, s);
}

// Driver Code
public static void Main(String[] args)
{
  // Given String
  String A = "ABACABA";

  // Function Call
  count_occurence(A);
}
}
 
// This code is contributed by Amit Katiyar


输出:
A occurs 4 times.
AB occurs 2 times.
ABA occurs 2 times.
ABAC occurs 1 times.
ABACA occurs 1 times.
ABACAB occurs 1 times.
ABACABA occurs 1 times.

时间复杂度: $O(N^2)$(计算 LPS 数组和 occ 数组只需 $O(N)$;逐个输出长度为 1 到 N 的全部前缀共需 $O(N^2)$)
辅助空间: $O(N)$