📌  相关文章
📜  使用修改的 KMP 算法计算字符串中每个前缀的出现次数

📅  最后修改于: 2021-09-17 07:41:41             🧑  作者: Mango

给定一个大小为N的字符串S ,任务是计算给定字符串S的所有前缀的出现次数。

例子:

朴素方法:

  1. 遍历集合 P 中的所有前缀。设 x 为前缀。
  2. 使用一个大小为 |x| 的滑动窗口在 S 上逐位滑动。
  3. 检查 S 上的当前滑动窗口是否等于 x。如果是,则将 count[x] 增加 1。

时间复杂度: O(N^3)
辅助空间: O(N)

有效的方法:
使用 KMP 算法中的LPS数组(也称为prefix_function )。
这个字符串的前缀函数被定义为一个长度为N的数组LPS ,其中LPS[i]是子字符串S[0…i]的最长真前缀的长度,且该真前缀同时也是这个子字符串的后缀。令occ[i]表示长度为i的前缀出现的次数。

以下是实现此方法的步骤:

  1. 计算LPS 数组prefix_function
  2. 对于前缀函数的每个值,首先计算它在LPS 数组中出现的次数。
  3. 长度为 i 的前缀作为后缀恰好出现 occ[i] 次,因此必须将这个数加到它的最长后缀(该后缀同时也是前缀,长度为 LPS[i-1])的出现次数上。
  4. 最后,将occ array 的所有值加 1,因为原始前缀也应该计算在内。

例如:
LPS[i] 表示在位置i 出现一个长度为LPS[i]的前缀。这是可能的最长前缀。但是可能会出现更短的前缀。
对于 String S = “AAAA” ,以下是前缀:

最初:

Step1:以下字符串的LPS数组表示最长前缀的长度,也是后缀:

步骤 2:将这些出现的前缀作为后缀添加到occ[]数组中的答案中:

第 3 步:现在从“AAA”开始以相反的顺序遍历各个前缀(完整字符串对应的值始终为 0,因为完整的字符串不是它自身的真前缀)。

现在字符串“AA”也包含“A”,但尚未计算在内,因此将字符串“A”的出现次数增加:occ[“A”] += occ[“AA”]。以下是相同的计数:

步骤 4:最后将 occ 数组中的所有值加 1,因为每个原始前缀本身尚未被统计。

下面是上述方法的实现:

C++
// C++ program for the above approach
#include <iostream>
#include <string>
#include <vector>
using namespace std;
 
// Prints every non-empty prefix of s together with its occurrence count.
//   occ - counts indexed by prefix length: occ[len] is printed next to
//         s.substr(0, len); must hold at least s.size() + 1 entries
//   s   - the string whose prefixes are listed
void print(vector<int>& occ, string& s)
{
    const int n = int(s.size());

    // len runs up to and including n so the full string is listed too
    for (int len = 1; len <= n; len++) {
        cout << s.substr(0, len)
             << " occurs "
             << occ[len]
             << " times."
             << endl;
    }
}
 
// Computes the KMP prefix function (LPS array) of s.
// LPS[i] is the length of the longest proper prefix of s[0..i] that is
// also a suffix of s[0..i].
// Returns a vector of s.size() entries; it is empty for an empty string.
vector<int> prefix_function(string& s)
{
    const int n = int(s.size());

    // Elements are value-initialised to 0, so LPS[0] == 0 holds by
    // construction and an empty input never indexes into the vector.
    vector<int> LPS(n);

    for (int i = 1; i < n; i++) {

        // Start from the longest border of s[0..i-1] and try to
        // extend it with s[i]
        int j = LPS[i - 1];

        // Fall back to successively shorter borders until one can be
        // extended by s[i] or none is left
        while (j > 0 && s[i] != s[j]) {
            j = LPS[j - 1];
        }

        // A match extends the border by one character; otherwise no
        // prefix of s[0..i] is also its suffix
        LPS[i] = (s[i] == s[j]) ? j + 1 : 0;
    }

    return LPS;
}
 
// Function to count the occurrence
// of all the prefix in the string S
void count_occurence(string& s)
{
    int n = s.size();
 
    // Call the prefix_function
    // to get LPS
    vector LPS
        = prefix_function(s);
 
    // To store the occurrence of
    // all the prefix
    vector occ(n + 1);
 
    // Count all the suffixes that
    // are also prefix
    for (int i = 0; i < n; i++) {
        occ[LPS[i]]++;
    }
 
    // Add the occurences of
    // i to smaller prefixes
    for (int i = n - 1;
         i > 0; i--) {
        occ[LPS[i - 1]] += occ[i];
    }
 
    // Adding 1 to all occ[i] for all
    // the orignal prefix
    for (int i = 0; i <= n; i++)
        occ[i]++;
 
    // Function Call to print the
    // occurence of all the prefix
    print(occ, s);
}
 
// Driver Code
int main()
{
    // Given String
    string A = "ABACABA";
 
    // Function Call
    count_occurence(A);
    return 0;
}


Java
// Java program for
// the above approach
import java.util.*;
class GFG{

/**
 * Prints every non-empty prefix of {@code s} with its occurrence count.
 *
 * @param occ counts indexed by prefix length; {@code occ[len]} is printed
 *            next to {@code s.substring(0, len)}
 * @param s   the string whose prefixes are listed
 */
static void print(int[] occ,
                  String s)
{
  // len runs up to and including s.length() so that the full string,
  // which is also a prefix, is listed as well
  for (int len = 1; len <= s.length(); len++)
  {
    System.out.print(s.substring(0, len) +
                     " occurs " + occ[len] +
                     " times." + "\n");
  }
}

/**
 * Computes the KMP prefix function (LPS array) of {@code s}.
 *
 * @param s the input string
 * @return an array of {@code s.length()} entries where entry {@code i} is
 *         the length of the longest proper prefix of {@code s[0..i]} that
 *         is also a suffix of it; empty for an empty string
 */
static int[] prefix_function(String s)
{
  int n = s.length();

  // Java zero-initialises the array, so LPS[0] == 0 holds by construction
  // and an empty input never indexes into it
  int[] LPS = new int[n];

  for (int i = 1; i < n; i++)
  {
    // Start from the longest border of s[0..i-1] and try to extend it
    int j = LPS[i - 1];

    // Fall back to successively shorter borders until one can be
    // extended by s[i] or none is left
    while (j > 0 &&
           s.charAt(i) != s.charAt(j))
    {
      j = LPS[j - 1];
    }

    // A match extends the border by one character; otherwise no prefix
    // of s[0..i] is also its suffix
    LPS[i] = (s.charAt(i) == s.charAt(j)) ? j + 1 : 0;
  }

  return LPS;
}

/**
 * Counts how many times each prefix of {@code s} occurs in {@code s} and
 * prints the result via {@link #print(int[], String)}.
 *
 * @param s the input string
 */
static void count_occurence(String s)
{
  int n = s.length();

  int[] LPS = prefix_function(s);

  // occ[len] accumulates the occurrences of the prefix of length len
  int[] occ = new int[n + 1];

  // Each LPS value marks one position where a prefix of that length
  // ends as a suffix
  for (int i = 0; i < n; i++)
  {
    occ[LPS[i]]++;
  }

  // Every occurrence of the prefix of length i also contains its longest
  // border (length LPS[i - 1]), so pass the counts down from longer
  // prefixes to shorter ones
  for (int i = n - 1; i > 0; i--)
  {
    occ[LPS[i - 1]] += occ[i];
  }

  // Account for each prefix occurring at the start of the string
  for (int i = 0; i <= n; i++)
  {
    occ[i]++;
  }

  print(occ, s);
}

// Driver Code
public static void main(String[] args)
{
  // Given String
  String A = "ABACABA";

  // Function Call
  count_occurence(A);
}
}
 
// This code is contributed by Princi Singh


Python3
# Python3 program for the above approach
 
# Function to print the count of all
# prefix in the given string
def Print(occ, s):
    """Print every non-empty prefix of ``s`` with its occurrence count.

    occ -- sequence indexed by prefix length; ``occ[length]`` is printed
           next to ``s[:length]``
    s   -- the string whose prefixes are listed
    """
    for length in range(1, len(s) + 1):
        # "occurs" keeps the wording identical to the documented output
        # and to the other language implementations
        print(s[:length], "occurs", occ[length], "times.")
 
# Function computing the KMP prefix function (LPS array) of s
def prefix_function(s):
    """Return the LPS array of ``s``.

    Entry ``i`` is the length of the longest proper prefix of
    ``s[0..i]`` that is also a suffix of it. The result has ``len(s)``
    entries, so an empty string yields an empty list.
    """
    size = len(s)

    # Every entry starts at 0, which already covers LPS[0]
    LPS = [0] * size

    for pos in range(1, size):
        # Begin with the longest border of s[0..pos-1]
        border = LPS[pos - 1]

        # Shrink to shorter borders until s[pos] can extend one
        while border > 0 and s[pos] != s[border]:
            border = LPS[border - 1]

        # Extend on a match, otherwise there is no border at all
        LPS[pos] = border + 1 if s[pos] == s[border] else 0

    return LPS
 
# Function counting the occurrences of every prefix of s
def count_occurence(s):
    """Count how often each prefix of ``s`` occurs in ``s`` and print it."""
    n = len(s)
    LPS = prefix_function(s)

    # occ[length] accumulates the occurrences of the prefix of that length
    occ = [0] * (n + 1)

    # Each LPS value marks one place where a prefix ends as a suffix
    for border in LPS:
        occ[border] += 1

    # Pass counts from longer prefixes down to their longest border
    for length in reversed(range(1, n)):
        occ[LPS[length - 1]] += occ[length]

    # Each prefix also occurs once at the start of the string
    occ = [count + 1 for count in occ]

    Print(occ, s)
 
# Driver code: runs at module load time

# Sample input string
A = "ABACABA"

# Count and print the occurrences of every prefix of A
count_occurence(A)
 
# This code is contributed by avanitrachhadiya2155


C#
// C# program for
// the above approach
using System;
class GFG{

// Prints every non-empty prefix of s with its occurrence count.
//   occ - counts indexed by prefix length; occ[len] is printed next to
//         s.Substring(0, len)
//   s   - the string whose prefixes are listed
static void print(int[] occ,
                  String s)
{
  // len runs up to and including s.Length so that the full string,
  // which is also a prefix, is listed as well
  for (int len = 1; len <= s.Length; len++)
  {
    Console.Write(s.Substring(0, len) +
                  " occurs " + occ[len] +
                  " times." + "\n");
  }
}

// Computes the KMP prefix function (LPS array) of s.
// Entry i is the length of the longest proper prefix of s[0..i] that
// is also a suffix of it. Returns an empty array for an empty string.
static int[] prefix_function(String s)
{
  int n = s.Length;

  // New arrays are zero-initialised, so LPS[0] == 0 holds by
  // construction and an empty input never indexes into the array
  int[] LPS = new int[n];

  for (int i = 1; i < n; i++)
  {
    // Start from the longest border of s[0..i-1] and try to extend it
    int j = LPS[i - 1];

    // Fall back to successively shorter borders until one can be
    // extended by s[i] or none is left
    while (j > 0 && s[i] != s[j])
    {
      j = LPS[j - 1];
    }

    // A match extends the border by one character; otherwise no
    // prefix of s[0..i] is also its suffix
    LPS[i] = (s[i] == s[j]) ? j + 1 : 0;
  }

  return LPS;
}

// Counts how many times each prefix of s occurs in s and prints the
// result through print().
static void count_occurence(String s)
{
  int n = s.Length;

  int[] LPS = prefix_function(s);

  // occ[len] accumulates the occurrences of the prefix of length len
  int[] occ = new int[n + 1];

  // Each LPS value marks one position where a prefix of that length
  // ends as a suffix
  for (int i = 0; i < n; i++)
  {
    occ[LPS[i]]++;
  }

  // Every occurrence of the prefix of length i also contains its
  // longest border (length LPS[i - 1]), so pass the counts down from
  // longer prefixes to shorter ones
  for (int i = n - 1; i > 0; i--)
  {
    occ[LPS[i - 1]] += occ[i];
  }

  // Account for each prefix occurring at the start of the string
  for (int i = 0; i <= n; i++)
  {
    occ[i]++;
  }

  print(occ, s);
}

// Driver Code
public static void Main(String[] args)
{
  // Given String
  String A = "ABACABA";

  // Function Call
  count_occurence(A);
}
}
 
// This code is contributed by Amit Katiyar


输出:
A occurs 4 times.
AB occurs 2 times.
ABA occurs 2 times.
ABAC occurs 1 times.
ABACA occurs 1 times.
ABACAB occurs 1 times.
ABACABA occurs 1 times.

时间复杂度: O(N^2)(计算 LPS 数组和统计次数只需 O(N),逐个输出所有前缀共需 O(N^2))
辅助空间: O(N)

如果您希望与专家一起参加现场课程,请参阅DSA 现场工作专业课程学生竞争性编程现场课程