📜  字谜子串搜索(或搜索所有排列)

📅  最后修改于: 2021-05-06 08:49:45             🧑  作者: Mango

给定一个文本txt [0..n-1]和一个模式pat [0..m-1],编写一个函数search(char pat [],char txt []),打印pat []及其所有排列(或字谜)在txt []中出现的全部位置。您可以假设n> m。
预期时间复杂度为O(n)

例子:

1) Input:  txt[] = "BACDGABCDA"  pat[] = "ABCD"
   Output:   Found at Index 0
             Found at Index 5
             Found at Index 6
2) Input: txt[] =  "AAABABAA" pat[] = "AABA"
   Output:   Found at Index 0
             Found at Index 1
             Found at Index 4

我们强烈建议您单击此处并进行实践,然后再继续解决方案。

这个问题与标准模式搜索问题略有不同,这里我们也需要搜索字谜。因此,我们无法直接应用标准模式搜索算法,例如KMP,Rabin Karp,Boyer Moore等。

一个简单的想法是修改Rabin Karp算法。例如,我们可以将哈希值作为所有字符的ASCII值的总和,以大质数为模。对于文本的每个字符,我们可以将当前字符添加到哈希值,然后减去上一个窗口的第一个字符。该解决方案看起来不错,但与标准Rabin Karp一样,该解决方案的最坏情况时间复杂度为O(mn)。当所有哈希值都匹配并且我们一一匹配所有字符时,就会发生最坏的情况。

如果假定字母表大小是固定的(这通常成立,因为ASCII中最多只有256个可能的字符),就可以达到O(n)的时间复杂度。其思路是使用两个计数数组:

1)第一个计数数组存储模式中字符的频率。
2)第二个计数数组在当前文本窗口中存储字符的频率。

需要注意的重要一点是,比较两个计数数组的时间复杂度为O(1),因为它们中的元素数量是固定的(与模式和文本大小无关)。以下是此算法的步骤。
1)将模式中各字符的频率存储在第一个计数数组countP []中,并将文本第一个窗口中各字符的频率存储在数组countTW []中。

2)现在运行一个从i = M到N-1的循环,在每次迭代中执行以下操作:
…..a)如果两个计数数组相同,则找到了一次出现。
…..b)将文本当前字符txt [i]在countTW []中的计数加一。
…..c)将上一个窗口的第一个字符txt [i-M]在countTW []中的计数减一。

3)上面的循环未检查最后一个窗口,因此请显式检查它。

以下是上述算法的实现。

C++
// C++ program to search all anagrams of a pattern in a text
#include
#include
#define MAX 256
using namespace std;
  
// This function returns true if contents of arr1[] and arr2[]
// are same, otherwise false.
bool compare(char arr1[], char arr2[])
{
    for (int i=0; i


Java
// Java program to search all anagrams 
// of a pattern in a text
/**
 * Prints every position in a text at which some permutation (anagram) of a
 * pattern occurs, using a sliding window and two character-frequency tables.
 */
public class GFG 
{
    /**
     * Size of the frequency tables. Only characters whose code is below
     * this value can be counted; larger codes index past the tables.
     */
    static final int MAX = 256;

    /**
     * Reports whether the first MAX entries of the two arrays are equal.
     * search() keeps its counts in int[] tables and does not call this
     * method; it is retained for existing callers.
     *
     * @param arr1 first array, at least MAX entries long
     * @param arr2 second array, at least MAX entries long
     * @return true if arr1[i] == arr2[i] for every i below MAX
     */
    static boolean compare(char arr1[], char arr2[])
    {
        for (int i = 0; i < MAX; i++)
            if (arr1[i] != arr2[i])
                return false;
        return true;
    }

    /**
     * Prints "Found at Index k" for every k such that the window of txt
     * starting at k with the length of pat is a permutation of pat.
     * Prints nothing when pat is longer than txt.
     *
     * @param pat pattern whose permutations are searched for
     * @param txt text to search in
     */
    static void search(String pat, String txt)
    {
        int M = pat.length();
        int N = txt.length();

        // A pattern longer than the text cannot occur in it; without this
        // check the priming loop below reads past the end of txt.
        if (M > N)
            return;

        // countP[]:  frequency of each character in the pattern
        // countTW[]: frequency of each character in the current window
        // int counters are used because char counters wrap around at
        // 65536, which makes different windows look identical.
        int[] countP = new int[MAX];
        int[] countTW = new int[MAX];
        for (int i = 0; i < M; i++)
        {
            countP[pat.charAt(i)]++;
            countTW[txt.charAt(i)]++;
        }

        // Slide the window over the remaining characters of the text.
        for (int i = M; i < N; i++)
        {
            // The window currently starts at i - M.
            if (java.util.Arrays.equals(countP, countTW))
                System.out.println("Found at Index " +
                                          (i - M));

            // Take the current character into the window ...
            countTW[txt.charAt(i)]++;

            // ... and drop the first character of the previous window.
            countTW[txt.charAt(i - M)]--;
        }

        // The loop never tests the last window, so test it here.
        if (java.util.Arrays.equals(countP, countTW))
            System.out.println("Found at Index " + 
                                       (N - M));
    }

    /* Driver program to test above function */
    public static void main(String args[])
    {
        String txt = "BACDGABCDA";
        String pat = "ABCD";
        search(pat, txt);
    }
}
// This code is contributed by Sumit Ghosh


Python3
# Python program to search all
# anagrams of a pattern in a text
  
# Number of leading entries that compare() inspects in each sequence.
MAX = 256


# Report whether arr1 and arr2 agree at every index below MAX.
# Returns False as soon as one position differs, True otherwise.
def compare(arr1, arr2):
    return not any(arr1[k] != arr2[k] for k in range(MAX))
      
# This function search for all
# permutations of pat[] in txt[]  
def search(pat, txt):
    """Print the start index of every window of txt that is an anagram of pat.

    A window of len(pat) characters slides over txt. Two frequency tables
    are kept, one for the pattern and one for the current window, and a
    match is reported whenever they are equal.

    Args:
        pat: pattern whose permutations are searched for.
        txt: text to search in.

    Prints one "Found at Index k" line per match, in increasing order of k.
    Prints nothing when pat is longer than txt.
    """
    # Function-scope import keeps this function usable on its own.
    from collections import Counter

    M = len(pat)
    N = len(txt)

    # A pattern longer than the text cannot occur in it; indexing txt
    # with pattern positions would run past its end.
    if M > N:
        return

    # countP:  frequency of each character in the pattern
    # countTW: frequency of each character in the current window
    # Keying by character (instead of indexing a 256-slot list with
    # ord()) lets any character be counted, not only codes below 256.
    countP = Counter(pat)
    countTW = Counter(txt[:M])

    # Slide the window over the remaining characters of the text.
    for i in range(M, N):

        # The window currently starts at i-M.
        if countP == countTW:
            print("Found at Index", (i-M))

        # Take the current character into the window
        countTW[txt[i]] += 1

        # Drop the first character of the previous window. Entries that
        # reach zero are deleted so that equality never depends on
        # leftover zero counts.
        leaving = txt[i-M]
        countTW[leaving] -= 1
        if countTW[leaving] == 0:
            del countTW[leaving]

    # The loop never tests the last window, so test it here.
    if countP == countTW:
        print("Found at Index", N-M)
          
# Driver program to test above function       
# Sample input: pattern and text of the first example in the article.
pat, txt = "ABCD", "BACDGABCDA"
search(pat=pat, txt=txt)
  
# This code is contributed
# by Upendra Singh Bartwal


C#
// C# program to search all anagrams 
// of a pattern in a text 
using System;
  
/// <summary>
/// Prints every position in a text at which some permutation (anagram) of a
/// pattern occurs, using a sliding window and two character-frequency tables.
/// </summary>
class GFG
{
    /// <summary>
    /// Size of the frequency tables. Only characters whose code is below
    /// this value can be counted; larger codes index past the tables.
    /// </summary>
    public const int MAX = 256;

    /// <summary>
    /// Reports whether the first MAX entries of the two arrays are equal.
    /// search() keeps its counts in int[] tables and does not call this
    /// method; it is retained for existing callers.
    /// </summary>
    /// <param name="arr1">First array, at least MAX entries long.</param>
    /// <param name="arr2">Second array, at least MAX entries long.</param>
    /// <returns>true if arr1[i] == arr2[i] for every i below MAX.</returns>
    public static bool compare(char[] arr1,
                               char[] arr2)
    {
        for (int i = 0; i < MAX; i++)
        {
            if (arr1[i] != arr2[i])
            {
                return false;
            }
        }
        return true;
    }

    // Reports whether two frequency tables of MAX entries are identical.
    private static bool SameCounts(int[] first, int[] second)
    {
        for (int i = 0; i < MAX; i++)
        {
            if (first[i] != second[i])
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Writes "Found at Index k" for every k such that the window of txt
    /// starting at k with the length of pat is a permutation of pat.
    /// Writes nothing when pat is longer than txt.
    /// </summary>
    /// <param name="pat">Pattern whose permutations are searched for.</param>
    /// <param name="txt">Text to search in.</param>
    public static void search(string pat,
                              string txt)
    {
        int M = pat.Length;
        int N = txt.Length;

        // A pattern longer than the text cannot occur in it; without this
        // check the priming loop below reads past the end of txt.
        if (M > N)
        {
            return;
        }

        // countP[]:  frequency of each character in the pattern
        // countTW[]: frequency of each character in the current window
        // int counters are used because char counters wrap around at
        // 65536, which makes different windows look identical.
        int[] countP = new int[MAX];
        int[] countTW = new int[MAX];
        for (int i = 0; i < M; i++)
        {
            countP[pat[i]]++;
            countTW[txt[i]]++;
        }

        // Slide the window over the remaining characters of the text.
        for (int i = M; i < N; i++)
        {
            // The window currently starts at i - M.
            if (SameCounts(countP, countTW))
            {
                Console.WriteLine("Found at Index " +
                                 (i - M));
            }

            // Take the current character into the window ...
            countTW[txt[i]]++;

            // ... and drop the first character of the previous window.
            countTW[txt[i - M]]--;
        }

        // The loop never tests the last window, so test it here.
        if (SameCounts(countP, countTW))
        {
            Console.WriteLine("Found at Index " +
                             (N - M));
        }
    }

    // Driver Code
    public static void Main(string[] args)
    {
        string txt = "BACDGABCDA";
        string pat = "ABCD";
        search(pat, txt);
    }
}
  
// This code is contributed 
// by Shrikant1


输出:

Found at Index 0
Found at Index 5
Found at Index 6