📜  模式搜索第六套(有限自动机的有效构造)

📅  最后修改于: 2021-04-24 15:06:13             🧑  作者: Mango

在上一篇文章中,我们讨论了基于有限自动机(FA)的模式搜索算法。上一篇文章给出的FA构造方法需要O(m^3 × NO_OF_CHARS)的时间,其中m是模式的长度,NO_OF_CHARS是字符集的大小。实际上,FA可以在O(m × NO_OF_CHARS)时间内构造完成,本文就来讨论这种构造算法。它的思路与KMP算法中lps(最长的、同时也是后缀的真前缀)数组的构造类似:利用已经填好的行来填充新的一行。

上图给出了模式ACACAGA所对应的有限自动机的状态图和状态转移表。
算法:
1)填写第一行。除pat [0] 字符的条目外,第一行中的所有条目始终为0。对于pat [0]字符,我们总是需要进入状态1。
2)将lps初始化为0。第一个索引的lps始终为0。
3)对索引i = 1到M的每一行执行以下操作(M是模式的长度,第M行对应"已完整匹配"的接受状态):
    a)从索引等于lps的行中复制所有条目。
    b)若i < M,将pat [i]字符的条目更新为i + 1(第M行没有对应的pat [M]字符,因此跳过这一步)。
    c)若i < M,更新lps:"lps = TF [lps] [pat [i]]",其中TF是正在构建的二维状态转移表。
以下是上述算法的C++、C、Java、Python3和C#实现。
实现

C++
#include <cstring>
#include <iostream>
#include <memory>
using namespace std;
#define NO_OF_CHARS 256

/* Builds the transition table TF of the finite automaton that recognises
   the pattern pat.

   pat : pattern bytes; only pat[0] .. pat[M - 1] are read.
   M   : length of the pattern.
   TF  : caller-provided table with at least M + 1 rows. After the call,
         TF[s][c] is the state reached from state s on input byte c.
         State s means "the last s input bytes equal the first s bytes of
         the pattern"; state M is the accepting state.

   Every stored state lies in [0, M], so the table can be walked without
   ever leaving its M + 1 rows. */
void computeTransFun(char* pat, int M, int TF[][NO_OF_CHARS])
{
    // Row 0: every byte keeps the automaton in state 0 ...
    for (int x = 0; x < NO_OF_CHARS; x++)
        TF[0][x] = 0;

    // An empty pattern has no further states and no first byte to read.
    if (M <= 0)
        return;

    // ... except the first pattern byte, which advances to state 1.
    // Index through unsigned char: plain char may be signed, and a byte
    // >= 0x80 would otherwise become a negative array index.
    TF[0][static_cast<unsigned char>(pat[0])] = 1;

    // lps is the state the automaton falls back to on a mismatch in row i.
    int lps = 0;
    for (int i = 1; i <= M; i++) {
        // Start from the fallback row: a mismatch behaves exactly as it
        // would in state lps.
        for (int x = 0; x < NO_OF_CHARS; x++)
            TF[i][x] = TF[lps][x];

        // Row M is the accepting state: there is no pat[M] to advance on,
        // so it keeps only the copied fallback transitions.
        if (i < M) {
            const unsigned char c = static_cast<unsigned char>(pat[i]);
            TF[i][c] = i + 1;

            // Fallback state for the next row.
            lps = TF[lps][c];
        }
    }
}
 
/* Prints all occurrences of pat in txt */
void search(char pat[], char txt[])
{
    int M = strlen(pat);
    int N = strlen(txt);
 
    int TF[M + 1][NO_OF_CHARS];
 
    computeTransFun(pat, M, TF);
 
    // process text over FA.
    int i, j = 0;
    for (i = 0; i < N; i++) {
        j = TF[j][txt[i]];
        if (j == M) {
            cout << "pattern found at index " << i - M + 1 << endl;
        }
    }
}
 
/* Driver code */
int main()
{
    char txt[] = "GEEKS FOR GEEKS";
    char pat[] = "GEEKS";
    search(pat, txt);
    return 0;
}
 
// This code is contributed by rathbhupendra
C
#include <stdio.h>
#include <string.h>
#define NO_OF_CHARS 256

/* Builds the transition table TF of the finite automaton that recognises
   the pattern pat.

   pat : pattern bytes; only pat[0] .. pat[M - 1] are read.
   M   : length of the pattern.
   TF  : caller-provided table with at least M + 1 rows. After the call,
         TF[s][c] is the state reached from state s on input byte c.
         State s means "the last s input bytes equal the first s bytes of
         the pattern"; state M is the accepting state.

   Every stored state lies in [0, M], so the table can be walked without
   ever leaving its M + 1 rows. */
void computeTransFun(char* pat, int M, int TF[][NO_OF_CHARS])
{
    int i, lps = 0, x;

    /* Row 0: every byte keeps the automaton in state 0 ... */
    for (x = 0; x < NO_OF_CHARS; x++)
        TF[0][x] = 0;

    /* An empty pattern has no further states and no first byte to read. */
    if (M <= 0)
        return;

    /* ... except the first pattern byte, which advances to state 1.
       Index through unsigned char: plain char may be signed, and a byte
       >= 0x80 would otherwise become a negative array index. */
    TF[0][(unsigned char)pat[0]] = 1;

    /* lps is the state the automaton falls back to on a mismatch in row i. */
    for (i = 1; i <= M; i++) {
        /* Start from the fallback row: a mismatch behaves exactly as it
           would in state lps. */
        for (x = 0; x < NO_OF_CHARS; x++)
            TF[i][x] = TF[lps][x];

        /* Row M is the accepting state: there is no pat[M] to advance on,
           so it keeps only the copied fallback transitions. */
        if (i < M) {
            unsigned char c = (unsigned char)pat[i];
            TF[i][c] = i + 1;

            /* Fallback state for the next row. */
            lps = TF[lps][c];
        }
    }
}

/* Prints the start index of every occurrence of pat in txt, each as
   "\n pattern found at index <i>".

   pat, txt : NUL-terminated byte strings.

   An empty pattern is treated as having no occurrences. */
void search(char* pat, char* txt)
{
    int M = (int)strlen(pat);
    int N = (int)strlen(txt);
    int i, j = 0;

    if (M == 0)
        return;

    /* One row per automaton state 0 .. M. */
    int TF[M + 1][NO_OF_CHARS];

    computeTransFun(pat, M, TF);

    /* Run the text through the automaton; reaching state M means the last
       M bytes read are exactly the pattern. */
    for (i = 0; i < N; i++) {
        /* unsigned char: a signed char >= 0x80 would index negatively. */
        j = TF[j][(unsigned char)txt[i]];
        if (j == M) {
            printf("\n pattern found at index %d", i - M + 1);
        }
    }
}

/* Demo entry point: searches a sample text for a sample pattern, then
   waits for a key press so the console output stays visible. */
int main()
{
    char* pattern = "GEEKS";
    char* text = "GEEKS FOR GEEKS";

    search(pattern, text);
    getchar();
    return 0;
}


Java
/* A Java program that searches for a pattern in a text using a finite
automaton whose transition table is built in O(M * NO_OF_CHARS) time */
 
class GFG
{
 
    static int NO_OF_CHARS = 256;
 
    /* This function builds the TF table
    which represents Finite Automata for a
    given pattern */
    static void computeTransFun(char[] pat,
                                int M, int TF[][])
    {
        int i, lps = 0, x;
 
        // Fill entries in first row
        for (x = 0; x < NO_OF_CHARS; x++)
        {
            TF[0][x] = 0;
        }
        TF[0][pat[0]] = 1;
 
        // Fill entries in other rows
        for (i = 1; i < M; i++)
        {
            // Copy values from row at index lps
            for (x = 0; x < NO_OF_CHARS; x++)
            {
                TF[i][x] = TF[lps][x];
            }
 
            // Update the entry corresponding to this character
            TF[i][pat[i]] = i + 1;
 
            // Update lps for next row to be filled
            if (i < M)
            {
                lps = TF[lps][pat[i]];
            }
        }
    }
 
    /* Prints all occurrences of pat in txt */
    static void search(char pat[], char txt[])
    {
        int M = pat.length;
        int N = txt.length;
 
        int[][] TF = new int[M + 1][NO_OF_CHARS];
 
        computeTransFun(pat, M, TF);
 
        // process text over FA.
        int i, j = 0;
        for (i = 0; i < N; i++)
        {
            j = TF[j][txt[i]];
            if (j == M)
            {
                System.out.println("pattern found at index " +
                                                (i - M + 1));
            }
        }
    }
 
    /* Driver code */
    public static void main(String[] args)
    {
        char txt[] = "GEEKS FOR GEEKS".toCharArray();
        char pat[] = "GEEKS".toCharArray();
        search(pat, txt);
    }
}
 
// This code is contributed by Princi Singh


Python3
""" A Python3 program that searches for a pattern in a text using a
finite automaton whose transition table is built in O(M * NO_OF_CHARS) time """
NO_OF_CHARS = 256


def computeTransFun(pat, M, TF):
    """Build the transition table of the finite automaton recognising pat.

    pat -- the pattern; every character must have a code point below
           NO_OF_CHARS, otherwise indexing the table raises IndexError.
    M   -- length of the pattern.
    TF  -- table with at least M + 1 rows of NO_OF_CHARS columns, filled
           in place. Afterwards TF[s][c] is the state reached from state
           s on the character with code point c, and every stored state
           lies in range(M + 1).
    """
    # Row 0: every character keeps the automaton in state 0 ...
    for x in range(NO_OF_CHARS):
        TF[0][x] = 0

    # An empty pattern has no further states and no first character.
    if M == 0:
        return

    # ... except the first pattern character, which moves to state 1.
    TF[0][ord(pat[0])] = 1

    # lps is the state the automaton falls back to on a mismatch.
    lps = 0

    # Rows 1 .. M inclusive. Row M (the accepting state) must be built too,
    # otherwise the automaton resets to state 0 after each match and misses
    # occurrences that overlap or directly follow it.
    for i in range(1, M + 1):
        # Start from the fallback row.
        TF[i][:NO_OF_CHARS] = TF[lps][:NO_OF_CHARS]

        # Row M has no next pattern character to advance on.
        if i < M:
            code = ord(pat[i])
            TF[i][code] = i + 1

            # Fallback state for the next row.
            lps = TF[lps][code]
 
# Prints all occurrences of pat in txt
def search(pat, txt):
    """Print the start index of every occurrence of pat in txt.

    Each match is printed on its own line as "pattern found at index <i>".
    An empty pattern yields no output.
    """
    M = len(pat)
    N = len(txt)

    if M == 0:
        return

    # One row per automaton state 0 .. M.
    TF = [[0] * NO_OF_CHARS for _ in range(M + 1)]
    computeTransFun(pat, M, TF)

    # Run the text through the automaton.
    j = 0
    for i in range(N):
        code = ord(txt[i])

        # A character outside the table's alphabet cannot occur in the
        # pattern, so it always sends the automaton back to state 0.
        j = TF[j][code] if code < NO_OF_CHARS else 0
        if j == M:
            print("pattern found at index", i - M + 1)
 
# Demo: search a sample text for a sample pattern
pat, txt = "GEEKS", "GEEKS FOR GEEKS"
search(pat, txt)
 
# This code is contributed by divyeshrabadiya07


C#
/* A C# program that searches for a pattern in a text using a finite
automaton whose transition table is built in O(M * NO_OF_CHARS) time */
using System;
     
class GFG
{

    /* Size of the input alphabet handled by the transition table. */
    static readonly int NO_OF_CHARS = 256;

    /// <summary>
    /// Builds the transition table of the finite automaton that
    /// recognises <paramref name="pat"/>.
    /// </summary>
    /// <param name="pat">Pattern characters; each must be below
    /// NO_OF_CHARS, otherwise indexing the table throws
    /// IndexOutOfRangeException.</param>
    /// <param name="M">Length of the pattern.</param>
    /// <param name="TF">Table with at least M + 1 rows and NO_OF_CHARS
    /// columns. On return TF[s, c] is the state reached from state s on
    /// character c, and every stored state lies in [0, M].</param>
    static void computeTransFun(char[] pat,
                                int M, int [,]TF)
    {
        // Row 0: every character keeps the automaton in state 0 ...
        for (int x = 0; x < NO_OF_CHARS; x++)
        {
            TF[0,x] = 0;
        }

        // An empty pattern has no further states and no first character.
        if (M == 0)
        {
            return;
        }

        // ... except the first pattern character, which moves to state 1.
        TF[0,pat[0]] = 1;

        // lps is the state the automaton falls back to on a mismatch.
        int lps = 0;

        // Rows 1 .. M inclusive. Row M (the accepting state) must be built
        // too, otherwise the automaton resets to state 0 after each match
        // and misses occurrences that overlap or directly follow it.
        for (int i = 1; i <= M; i++)
        {
            // Start from the fallback row.
            for (int x = 0; x < NO_OF_CHARS; x++)
            {
                TF[i,x] = TF[lps,x];
            }

            // Row M has no next pattern character to advance on.
            if (i < M)
            {
                TF[i,pat[i]] = i + 1;

                // Fallback state for the next row.
                lps = TF[lps,pat[i]];
            }
        }
    }

    /// <summary>
    /// Prints the start index of every occurrence of pat in txt, one per
    /// line. An empty pattern yields no output.
    /// </summary>
    static void search(char []pat, char []txt)
    {
        int M = pat.Length;
        int N = txt.Length;

        if (M == 0)
        {
            return;
        }

        int[,] TF = new int[M + 1,NO_OF_CHARS];

        computeTransFun(pat, M, TF);

        // Run the text through the automaton.
        int j = 0;
        for (int i = 0; i < N; i++)
        {
            char c = txt[i];

            // A character outside the table's alphabet cannot occur in the
            // pattern, so it always sends the automaton back to state 0.
            j = (c < NO_OF_CHARS) ? TF[j,c] : 0;
            if (j == M)
            {
                Console.WriteLine("pattern found at index " +
                                                (i - M + 1));
            }
        }
    }

    /* Driver code */
    public static void Main(String[] args)
    {
        char []txt = "GEEKS FOR GEEKS".ToCharArray();
        char []pat = "GEEKS".ToCharArray();
        search(pat, txt);
    }
}
 
// This code is contributed by Rajput-Ji


输出:

pattern found at index 0
 pattern found at index 10