📜  Z算法(线性时间模式搜索算法)

📅  最后修改于: 2021-04-24 04:46:34             🧑  作者: Mango

该算法可以在线性时间内找出模式在文本中的所有出现位置。假设文本的长度为 n,模式的长度为 m,则总时间为 O(m + n),空间复杂度也是线性的。可以看到,它的时间和空间复杂度都与 KMP 算法相同,但这种算法更易于理解。

在此算法中,我们构造了一个Z数组。

什么是Z数组?
对于字符串str [0..n-1],Z数组的长度与字符串的长度相同。 Z数组的元素Z [i]存储从str [i]开始的最长子串的长度,该字符串也是str [0..n-1]的前缀。 Z数组的第一个条目意义不大,因为完整的字符串始终是其自身的前缀。

Example:
Index            0   1   2   3   4   5   6   7   8   9  10  11 
Text             a   a   b   c   a   a   b   x   a   a   a   z
Z values         X   1   0   0   3   1   0   0   2   2   1   0 
More Examples:
str  = "aaaaaa"
Z[]  = {x, 5, 4, 3, 2, 1}

str = "aabaacd"
Z[] = {x, 1, 0, 2, 1, 0, 0}

str = "abababab"
Z[] = {x, 0, 6, 0, 4, 0, 2, 0}
 

Z数组对线性时间搜索模式有何帮助?
思路是把模式和文本连接起来,构造字符串 "P$T",其中 P 是模式,$ 是一个在模式和文本中都不会出现的特殊字符,T 是文本。然后为这个连接后的字符串构建 Z 数组。在 Z 数组中,如果某个位置的 Z 值等于模式长度,则模式在该位置出现。

Example:
Pattern P = "aab",  Text T = "baabaa"

The concatenated string is = "aab$baabaa"

Z array for above concatenated string is {x, 1, 0, 0, 0, 
                                          3, 1, 0, 2, 1}.
Since length of pattern is 3, the value 3 in Z array 
indicates presence of pattern. 

如何构造Z数组?
一个简单的解决方案是使用两层嵌套循环:外层循环遍历每个索引,内层循环求出从当前索引开始、同时也是整个字符串前缀的最长子串的长度。该解决方案的时间复杂度为 O(n²)。

我们可以在线性时间内构造Z数组。

The idea is to maintain an interval [L, R] which is the interval with max R
such that [L,R] is prefix substring (substring which is also prefix). 

Steps for maintaining this interval are as follows – 

1) If i > R then there is no prefix substring that starts before i and 
   ends after i, so we reset L and R and compute new [L,R] by comparing 
   str[0..] to str[i..] and get Z[i] (= R-L+1).

2) If i <= R then let K = i-L,  now Z[i] >= min(Z[K], R-i+1)  because 
   str[i..] matches with str[K..] for at least R-i+1 characters (they are in
   [L,R] interval which we know is a prefix substring).     
   Now two sub cases arise – 
      a) If Z[K] < R-i+1  then the match starting at str[i] ends strictly
         inside [L,R] and cannot be longer than Z[K] (otherwise Z[K] itself
         would be larger),  so  Z[i] = Z[K]  and interval [L,R] remains same.
      b) If Z[K] >= R-i+1 then it is possible to extend the [L,R] interval
         thus we will set L as i and start matching from str[R]  onwards  and
         get new R then we will update interval [L,R] and calculate Z[i] (=R-L+1).

为了更好地理解上述逐步过程,请检查此动画– http://www.utdallas.edu/~besp/demo/John2010/z-algorithm.htm

该算法以线性时间运行:我们从不重新比较位置小于 R 的字符,而每次匹配成功都会使 R 增加 1,因此成功的比较最多进行 n 次(n 为被处理字符串的长度)。对于每个 i,匹配失败最多只发生一次(失败后 R 停止前进),所以失败的比较最多也是 n 次,整体复杂度是线性的。

以下是Z模式搜索算法的实现。

C++
// A C++ program that implements Z algorithm for pattern searching
#include <iostream>
#include <string>
#include <vector>
using namespace std;
  
void getZarr(string str, int Z[]);
  
// prints all occurrences of pattern in text using Z algo
void search(string text, string pattern)
{
    // Create concatenated string "P$T"
    string concat = pattern + "$" + text;
    int l = concat.length();
  
    // Construct Z array
    int Z[l];
    getZarr(concat, Z);
  
    // now looping through Z array for matching condition
    for (int i = 0; i < l; ++i)
    {
        // if Z[i] (matched region) is equal to pattern
        // length we got the pattern
        if (Z[i] == pattern.length())
            cout << "Pattern found at index "
                << i - pattern.length() -1 << endl;
    }
}
  
// Fills Z[1..n-1] for the string `str` of length n, where Z[i] is the length
// of the longest substring starting at str[i] that is also a prefix of str.
// Z must point to at least n ints. Z[0] carries no information in this
// scheme and is set to 0 so callers never read an indeterminate value.
//
// `str` is taken by value to stay identical to the forward declaration
// earlier in the file.
void getZarr(std::string str, int Z[])
{
    const int n = static_cast<int>(str.length());
    if (n == 0)
        return;
    Z[0] = 0;

    // [L,R] make a window which matches with prefix of str
    int L = 0;
    int R = 0;
    for (int i = 1; i < n; ++i)
    {
        if (i > R)
        {
            // i lies outside the window: nothing is known yet, so compare
            // str[i..] against the prefix character by character.
            L = R = i;
            while (R < n && str[R - L] == str[R])
                R++;
            Z[i] = R - L;
            R--;
        }
        else
        {
            // k = i-L is the position inside the prefix that mirrors i.
            const int k = i - L;

            if (Z[k] < R - i + 1)
            {
                // The mirrored match ends strictly inside the window, so
                // the same length applies at i.
                Z[i] = Z[k];
            }
            else
            {
                // The match reaches the window edge: keep R and try to
                // extend it by comparing from R onwards.
                L = i;
                while (R < n && str[R - L] == str[R])
                    R++;
                Z[i] = R - L;
                R--;
            }
        }
    }
}


Java
// A Java program that implements Z algorithm for pattern
// searching
class GFG { 
  
    //  prints all occurrences of pattern in text using
    // Z algo
    public static void search(String text, String pattern)
    {
  
        // Create concatenated string "P$T"
        String concat = pattern + "$" + text;
  
        int l = concat.length();
  
        int Z[] = new int[l];
  
        // Construct Z array
        getZarr(concat, Z);
  
        // now looping through Z array for matching condition
        for(int i = 0; i < l; ++i){
  
            // if Z[i] (matched region) is equal to pattern
            // length we got the pattern
  
            if(Z[i] == pattern.length()){
                System.out.println("Pattern found at index "
                              + (i - pattern.length() - 1));
            }
        }
    }
  
    // Fills Z array for given string str[]
    private static void getZarr(String str, int[] Z) {
  
        int n = str.length();
          
        // [L,R] make a window which matches with 
        // prefix of s
        int L = 0, R = 0;
  
        for(int i = 1; i < n; ++i) {
  
            // if i>R nothing matches so we will calculate.
            // Z[i] using naive way.
            if(i > R){
  
                L = R = i;
  
                // R-L = 0 in starting, so it will start
                // checking from 0'th index. For example,
                // for "ababab" and i = 1, the value of R
                // remains 0 and Z[i] becomes 0. For string
                // "aaaaaa" and i = 1, Z[i] and R become 5
  
                while(R < n && str.charAt(R - L) == str.charAt(R))
                    R++;
                  
                Z[i] = R - L;
                R--;
  
            }
            else{
  
                // k = i-L so k corresponds to number which
                // matches in [L,R] interval.
                int k = i - L;
  
                // if Z[k] is less than remaining interval
                // then Z[i] will be equal to Z[k].
                // For example, str = "ababab", i = 3, R = 5
                // and L = 2
                if(Z[k] < R - i + 1)
                    Z[i] = Z[k];
  
                // For example str = "aaaaaa" and i = 2, R is 5,
                // L is 0
                else{
  
  
                // else start from R and check manually
                    L = i;
                    while(R < n && str.charAt(R - L) == str.charAt(R))
                        R++;
                      
                    Z[i] = R - L;
                    R--;
                }
            }
        }
    }
      
    // Driver program
    public static void main(String[] args) 
    {
        String text = "GEEKS FOR GEEKS";
        String pattern = "GEEK";
  
        search(text, pattern);
    }
}
  
// This code is contributed by PavanKoli.


Python3
# Python3 program that implements Z algorithm
# for pattern searching
  
# Computes the Z array of `string` in place: for every i >= 1, z[i] becomes
# the length of the longest substring starting at string[i] that is also a
# prefix of string. z must already hold len(string) entries; z[0] is left
# untouched.
def getZarr(string, z):
    size = len(string)

    # [lo, hi] is the window, with the largest hi seen so far,
    # that matches a prefix of the string
    lo = hi = 0
    for pos in range(1, size):

        # pos is inside the window and the mirrored value z[pos - lo]
        # stops strictly before the window edge: reuse it as is.
        # For example, str = "ababab", pos = 3, hi = 5 and lo = 2
        if pos <= hi and z[pos - lo] < hi - pos + 1:
            z[pos] = z[pos - lo]
            continue

        # Otherwise characters have to be compared explicitly. Past the
        # window the comparison starts at pos itself (for "ababab" and
        # pos = 1 nothing matches and z[pos] becomes 0); inside the
        # window it resumes from hi (for "aaaaaa" and pos = 2, hi is 5).
        if pos > hi:
            hi = pos
        lo = pos
        while hi < size and string[hi - lo] == string[hi]:
            hi += 1
        z[pos] = hi - lo
        hi -= 1
  
# Prints "Pattern found at index k" for every 0-based index k of text
# at which pattern occurs, using the Z algorithm on "pattern$text".
# An empty pattern produces no output.
def search(text, pattern):
    m = len(pattern)
    if m == 0:
        return

    # Create concatenated string "P$T"
    concat = pattern + "$" + text
    l = len(concat)

    # Construct Z array
    z = [0] * l
    getZarr(concat, z)

    # Only positions inside the text part (after "P$") are candidates,
    # so the scan starts at m + 1. The test is ">= m" rather than "== m":
    # if the text itself contains '$', the match against the prefix can
    # run past the pattern and an equality test would miss it.
    for i in range(m + 1, l):
        if z[i] >= m:
            print("Pattern found at index",
                      i - m - 1)
  
# Driver code: runs the demo search only when executed as a script
if __name__ == "__main__":
    text, pattern = "GEEKS FOR GEEKS", "GEEK"
    search(text=text, pattern=pattern)
  
# This code is contributed by
# sanjeev2552


C#
// A C# program that implements Z 
// algorithm for pattern searching 
using System;
  
/// <summary>
/// Z-algorithm pattern search: builds the Z array of "pattern$text" and
/// reports every position of the text at which the pattern occurs.
/// </summary>
class GFG
{
    /// <summary>
    /// Prints "Pattern found at index k" for every 0-based index k of
    /// <paramref name="text"/> at which <paramref name="pattern"/> occurs.
    /// An empty pattern produces no output.
    /// </summary>
    public static void search(string text,
                              string pattern)
    {
        int m = pattern.Length;
        if (m == 0)
        {
            return;
        }

        // Create concatenated string "P$T"
        string concat = pattern + "$" + text;
        int l = concat.Length;

        // Construct Z array
        int[] Z = new int[l];
        getZarr(concat, Z);

        // Only positions inside the text part (after "P$") are
        // candidates, so the scan starts at m + 1. The test is ">= m"
        // rather than "== m": if the text itself contains '$', the match
        // against the prefix can run past the pattern and an equality
        // test would miss it.
        for (int i = m + 1; i < l; ++i)
        {
            if (Z[i] >= m)
            {
                Console.WriteLine("Pattern found at index " +
                                 (i - m - 1));
            }
        }
    }

    /// <summary>
    /// Fills Z[1..n-1] for <paramref name="str"/>, where Z[i] is the
    /// length of the longest substring starting at index i that is also
    /// a prefix of the string. Z[0] is left untouched.
    /// </summary>
    private static void getZarr(string str,
                                int[] Z)
    {
        int n = str.Length;

        // [L,R] make a window which matches with prefix of str
        int L = 0, R = 0;

        for (int i = 1; i < n; ++i)
        {
            // i is inside the window and the mirrored match Z[i-L] ends
            // strictly before the window edge: the value carries over.
            if (i <= R && Z[i - L] < R - i + 1)
            {
                Z[i] = Z[i - L];
                continue;
            }

            // Otherwise compare explicitly. When i is past the window
            // the comparison starts at i itself; when the mirrored match
            // reaches the edge it resumes from R.
            R = Math.Max(R, i);
            L = i;
            while (R < n && str[R - L] == str[R])
            {
                R++;
            }

            Z[i] = R - L;
            R--;
        }
    }

    // Driver Code
    public static void Main(string[] args)
    {
        string text = "GEEKS FOR GEEKS";
        string pattern = "GEEK";

        search(text, pattern);
    }
}
  
// This code is contributed by Shrikant13


输出:

Pattern found at index 0
Pattern found at index 10