📜  编辑距离 | DP 使用记忆化

📅  最后修改于: 2021-09-17 06:45:22             🧑  作者: Mango

给定两个字符串str1 和 str2 以及可以在 str1 上执行的操作。找出将“str1”转换为“str2”所需的最少编辑(操作)次数。

  • 插入
  • 删除
  • 替换

以上所有操作的代价相同。

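例子:

  输入:str1 = "sunday",str2 = "saturday"
  输出:3
  解释:两个字符串的首字符和最后三个字符相同,只需把 "un" 转换为 "atur":将 'n' 替换为 'r',再插入 'a' 和 't'。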

这种情况下的子问题是什么?思路是从两个字符串的左端或右端开始,逐个处理所有字符。这里从右端开始遍历,对于每一对被比较的字符,有两种可能。以下是具体情况:

  1. 如果两个字符串的最后一个字符相同,则无需任何操作。忽略最后一个字符,计算剩余字符串的编辑次数,即对长度 m-1 和 n-1 递归。
  2. 否则(最后一个字符不相同),考虑对 ‘str1’ 的最后一个字符执行全部三种操作,递归计算每种操作的最小成本,并取三者中的最小值再加 1。
    • 插入:对 m 和 n-1 递归
    • 删除:对 m-1 和 n 递归
    • 替换:对 m-1 和 n-1 递归

下面是上述方法的实现:

C++
// A Naive recursive C++ program to find minimum number
// operations to convert str1 to str2
#include <algorithm>
#include <iostream>
#include <string>
using namespace std;
 
// Returns the smallest of the three given integers.
int min(int x, int y, int z)
{
    const int smallerOfFirstTwo = (y < x) ? y : x;
    return (z < smallerOfFirstTwo) ? z : smallerOfFirstTwo;
}
 
// Naive recursive edit distance.
//
// Returns the minimum number of insert / remove / replace operations needed
// to turn the first m characters of str1 into the first n characters of str2.
// No results are cached, so the same sub-problems are solved repeatedly.
//
// The strings are taken by const reference: passing them by value would copy
// both strings on every recursive call.
int editDist(const string& str1, const string& str2, int m, int n)
{
    // An empty first prefix can only be completed by inserting the n
    // characters of the second prefix.
    if (m == 0)
        return n;

    // An empty second prefix means all m characters of the first prefix
    // have to be removed.
    if (n == 0)
        return m;

    // Equal last characters cost nothing; drop them from both prefixes.
    if (str1[m - 1] == str2[n - 1])
        return editDist(str1, str2, m - 1, n - 1);

    // Otherwise try every operation on the last character of the first
    // prefix and keep the cheapest one, plus one for the operation itself.
    const int insertCost = editDist(str1, str2, m, n - 1);
    const int removeCost = editDist(str1, str2, m - 1, n);
    const int replaceCost = editDist(str1, str2, m - 1, n - 1);
    return 1 + std::min({insertCost, removeCost, replaceCost});
}
 
// Driver program
int main()
{
    // your code goes here
    string str1 = "sunday";
    string str2 = "saturday";
 
    cout << editDist(str1, str2, str1.length(), str2.length());
 
    return 0;
}


Java
// A Naive recursive Java program to find minimum number
// operations to convert str1 to str2
class EDIST {
    // Returns the smallest of the three given integers.
    static int min(int x, int y, int z)
    {
        return Math.min(Math.min(x, y), z);
    }

    /**
     * Naive recursive edit distance.
     *
     * @param str1 string to be converted
     * @param str2 target string
     * @param m    number of leading characters of str1 to consider
     * @param n    number of leading characters of str2 to consider
     * @return minimum number of insert / remove / replace operations that
     *         turn the first m characters of str1 into the first n
     *         characters of str2
     */
    static int editDist(String str1, String str2, int m, int n)
    {
        // Nothing left in the first prefix: insert everything from the second.
        if (m == 0) {
            return n;
        }

        // Nothing left in the second prefix: remove everything from the first.
        if (n == 0) {
            return m;
        }

        // Matching last characters are free; continue with the shorter prefixes.
        char lastOfFirst = str1.charAt(m - 1);
        char lastOfSecond = str2.charAt(n - 1);
        if (lastOfFirst == lastOfSecond) {
            return editDist(str1, str2, m - 1, n - 1);
        }

        // Mismatch: pay one operation and take the cheapest continuation.
        int insertCost = editDist(str1, str2, m, n - 1);
        int removeCost = editDist(str1, str2, m - 1, n);
        int replaceCost = editDist(str1, str2, m - 1, n - 1);
        return 1 + min(insertCost, removeCost, replaceCost);
    }

    // Driver: prints the edit distance between two sample words.
    public static void main(String args[])
    {
        String source = "sunday";
        String target = "saturday";

        int distance = editDist(source, target, source.length(), target.length());
        System.out.println(distance);
    }
}


Python
# A Naive recursive Python program to find minimum number
# operations to convert str1 to str2
def editDistance(str1, str2, m, n):
    """Naive recursive edit distance.

    Returns the minimum number of insert / remove / replace operations
    needed to turn the first m characters of str1 into the first n
    characters of str2.
    """
    # One prefix is empty: the answer is the length of the other one
    # (all inserts when m == 0, all removals when n == 0).
    if m == 0 or n == 0:
        return m + n

    # Matching last characters are free; continue with the shorter prefixes.
    if str1[m - 1] == str2[n - 1]:
        return editDistance(str1, str2, m - 1, n - 1)

    # Mismatch: pay one operation and take the cheapest continuation.
    insert_cost = editDistance(str1, str2, m, n - 1)
    remove_cost = editDistance(str1, str2, m - 1, n)
    replace_cost = editDistance(str1, str2, m - 1, n - 1)
    return 1 + min(insert_cost, remove_cost, replace_cost)
 
# Driver program to test the above function.
# print is called as a function so the script runs on Python 3 as well as
# Python 2 (the bare `print x` statement is a SyntaxError on Python 3).
str1 = "sunday"
str2 = "saturday"
print(editDistance(str1, str2, len(str1), len(str2)))


C#
// A Naive recursive C# program to
// find minimum numberoperations
// to convert str1 to str2
using System;
 
class GFG {
    // Returns the smallest of the three given integers.
    static int min(int x, int y, int z)
    {
        return Math.Min(Math.Min(x, y), z);
    }

    // Naive recursive edit distance: the minimum number of insert / remove /
    // replace operations that turn the first m characters of str1 into the
    // first n characters of str2.
    static int editDist(String str1, String str2, int m, int n)
    {
        // Nothing left in the first prefix: insert everything from the second.
        if (m == 0) {
            return n;
        }

        // Nothing left in the second prefix: remove everything from the first.
        if (n == 0) {
            return m;
        }

        // Matching last characters are free; continue with the shorter prefixes.
        char lastOfFirst = str1[m - 1];
        char lastOfSecond = str2[n - 1];
        if (lastOfFirst == lastOfSecond) {
            return editDist(str1, str2, m - 1, n - 1);
        }

        // Mismatch: pay one operation and take the cheapest continuation.
        int insertCost = editDist(str1, str2, m, n - 1);
        int removeCost = editDist(str1, str2, m - 1, n);
        int replaceCost = editDist(str1, str2, m - 1, n - 1);
        return 1 + min(insertCost, removeCost, replaceCost);
    }

    // Driver code: prints the edit distance between two sample words.
    public static void Main()
    {
        String source = "sunday";
        String target = "saturday";
        int distance = editDist(source, target, source.Length, target.Length);
        Console.WriteLine(distance);
    }
}


Javascript


C++
// A memoization program to find minimum number
// operations to convert str1 to str2
#include 
using namespace std;
 
// Number of columns in the memo table passed to editDist.
constexpr int maximum = 1000;
 
// Returns the smallest of the three given integers.
int min(int x, int y, int z)
{
    const int smallerOfFirstTwo = (y < x) ? y : x;
    return (z < smallerOfFirstTwo) ? z : smallerOfFirstTwo;
}
 
int editDist(string str1, string str2, int m, int n, int dp[][maximum])
{
    // If first string is empty, the only option is to
    // insert all characters of second string into first
    if (m == 0)
        return n;
 
    // If second string is empty, the only option is to
    // remove all characters of first string
    if (n == 0)
        return m;
 
    // if the recursive call has been
    // called previously, then return
    // the stored value that was calculated
    // previously
    if (dp[m - 1][n - 1] != -1)
        return dp[m - 1][n - 1];
 
    // If last characters of two strings are same, nothing
    // much to do. Ignore last characters and get count for
    // remaining strings.
 
    // Store the returned value at dp[m-1][n-1]
    // considering 1-based indexing
    if (str1[m - 1] == str2[n - 1])
        return dp[m - 1][n - 1] = editDist(str1, str2, m - 1, n - 1, dp);
 
    // If last characters are not same, consider all three
    // operations on last character of first string, recursively
    // compute minimum cost for all three operations and take
    // minimum of three values.
 
    // Store the returned value at dp[m-1][n-1]
    // considering 1-based indexing
    return dp[m - 1][n - 1] = 1 + min(editDist(str1, str2, m, n - 1, dp), // Insert
                                      editDist(str1, str2, m - 1, n, dp), // Remove
                                      editDist(str1, str2, m - 1, n - 1, dp) // Replace
                                      );
}
 
// Driver Code
int main()
{
 
    string str1 = "sunday";
    string str2 = "saturday";
    int m = str1.length();
    int n = str2.length();
 
    // Declare a dp array which stores
    // the answer to recursive calls
    int dp[m][maximum];
 
    // initially all index with -1
    memset(dp, -1, sizeof dp);
 
    // Function call
    // memoization and top-down approach
    cout << editDist(str1, str2, m, n, dp);
 
    return 0;
}


Python3
# A memoization program to find minimum number
# operations to convert str1 to str2
def editDistance(str1, str2, m, n, d=None):
    """Memoized (top-down) edit distance.

    Returns the minimum number of insert / remove / replace operations
    needed to turn the first m characters of str1 into the first n
    characters of str2.

    d is an optional memo dict mapping (m, n) to the computed distance.
    When omitted, a fresh dict is created for the call, so results cached
    for one pair of strings can never leak into a call with different
    strings. A dict passed explicitly must only be reused for the same
    pair of strings.
    """
    if d is None:
        d = {}

    # An empty first prefix can only be completed by inserting the n
    # characters of the second prefix.
    if m == 0:
        return n

    # An empty second prefix means all m characters of the first prefix
    # have to be removed.
    if n == 0:
        return m

    key = (m, n)
    if key in d:
        return d[key]

    if str1[m - 1] == str2[n - 1]:
        # Equal last characters cost nothing; drop them from both prefixes.
        d[key] = editDistance(str1, str2, m - 1, n - 1, d)
    else:
        # Try every operation on the last character of the first prefix
        # and keep the cheapest one, plus one for the operation itself.
        d[key] = 1 + min(editDistance(str1, str2, m, n - 1, d),      # Insert
                         editDistance(str1, str2, m - 1, n, d),      # Remove
                         editDistance(str1, str2, m - 1, n - 1, d))  # Replace
    return d[key]
 
# Driver code: report the edit distance for a sample pair of words.
str1, str2 = "sunday", "saturday"

print(editDistance(str1, str2, len(str1), len(str2)))
 
# This code is contributed by puranjanprithu


输出:

3

朴素递归解法的时间复杂度为 O(3^n),是指数级的。最坏情况发生在两个字符串的字符全都不匹配时。下面是最坏情况下的递归调用图。

编辑距离

可以看到,许多子问题被一次又一次地重复求解,例如 eD(2, 2) 被调用了 3 次。由于相同的子问题会被反复调用,该问题具有重叠子问题的性质。因此编辑距离问题同时具备动态规划问题的两个性质(最优子结构和重叠子问题)。与其他典型的动态规划 (DP) 问题一样,可以构造一个临时数组来存储子问题的结果,从而避免对相同子问题的重复计算。自下而上的方法可参见相关文章。

这个问题也可以使用自顶向下的动态规划(记忆化)来解决。在递归代码中,可以使用记忆化来避免重复求解重叠的子问题:只要在第一次调用时把结果存储下来,之后的重复调用就可以在 O(1) 时间内得到结果。观察递归代码可以发现,每次递归调用时最多只有两个参数(m 和 n)的值会发生变化,因此同样的递归调用可能在之前已经出现过。由于有两个参数会变化,可以使用二维数组来避免重复调用,把返回值存储在该二维数组中。以下是步骤:

  • 创建大小为 m × n 的二维 DP 数组,并将所有元素初始化为 -1。
  • 在每次递归调用时,将返回值存入 DP 数组(下面的 C++ 代码使用从 0 开始的下标 dp[m-1][n-1]),这样再次调用 func(m, n) 时无需递归即可在 O(1) 时间内给出答案。
  • 在递归之前检查 DP 数组中对应位置的值是否仍为 -1,以判断该调用之前是否已经计算过。

下面是上述方法的实现:

C++

// A memoization program to find minimum number
// operations to convert str1 to str2
#include 
using namespace std;
 
// Number of columns in the memo table passed to editDist.
constexpr int maximum = 1000;
 
// Returns the smallest of the three given integers.
int min(int x, int y, int z)
{
    const int smallerOfFirstTwo = (y < x) ? y : x;
    return (z < smallerOfFirstTwo) ? z : smallerOfFirstTwo;
}
 
int editDist(string str1, string str2, int m, int n, int dp[][maximum])
{
    // If first string is empty, the only option is to
    // insert all characters of second string into first
    if (m == 0)
        return n;
 
    // If second string is empty, the only option is to
    // remove all characters of first string
    if (n == 0)
        return m;
 
    // if the recursive call has been
    // called previously, then return
    // the stored value that was calculated
    // previously
    if (dp[m - 1][n - 1] != -1)
        return dp[m - 1][n - 1];
 
    // If last characters of two strings are same, nothing
    // much to do. Ignore last characters and get count for
    // remaining strings.
 
    // Store the returned value at dp[m-1][n-1]
    // considering 1-based indexing
    if (str1[m - 1] == str2[n - 1])
        return dp[m - 1][n - 1] = editDist(str1, str2, m - 1, n - 1, dp);
 
    // If last characters are not same, consider all three
    // operations on last character of first string, recursively
    // compute minimum cost for all three operations and take
    // minimum of three values.
 
    // Store the returned value at dp[m-1][n-1]
    // considering 1-based indexing
    return dp[m - 1][n - 1] = 1 + min(editDist(str1, str2, m, n - 1, dp), // Insert
                                      editDist(str1, str2, m - 1, n, dp), // Remove
                                      editDist(str1, str2, m - 1, n - 1, dp) // Replace
                                      );
}
 
// Driver Code
int main()
{
 
    string str1 = "sunday";
    string str2 = "saturday";
    int m = str1.length();
    int n = str2.length();
 
    // Declare a dp array which stores
    // the answer to recursive calls
    int dp[m][maximum];
 
    // initially all index with -1
    memset(dp, -1, sizeof dp);
 
    // Function call
    // memoization and top-down approach
    cout << editDist(str1, str2, m, n, dp);
 
    return 0;
}

Python3

# A memoization program to find minimum number
# operations to convert str1 to str2
def editDistance(str1, str2, m, n, d=None):
    """Memoized (top-down) edit distance.

    Returns the minimum number of insert / remove / replace operations
    needed to turn the first m characters of str1 into the first n
    characters of str2.

    d is an optional memo dict mapping (m, n) to the computed distance.
    When omitted, a fresh dict is created for the call, so results cached
    for one pair of strings can never leak into a call with different
    strings. A dict passed explicitly must only be reused for the same
    pair of strings.
    """
    if d is None:
        d = {}

    # An empty first prefix can only be completed by inserting the n
    # characters of the second prefix.
    if m == 0:
        return n

    # An empty second prefix means all m characters of the first prefix
    # have to be removed.
    if n == 0:
        return m

    key = (m, n)
    if key in d:
        return d[key]

    if str1[m - 1] == str2[n - 1]:
        # Equal last characters cost nothing; drop them from both prefixes.
        d[key] = editDistance(str1, str2, m - 1, n - 1, d)
    else:
        # Try every operation on the last character of the first prefix
        # and keep the cheapest one, plus one for the operation itself.
        d[key] = 1 + min(editDistance(str1, str2, m, n - 1, d),      # Insert
                         editDistance(str1, str2, m - 1, n, d),      # Remove
                         editDistance(str1, str2, m - 1, n - 1, d))  # Replace
    return d[key]
 
# Driver code: report the edit distance for a sample pair of words.
str1, str2 = "sunday", "saturday"

print(editDistance(str1, str2, len(str1), len(str2)))
 
# This code is contributed by puranjanprithu

输出:

3

时间复杂度:O(M * N)
辅助空间:O(M * N)

如果您希望与专家一起参加现场课程,请参阅DSA 现场工作专业课程学生竞争性编程现场课程