📌  相关文章
📜  给定 N 个数组的最长公共子数组的长度

📅  最后修改于: 2022-05-13 01:57:35.980000             🧑  作者: Mango

给定 N 个数组的最长公共子数组的长度

给定一个包含N个数组的二维数组array[][] ,任务是在N个数组中找到最长的公共子数组(LCS)。

例子:

输入:array[][] = {{0, 1, 2, 3, 4}, {2, 3, 4}, {4, 0, 1, 2, 3}}
输出:2
解释:三个数组共有的最长子数组是 {2, 3},长度为 2。

方法:很明显LCS的长度可以二分查找。也就是说,如果有一个长度为L的公共子数组,那么总会有一个长度小于L的公共子数组。因此,二分查找框架如下:

所以,这里的重点是检查是否存在长度为 mid 的公共子数组。一种常用的方法是采用哈希,即 Rabin-Karp 滚动哈希。

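这里最方便的一点是:经过 O(N) 的预处理后,可以利用前缀哈希 Hash(S[0…i]) 在 O(1) 时间内计算出 Hash(S[l…r])。也就是说,

$$\mathrm{Hash}(S[l \dots r]) = \left(\mathrm{Hash}(S[0 \dots r]) - \mathrm{Hash}(S[0 \dots l-1]) \cdot p^{\,r-l+1}\right) \bmod \mathrm{mod}$$

其中 p 是哈希的基数,mod 是哈希的模数。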

因此,可以求出每个给定数组中所有长度为 mid 的子数组的哈希值,然后检查是否存在在所有数组中都出现的哈希值。这个过程可以用哈希表在 O(|S|) 时间内完成,或用 Set(平衡二叉搜索树)在 O(|S| log|S|) 时间内完成。因此,二分查找 + 哈希可以在 O(|S| log|S|) 时间内解决这个问题。请按照以下步骤解决此问题:

  • 将变量 min_len 初始化为可能的最大长度,即 INT_MAX。
  • 使用变量 i 遍历范围 [0, N) 并执行以下任务:
    • 将 min_len 的值设置为 min_len 与 array[i].size() 中的较小值。
  • 初始化变量 start = 0、end = min_len、mid = 0,以便对长度执行二分查找。
  • 执行 while 循环,直到 start 大于 end 为止(即在 start 小于等于 end 时循环),并执行以下步骤:
    • 将 mid 的值设置为 start 与 end 的平均值。
    • 调用函数 check(array, mid),使用 Rabin-Karp 哈希检查长度 mid 是否可以作为答案。
    • 如果函数返回 true,则将 start 的值设置为 mid + 1,否则将 end 设置为 mid - 1。
  • 执行上述步骤后,打印 end 的值作为答案。

下面是上述方法的实现

C++
// C++ program for the above approach
#include <algorithm>
#include <climits>
#include <cstddef>
#include <iostream>
#include <map>
#include <set>
#include <vector>
using namespace std;
 
// Base of the polynomial rolling hash.
const long long p = 1299827LL;
// Modulus of the rolling hash (the integer value of 1e11 + 7).
const long long mod = 100000000007LL;
// Number of arrays being compared; assigned by longestCommonSubpath
// and read by check.
long long M;
 
// Function to implement rabin - carp
// hashing to check whether the given length
// is possible or not
bool check(vector >& array, int len)
{
    if (len == 0)
        return true;
    map freq;
    for (int i = 0; i < M; i++) {
        long long curr_hash = 0, pow = 1;
        set found_hashes;
        for (int j = 0; j < len; j++) {
            curr_hash = (curr_hash * p) % mod;
            curr_hash += array[i][j];
            if (j != len - 1)
                pow = (pow * p) % mod;
        }
        found_hashes.insert(curr_hash);
        for (int j = len; j < array[i].size(); j++) {
            curr_hash += mod;
            curr_hash -= (array[i][j - len] * pow) % mod;
            curr_hash %= mod;
            curr_hash = curr_hash * p;
            curr_hash %= mod;
            curr_hash += array[i][j];
            curr_hash %= mod;
            found_hashes.insert(curr_hash);
        }
        while (found_hashes.size()) {
            long long h = *(found_hashes.begin());
            found_hashes.erase(found_hashes.begin());
            freq[h]++;
            if (freq[h] == M)
                return true;
        }
    }
    return false;
}
 
// Function to find the longest common sub-array
// from the given N arrays
int longestCommonSubpath(long long N,
                         vector >& array)
{
 
    M = N;
 
    // Find the maximum length possible
    int minlen = INT_MAX;
    for (int i = 0; i < array.size(); i++) {
        minlen = min(minlen, (int)array[i].size());
    }
 
    // Binary search on the length
    int start = 0, end = minlen, mid = 0;
    while (start <= end) {
        int mid = (start + end) / 2;
 
        // Function Call to check whether
        // it is possible or not
        if (check(array, mid)) {
            start = mid + 1;
        }
        else {
            end = mid - 1;
        }
    }
    return end;
}
 
// Driver Code
int main()
{
    vector > arr{ { 0, 1, 2, 3, 4 },
                              { 2, 3, 4 },
                              { 4, 0, 1, 2, 3 } };
 
    long long N = arr.size();
 
    cout << longestCommonSubpath(N, arr);
 
    return 0;
}


Java
// Java program for the above approach
import java.util.HashMap;
import java.util.HashSet;
 
class GFG {

  /** Base of the polynomial rolling hash. */
  static final long p = 1299827;

  /** Modulus of the rolling hash; it is below 2^37, which {@code mulMod} relies on. */
  static final long mod = (long) 1E11 + 7;

  /** Number of arrays being compared; written by longestCommonSubpath, read by check. */
  static long M;

  /**
   * Multiplies two residues modulo {@code mod} without overflowing {@code long}.
   *
   * <p>{@code b} is split into a high part (below 2^17) and a low 20-bit part, so every
   * intermediate product stays below 2^57.
   *
   * @param a residue in {@code [0, mod)}
   * @param b residue in {@code [0, mod)}
   * @return {@code (a * b) mod mod}
   */
  private static long mulMod(long a, long b) {
    long lowMask = (1L << 20) - 1;
    long acc = (a * (b >>> 20)) % mod;
    acc = (acc << 20) % mod;
    return (acc + (a * (b & lowMask)) % mod) % mod;
  }

  /** Maps an element (possibly negative) to its residue in {@code [0, mod)}. */
  private static long residue(int value) {
    return Math.floorMod((long) value, mod);
  }

  /**
   * Rabin-Karp check: tells whether some window of length {@code len} has a hash that occurs
   * in each of the first {@code M} arrays.
   *
   * <p>Windows are compared by hash only (elements are never compared), so the answer is
   * probabilistic: a hash collision could report a false match.
   *
   * @param array the arrays to inspect; rows {@code 0 .. M-1} are read
   * @param len window length to test
   * @return {@code true} when {@code len} is 0 or a common hash is found; {@code false}
   *     otherwise, including when one of the arrays is shorter than {@code len}
   */
  static boolean check(int[][] array, int len) {
    if (len == 0) {
      return true;
    }

    // p^(len-1) mod mod: weight of the element that leaves the window.
    long topPow = 1;
    for (int k = 1; k < len; k++) {
      topPow = mulMod(topPow, p);
    }

    // hash -> number of arrays (so far) that contain a window with that hash
    HashMap<Long, Integer> freq = new HashMap<>();
    for (int i = 0; i < M; i++) {
      int[] row = array[i];
      if (row.length < len) {
        return false;
      }

      // A set, so each array contributes at most once per hash value.
      HashSet<Long> foundHashes = new HashSet<>();

      // Hash of the first window, kept reduced into [0, mod) so that it is directly
      // comparable with the hashes produced by rolling.
      long currHash = 0;
      for (int j = 0; j < len; j++) {
        currHash = (mulMod(currHash, p) + residue(row[j])) % mod;
      }
      foundHashes.add(currHash);

      // Roll the window: drop row[j - len], shift, append row[j].
      for (int j = len; j < row.length; j++) {
        long outgoing = mulMod(residue(row[j - len]), topPow);
        currHash = (currHash - outgoing + mod) % mod;
        currHash = (mulMod(currHash, p) + residue(row[j])) % mod;
        foundHashes.add(currHash);
      }

      for (long h : foundHashes) {
        int seenIn = freq.merge(h, 1, Integer::sum);
        if (seenIn == M) {
          return true;
        }
      }
    }
    return false;
  }

  /**
   * Finds the length of the longest sub-array common to the given N arrays by binary search on
   * the length, using {@link #check(int[][], int)} as the predicate.
   *
   * @param N number of arrays; stored in {@link #M}
   * @param array the arrays
   * @return the largest length accepted by {@code check}; 0 when {@code N} is not positive or
   *     {@code array} is empty
   */
  public static int longestCommonSubpath(long N, int[][] array) {

    M = N;

    // Nothing to compare; also keeps check() from indexing a missing array.
    if (N <= 0 || array.length == 0) {
      return 0;
    }

    // The shortest array bounds the answer from above.
    int minlen = Integer.MAX_VALUE;
    for (int[] row : array) {
      minlen = Math.min(minlen, row.length);
    }

    // Binary search on the length: if a length is feasible, so is every shorter one.
    int start = 0;
    int end = minlen;
    while (start <= end) {
      // Overflow-safe midpoint.
      int mid = start + (end - start) / 2;

      if (check(array, mid)) {
        start = mid + 1;
      } else {
        end = mid - 1;
      }
    }
    // end is the largest length for which check() succeeded.
    return end;
  }

  /** Driver code: runs the search on three sample arrays and prints the resulting length. */
  public static void main(String[] args) {
    int[][] arr = { { 0, 1, 2, 3, 4 }, { 2, 3, 4 }, { 4, 0, 1, 2, 3 } };

    long N = arr.length;

    System.out.println(longestCommonSubpath(N, arr));
  }
}
 
// This code is contributed by gfgking.


Python3
# Python Program to implement
# the above approach
# Base of the polynomial rolling hash.
p = 1299827
# Modulus of the rolling hash. Written as an integer expression: the float
# literal 1e11 would turn every hash into a float, and floats lose precision
# once the intermediate products exceed 2**53.
mod = 10 ** 11 + 7
# Module-level placeholder; check() and longestCommonSubpath() take the
# array count as an argument / local instead of reading this name.
M = None
 
# Rabin-Karp check: tells whether a window of length `_len` can be common
# to the first M arrays.
def check(array, _len, M):
    """Return True if some window hash of length `_len` occurs in all M arrays.

    array -- sequence of integer sequences; rows 0 .. M-1 are read
    _len  -- window length to test; 0 is always accepted
    M     -- number of arrays that must contain the window

    Returns False when one of the arrays is shorter than `_len`.
    Windows are compared by hash only (elements are never compared), so
    the answer is probabilistic: a hash collision could report a false match.
    """
    if _len == 0:
        return True

    # Work in exact integer arithmetic even if the module constant was
    # written as a float (e.g. 1e11 + 7); float hashes lose precision.
    modulus = int(mod)
    base = p % modulus
    # base^(_len-1) mod modulus: weight of the element leaving the window.
    top_pow = pow(base, _len - 1, modulus)

    # hash -> number of arrays (so far) containing a window with that hash
    freq = {}

    for i in range(M):
        row = array[i]
        if len(row) < _len:
            return False

        # Hash of the first window, reduced after every step so it is
        # directly comparable with the hashes produced by rolling.
        curr_hash = 0
        for j in range(_len):
            curr_hash = (curr_hash * base + row[j]) % modulus

        # A set, so each array contributes at most once per hash value.
        found_hashes = {curr_hash}

        # Roll the window: drop row[j - _len], shift, append row[j].
        for j in range(_len, len(row)):
            curr_hash = (curr_hash - row[j - _len] * top_pow) % modulus
            curr_hash = (curr_hash * base + row[j]) % modulus
            found_hashes.add(curr_hash)

        for h in found_hashes:
            freq[h] = freq.get(h, 0) + 1
            if freq[h] == M:
                return True
    return False
 
# Binary-searches the length of the longest sub-array shared by the
# given N arrays, using check() as the feasibility test.
def longestCommonSubpath(N, array):
    """Return the largest length for which check(array, length, N) holds."""
    M = N

    # The shortest array caps the answer; 10 ** 9 is the starting bound.
    minlen = min([10 ** 9] + [len(row) for row in array])

    # Invariant: every length below `lo` passed check(), every length
    # above `hi` failed it.
    lo, hi = 0, minlen
    while lo <= hi:
        mid = (lo + hi) // 2
        if check(array, mid, M):
            lo = mid + 1
        else:
            hi = mid - 1
    return hi
 
# Driver Code: run the search on three sample arrays and print the result.
arr = [
    [0, 1, 2, 3, 4],
    [2, 3, 4],
    [4, 0, 1, 2, 3],
]

N = len(arr)
print(longestCommonSubpath(N, arr))
 
# This code is contributed by Saurabh Jaiswal


Javascript


输出
2

时间复杂度: O(N*log(N))
辅助空间: O(N)