📜  如何在C++中有效使用unordered_map

📅  最后修改于: 2021-05-30 02:36:10             🧑  作者: Mango

前提条件:无序集合(unordered_set),无序映射(unordered_map)

C++提供std::unordered_set和std::unordered_map,分别用作哈希集合和哈希映射。它们以常数的平均时间执行插入/删除/访问。

  1. 但是,最坏情况下单次操作的复杂度是 $O(n)$,因此插入 n 个元素的总复杂度可能达到 $O(n^2)$。
  2. 原因是unordered_map用键的哈希值对一个质数(哈希表的桶数)取模,来决定键值对存放在哈希表中的哪个桶。
  3. 当输入数据很大并且输入值都是此质数的倍数时,会发生大量冲突,总复杂度可能退化为 $O(n^2)$。
  4. 根据编译器(标准库实现)的不同,这个质数可能是107897或126271。

示例1:分别插入上述两个质数的倍数并统计执行时间,其中一个质数花费的时间会比另一个长得多。

C++
// C++ program to determine worst case
// time complexity of an unordered_map
  
#include <chrono>
#include <iostream>
#include <unordered_map>
using namespace std;
using namespace std::chrono;
// Number of keys inserted per timing run.
int N{55000};
// The two primes whose multiples are used as keys.
int prime1{107897};
int prime2{126271};
  
void insert(int prime)
{
  
    // Starting the clock
    auto start
        = high_resolution_clock::now();
  
    unordered_map umap;
  
    // Inserting multiples of prime
    // number as key in the map
    for (int i = 1; i <= N; i++)
        umap[i * prime] = i;
  
    // Stopping the clock
    auto stop
        = high_resolution_clock::now();
  
    // Typecasting the time to
    // milliseconds
    auto duration
        = duration_cast(
            stop - start);
  
    // Time in seconds
    cout << "for " << prime << " : "
         << duration.count() / 1000.0
         << " seconds "
         << endl;
}
  
// Driver code
int main()
{
    // Function call for prime 1
    insert(prime1);
  
    // Function call for prime 2
    insert(prime2);
}


C++
// Identity hash: the 64-bit key itself is returned as the hash value.
struct hash {
    size_t operator()(uint64_t key) const
    {
        return static_cast<size_t>(key);
    }
};


C++
// Hash functor for integer keys. The key is shifted by an offset taken once
// from the steady clock and then scrambled with splitmix64, so the bucket a
// key lands in is no longer a simple function of the key value.
struct modified_hash {
    // splitmix64 mixing function: scrambles all 64 bits of x.
    //
    // 0x9e3779b97f4a7c15 is 2^64 divided by the golden ratio Phi (1.6180..).
    // 0xbf58476d1ce4e5b9 and 0x94d049bb133111eb are the multipliers of the
    // two xor-shift-multiply mixing steps. All arithmetic is unsigned and
    // wraps modulo 2^64.
    static uint64_t splitmix64(uint64_t x)
    {
        x += 0x9e3779b97f4a7c15;
        x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
        x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
        return x ^ (x >> 31);
    }

    // Returns the hash of x. The return type is size_t (the type unordered
    // containers work with) so the mixed value is not truncated to int.
    std::size_t operator()(uint64_t x) const
    {
        // Offset computed once, on the first call, from the steady clock's
        // tick count; it stays the same for every later call.
        static const uint64_t random = static_cast<uint64_t>(
            std::chrono::steady_clock::now().time_since_epoch().count());

        // Final hash value
        return static_cast<std::size_t>(splitmix64(x + random));
    }
};


C++
// C++ program to determine worst case
// time complexity of an unordered_map
// using modified hash function
  
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_map>
using namespace std;
using namespace std::chrono;

// Hash functor for integer keys. The key is shifted by an offset taken once
// from the steady clock and then scrambled with splitmix64.
struct modified_hash {

    // splitmix64 mixing function: scrambles all 64 bits of x using unsigned
    // arithmetic that wraps modulo 2^64.
    static uint64_t splitmix64(uint64_t x)
    {
        x += 0x9e3779b97f4a7c15;
        x = (x ^ (x >> 30))
            * 0xbf58476d1ce4e5b9;
        x = (x ^ (x >> 27))
            * 0x94d049bb133111eb;
        return x ^ (x >> 31);
    }

    // Returns the hash of x as size_t, so the 64-bit mix is not truncated
    // to int before the container uses it.
    size_t operator()(uint64_t x) const
    {
        // Offset computed once, on the first call, from the steady clock.
        static const uint64_t random = static_cast<uint64_t>(
            steady_clock::now().time_since_epoch().count());
        return static_cast<size_t>(splitmix64(x + random));
    }
};

int N = 55000;
int prime1 = 107897;
int prime2 = 126271;

// Inserts the first N multiples of `prime` into an unordered_map that uses
// modified_hash, then prints the elapsed wall-clock time in seconds.
//
// prime: stride between consecutive keys (prime, 2 * prime, ..., N * prime).
void insert(int prime)
{
    const auto start = high_resolution_clock::now();

    // Third template argument makes the map use modified_hash.
    // Keys are 64-bit because N * prime does not fit in a 32-bit int and
    // signed overflow is undefined behaviour.
    unordered_map<long long, int, modified_hash> umap;

    // Inserting multiples of prime
    // number as key in the map
    for (int i = 1; i <= N; i++)
        umap[static_cast<long long>(i) * prime] = i;

    const auto stop = high_resolution_clock::now();

    // Elapsed time in whole milliseconds
    const auto elapsed = duration_cast<milliseconds>(stop - start);

    cout << "for " << prime << " : "
         << elapsed.count() / 1000.0
         << " seconds "
         << endl;
}
  
// Driver Code
int main()
{
    // Function call for prime 1
    insert(prime1);
  
    // Function call for prime 2
    insert(prime2);
}


输出:
for 107897 : 2.261 seconds 
for 126271 : 0.024 seconds

显然,对于其中一个质数,总的时间复杂度退化为 $O(n^2)$。

unordered_map可以使用的标准内置哈希函数与此类似:

C++

// Identity hash: the 64-bit key itself is returned as the hash value.
struct hash {
    size_t operator()(uint64_t key) const
    {
        return static_cast<size_t>(key);
    }
};

上面的函数会产生大量的冲突。插入哈希表的键分布不均;插入大量该质数的倍数之后,继续插入会触发重新哈希,把之前所有的键重新分配到新的桶中,从而使其变慢。因此,我们的想法是必须随机化哈希函数。
想法是使用一种方法,使哈希表中的键均匀分布,从而减少冲突。为此,我们使用与斐波那契数列相关的黄金分割率( Phi = 1.618 ):它具有一种特性,可以均匀地细分任何范围,而不会循环回到起始位置。

我们可以创建自己的简单哈希函数。下面是哈希函数:

C++

// Hash functor for integer keys. The key is shifted by an offset taken once
// from the steady clock and then scrambled with splitmix64, so the bucket a
// key lands in is no longer a simple function of the key value.
struct modified_hash {
    // splitmix64 mixing function: scrambles all 64 bits of x.
    //
    // 0x9e3779b97f4a7c15 is 2^64 divided by the golden ratio Phi (1.6180..).
    // 0xbf58476d1ce4e5b9 and 0x94d049bb133111eb are the multipliers of the
    // two xor-shift-multiply mixing steps. All arithmetic is unsigned and
    // wraps modulo 2^64.
    static uint64_t splitmix64(uint64_t x)
    {
        x += 0x9e3779b97f4a7c15;
        x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
        x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
        return x ^ (x >> 31);
    }

    // Returns the hash of x. The return type is size_t (the type unordered
    // containers work with) so the mixed value is not truncated to int.
    std::size_t operator()(uint64_t x) const
    {
        // Offset computed once, on the first call, from the steady clock's
        // tick count; it stays the same for every later call.
        static const uint64_t random = static_cast<uint64_t>(
            std::chrono::steady_clock::now().time_since_epoch().count());

        // Final hash value
        return static_cast<std::size_t>(splitmix64(x + random));
    }
};

基本上,以上散列函数生成随机散列值以存储密钥。要了解有关此内容的更多信息,请参阅本文斐波那契哈希。

示例2:使用上面的哈希函数,该程序可以非常快速地运行。

C++

// C++ program to determine worst case
// time complexity of an unordered_map
// using modified hash function
  
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_map>
using namespace std;
using namespace std::chrono;

// Hash functor for integer keys. The key is shifted by an offset taken once
// from the steady clock and then scrambled with splitmix64.
struct modified_hash {

    // splitmix64 mixing function: scrambles all 64 bits of x using unsigned
    // arithmetic that wraps modulo 2^64.
    static uint64_t splitmix64(uint64_t x)
    {
        x += 0x9e3779b97f4a7c15;
        x = (x ^ (x >> 30))
            * 0xbf58476d1ce4e5b9;
        x = (x ^ (x >> 27))
            * 0x94d049bb133111eb;
        return x ^ (x >> 31);
    }

    // Returns the hash of x as size_t, so the 64-bit mix is not truncated
    // to int before the container uses it.
    size_t operator()(uint64_t x) const
    {
        // Offset computed once, on the first call, from the steady clock.
        static const uint64_t random = static_cast<uint64_t>(
            steady_clock::now().time_since_epoch().count());
        return static_cast<size_t>(splitmix64(x + random));
    }
};

int N = 55000;
int prime1 = 107897;
int prime2 = 126271;

// Inserts the first N multiples of `prime` into an unordered_map that uses
// modified_hash, then prints the elapsed wall-clock time in seconds.
//
// prime: stride between consecutive keys (prime, 2 * prime, ..., N * prime).
void insert(int prime)
{
    const auto start = high_resolution_clock::now();

    // Third template argument makes the map use modified_hash.
    // Keys are 64-bit because N * prime does not fit in a 32-bit int and
    // signed overflow is undefined behaviour.
    unordered_map<long long, int, modified_hash> umap;

    // Inserting multiples of prime
    // number as key in the map
    for (int i = 1; i <= N; i++)
        umap[static_cast<long long>(i) * prime] = i;

    const auto stop = high_resolution_clock::now();

    // Elapsed time in whole milliseconds
    const auto elapsed = duration_cast<milliseconds>(stop - start);

    cout << "for " << prime << " : "
         << elapsed.count() / 1000.0
         << " seconds "
         << endl;
}
  
// Driver Code
int main()
{
    // Function call for prime 1
    insert(prime1);
  
    // Function call for prime 2
    insert(prime2);
}
输出:
for 107897 : 0.025 seconds 
for 126271 : 0.024 seconds
要通过精选的优质视频和练习题来学习,请查看从基础到进阶的C++基础课程,以及C++ STL课程。要完成从学习语言到数据结构与算法(DS Algo)等更多准备工作,请参阅“完整面试准备课程”。