📜  使用Trie的自动完成功能

📅  最后修改于: 2021-04-17 11:41:35             🧑  作者: Mango

给我们一个Trie,其中存储了一组字符串。现在,用户输入了他的搜索查询的前缀,我们需要为他提供所有建议,以便根据存储在Trie中的字符串自动完成他的查询。我们假设Trie存储用户过去的搜索。

例如,如果Trie中存储了{“abc”,“abcd”,“aa”,“abbbaba”},并且用户输入了“ab”,则必须向他显示{“abc”,“abcd”,“abbbaba”}。

前提条件:Trie的搜索和插入。

给定查询前缀,我们搜索所有具有该查询的单词。

  1. 使用标准Trie搜索算法搜索给定查询。
  2. 如果查询前缀本身不存在,则返回0,表示Trie中没有任何字符串以该前缀开头。
  3. 如果查询存在并且在Trie中是单词的结尾,则打印查询。可以通过查看最后一个匹配的节点是否设置了isWordEnd标志来快速检查。我们在Trie中使用此标志来标记单词的结尾节点,以便进行搜索。
  4. 如果查询的最后一个匹配节点没有子节点,则返回。
  5. 否则,递归地打印最后一个匹配节点的子树下的所有节点。

以下是上述步骤的一些实现。

C++
// C++ program to demonstrate auto-complete feature
// using Trie data structure.
#include <iostream>
#include <string>
using namespace std;
  
// Number of distinct symbols the trie can store: the lower-case letters
// 'a' through 'z'. Also the length of every node's children array.
#define ALPHABET_SIZE (26)
  
// Maps a character to its slot in a node's children array:
// 'a' -> 0, 'b' -> 1, ..., 'z' -> 25.
// Only lower-case 'a' through 'z' give an index inside
// [0, ALPHABET_SIZE); any other character gives an out-of-range value
// that must not be used as a subscript.
// The argument is parenthesized so that an expression argument is
// converted as a whole rather than having the cast bind to its first
// operand only.
#define CHAR_TO_INDEX(c) ((int)(c) - (int)'a')
  
// A single trie node. The chain of child links leading from the root
// to a node spells out a prefix, one lower-case letter per link.
struct TrieNode
{
    // children[i] is the node reached by the letter 'a' + i, or NULL
    // when no stored word continues with that letter.
    struct TrieNode *children[ALPHABET_SIZE];
  
    // isWordEnd is true if the node represents
    // end of a word
    bool isWordEnd;
};
  
// Returns new trie node (initialized to NULLs)
struct TrieNode *getNode(void)
{
    struct TrieNode *pNode = new TrieNode;
    pNode->isWordEnd = false;
  
    for (int i = 0; i < ALPHABET_SIZE; i++)
        pNode->children[i] = NULL;
  
    return pNode;
}
  
// If not present, inserts key into trie.  If the
// key is prefix of trie node, just marks leaf node
void insert(struct TrieNode *root,  const string key)
{
    struct TrieNode *pCrawl = root;
  
    for (int level = 0; level < key.length(); level++)
    {
        int index = CHAR_TO_INDEX(key[level]);
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNode();
  
        pCrawl = pCrawl->children[index];
    }
  
    // mark last node as leaf
    pCrawl->isWordEnd = true;
}
  
// Returns true if key presents in trie, else false
bool search(struct TrieNode *root, const string key)
{
    int length = key.length();
    struct TrieNode *pCrawl = root;
    for (int level = 0; level < length; level++)
    {
        int index = CHAR_TO_INDEX(key[level]);
  
        if (!pCrawl->children[index])
            return false;
  
        pCrawl = pCrawl->children[index];
    }
  
    return (pCrawl != NULL && pCrawl->isWordEnd);
}
  
// Returns 0 if current node has a child
// If all children are NULL, return 1.
bool isLastNode(struct TrieNode* root)
{
    for (int i = 0; i < ALPHABET_SIZE; i++)
        if (root->children[i])
            return 0;
    return 1;
}
  
// Recursive function to print auto-suggestions for given
// node.
void suggestionsRec(struct TrieNode* root, string currPrefix)
{
    // found a string in Trie with the given prefix
    if (root->isWordEnd)
    {
        cout << currPrefix;
        cout << endl;
    }
  
    // All children struct node pointers are NULL
    if (isLastNode(root))
        return;
  
    for (int i = 0; i < ALPHABET_SIZE; i++)
    {
        if (root->children[i])
        {
            // append current character to currPrefix string
            currPrefix.push_back(97 + i);
  
            // recur over the rest
            suggestionsRec(root->children[i], currPrefix);
            // remove last character
            currPrefix.pop_back();
        }
    }
}
  
// print suggestions for given query prefix.
int printAutoSuggestions(TrieNode* root, const string query)
{
    struct TrieNode* pCrawl = root;
  
    // Check if prefix is present and find the
    // the node (of last level) with last character
    // of given string.
    int level;
    int n = query.length();
    for (level = 0; level < n; level++)
    {
        int index = CHAR_TO_INDEX(query[level]);
  
        // no string in the Trie has this prefix
        if (!pCrawl->children[index])
            return 0;
  
        pCrawl = pCrawl->children[index];
    }
  
    // If prefix is present as a word.
    bool isWord = (pCrawl->isWordEnd == true);
  
    // If prefix is last node of tree (has no
    // children)
    bool isLast = isLastNode(pCrawl);
  
    // If prefix is present as a word, but
    // there is no subtree below the last
    // matching node.
    if (isWord && isLast)
    {
        cout << query << endl;
        return -1;
    }
  
    // If there are are nodes below last
    // matching character.
    if (!isLast)
    {
        string prefix = query;
        suggestionsRec(pCrawl, prefix);
        return 1;
    }
}
  
// Driver Code
int main()
{
    struct TrieNode* root = getNode();
    insert(root, "hello");
    insert(root, "dog");
    insert(root, "hell");
    insert(root, "cat");
    insert(root, "a");
    insert(root, "hel");
    insert(root, "help");
    insert(root, "helps");
    insert(root, "helping");
    int comp = printAutoSuggestions(root, "hel");
  
    if (comp == -1)
        cout << "No other strings found with this prefix\n";
  
    else if (comp == 0)
        cout << "No string found with this prefix\n";
  
    return 0;
}


Python3
# Python3 program to demonstrate auto-complete 
# feature using Trie data structure.
# Note: This is a basic implementation of Trie
# and not the most optimized one.
class TrieNode:
    """One node of the trie."""

    def __init__(self):
        # True once some inserted word ends exactly at this node.
        self.last = False
        # Maps a character to the child node reached through it.
        self.children = dict()
  
class Trie():
    """Prefix tree over strings that can list every stored word sharing a prefix.

    Attributes:
        root: the TrieNode standing for the empty prefix.
        word_list: the words collected by the most recent
            printAutoSuggestions call.
    """

    def __init__(self):
        # Start with an empty trie and no collected suggestions.
        self.root = TrieNode()
        self.word_list = []

    def formTrie(self, keys):
        """Insert every string in keys, extending the existing trie as needed."""
        for key in keys:
            self.insert(key)

    def insert(self, key):
        """Insert key, creating missing nodes and flagging its final node as a word end.

        If the path for key already exists, only the flag is set.
        """
        node = self.root

        for ch in key:
            child = node.children.get(ch)
            if child is None:
                child = TrieNode()
                node.children[ch] = child
            node = child

        node.last = True

    def search(self, key):
        """Return True if key was inserted as a complete word, else False."""
        node = self.root

        for ch in key:
            node = node.children.get(ch)
            if node is None:
                return False

        return node.last

    def suggestionsRec(self, node, word):
        """Append to self.word_list every stored word in the subtree under node.

        word is the string spelled by the path from the root to node.
        """
        if node.last:
            self.word_list.append(word)

        for ch, child in node.children.items():
            self.suggestionsRec(child, word + ch)

    def printAutoSuggestions(self, key):
        """Print every stored word that starts with key, one per line.

        Returns:
            0 if no stored word starts with key (nothing is printed),
            -1 if key itself is a stored word and no longer word extends
            it (nothing is printed),
            1 if suggestions were printed.
        """
        # Start from a clean list on every call; otherwise the words found
        # by an earlier call would be printed again with the new ones.
        self.word_list = []

        node = self.root
        prefix = ''

        for ch in key:
            node = node.children.get(ch)
            if node is None:
                return 0
            prefix += ch

        if not node.children:
            # Nothing below this node: either key is a complete word with
            # no extensions, or nothing is stored here at all (an empty
            # trie queried with an empty key).
            return -1 if node.last else 0

        self.suggestionsRec(node, prefix)

        for s in self.word_list:
            print(s)
        return 1
  
# Driver Code
# Words used to build the trie.
keys = ["hello", "dog", "hell", "cat", "a",
        "hel", "help", "helps", "helping"]
# Prefix for which completions are requested.
key = "hel"
status = ["Not found", "Found"]

# Build the trie from the word list.
t = Trie()
t.formTrie(keys)

# Print the completions and keep the status code of the lookup.
comp = t.printAutoSuggestions(key)

# Only the two outcomes without a suggestion list need a message.
if comp in (-1, 0):
    print("No other strings found with this prefix\n" if comp == -1
          else "No string found with this prefix\n")
  
# This code is contributed by amurdia


输出:

hel
hell
hello
help
helping
helps

我们该怎样改进这个?
匹配数目可能太大,因此在显示它们时我们必须要有选择。我们可以限制自己仅显示相关结果。根据相关性,我们可以考虑过去的搜索历史,并仅显示搜索次数最多的匹配字符串作为相关结果。
为每个标记为单词结尾(isleaf = True)的节点再存储一个值,用来记录该查询被搜索的次数(命中数)。例如,如果“hat”被搜索了10次,那么我们就把10存储在“hat”的最后一个节点中。现在,当我们要显示建议时,只显示命中数最高的前k个匹配项。请尝试自己实现。