📜  如何使用三元搜索树实现文本自动完成功能

📅  最后修改于: 2021-09-16 11:15:07             🧑  作者: Mango

给定一组字符串 S 和一个字符串 patt,任务是使用三元搜索树(Ternary Search Tree)找出 S 中所有以 patt 为前缀的字符串,作为 patt 的自动补全建议。如果没有任何字符串以给定前缀开头,则打印“None”。
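例子:
输入:S = {"wallstreet", "geeksforgeeks", "wallmart", "walmart", "waldormort", "word"},patt = "wall"
输出:wallmart、wallstreet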

Trie 方法:请参阅本文以了解使用 Trie 数据结构的实现。
三元搜索树方法按照以下步骤解决问题:

  • 按照以下规则,将 S 中每个字符串的字符逐个插入三元搜索树:
    1. 如果要插入的字符小于当前节点的字符,则进入左子树。
    2. 如果要插入的字符大于当前节点的字符,则进入右子树。
    3. 如果要插入的字符与当前节点的字符相同:若它是单词的最后一个字符,则将该节点标记为单词结尾;否则进入相等(中间)子树,继续插入下一个字符。
  • 遵循类似的方法来提取建议。
  • 按照上述类似的遍历技术遍历树以搜索给定的前缀patt
  • 如果没有找到给定的前缀,打印“None”。
  • 如果找到给定的前缀,则从前缀结束处的节点开始遍历树:对每个节点,先遍历左子树,再遍历相等子树,最后遍历右子树,并在遍历过程中生成建议。
  • 每次遇到设置了词尾标记(end)的节点时,就表示得到了一条建议,将其加入结果列表 words 中。
  • 生成所有可能的建议后,返回 words。

下面是上述方法的实现:

C++
// C++ Program to generate
// autocompleted texts from
// a given prefix using a
// Ternary Search Tree
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
 
// A single node of the ternary search tree.
//
// Every member has a default initializer so that a Node is in a
// well-defined "empty" state however it is created.
struct Node {

    // Character stored at this node.
    char data = '\0';

    // Non-zero when a word ends at this node (insert() sets it to 1).
    int end = 0;

    // Subtree used when the character being placed is smaller than `data`.
    Node* left = nullptr;

    // Subtree holding the next character of words that match `data` here.
    Node* eq = nullptr;

    // Subtree used when the character being placed is greater than `data`.
    Node* right = nullptr;
};
 
// Allocate a tree node that stores `newData`. The returned node has no
// children and is not marked as the end of a word.
Node* createNode(char newData)
{
    Node* fresh = new Node();

    // Start out as a childless, non-terminal node.
    fresh->right = nullptr;
    fresh->eq = nullptr;
    fresh->left = nullptr;
    fresh->end = 0;

    fresh->data = newData;
    return fresh;
}
 
// Function to insert a word
// in the tree
void insert(Node** root,
            string word,
            int pos = 0)
{
 
    // Base case
    if (!(*root))
        *root = createNode(word[pos]);
 
    // If the current character is
    // less than root's data, then
    // it is inserted in the
    // left subtree
 
    if ((*root)->data > word[pos])
        insert(&((*root)->left), word,
               pos);
 
    // If current character is
    // more than root's data, then
    // it is inserted in the right
    // subtree
 
    else if ((*root)->data < word[pos])
        insert(&((*root)->right), word,
               pos);
 
    // If current character is same
    // as that of the root's data
 
    else {
        // If it is the end of word
 
        if (pos + 1 == word.size())
            // Mark it as the
            // end of word
            (*root)->end = 1;
 
        // If it is not the end of
        // the string, then the
        // current character is
        // inserted in the equal subtree
 
        else
            insert(&((*root)->eq), word, pos + 1);
    }
}
 
// Function to traverse the ternary search tree
void traverse(Node* root,
              vector& ret,
              char* buff,
              int depth = 0)
{
    // Base case
    if (!root)
        return;
    // The left subtree is
    // traversed first
    traverse(root->left, ret,
             buff, depth);
 
    // Store the current character
    buff[depth] = root->data;
 
    // If the end of the string
    // is detected, store it in
    // the final ans
    if (root->end) {
        buff[depth + 1] = '\0';
        ret.push_back(string(buff));
    }
 
    // Traverse the equal subtree
    traverse(root->eq, ret,
             buff, depth + 1);
 
    // Traverse the right subtree
    traverse(root->right, ret,
             buff, depth);
}
 
// Utility function to find
// all the words
vector util(Node* root,
                    string pattern)
{
    // Stores the words
    // to suggest
    char buffer[1001];
 
    vector ret;
 
    traverse(root, ret, buffer);
 
    if (root->end == 1)
        ret.push_back(pattern);
    return ret;
}
 
// Function to autocomplete
// based on the given prefix
// and return the suggestions
vector autocomplete(Node* root,
                            string pattern)
{
    vector words;
    int pos = 0;
 
    // If pattern is empty
    // return an empty list
    if (pattern.empty())
        return words;
 
    // Iterating over the characters
    // of the pattern and find it's
    // corresponding node in the tree
 
    while (root && pos < pattern.length()) {
 
        // If current character is smaller
        if (root->data > pattern[pos])
            // Search the left subtree
            root = root->left;
 
        // current character is greater
        else if (root->data < pattern[pos])
            // Search right subtree
            root = root->right;
 
        // If current character is equal
        else if (root->data == pattern[pos]) {
           
            // Search equal subtree
              // since character is found, move to the next character in the pattern
            root = root->eq;
            pos++;
        }
 
        // If not found
        else
            return words;
 
    }
 
    // Search for all the words
    // from the current node
    words = util(root, pattern);
 
    return words;
}
 
// Print every suggestion on its own line, each one prefixed with `pat`.
//
// sugg : suffixes to display.
// pat  : prefix written in front of every suffix.
void print(const std::vector<std::string>& sugg,
           const std::string& pat)
{
    for (const std::string& suffix : sugg)
        std::cout << pat << suffix.c_str()
                  << "\n";
}
 
// Driver Code
int main()
{
    vector S
        = { "wallstreet", "geeksforgeeks",
            "wallmart", "walmart",
            "waldormort", "word" };
 
    Node* tree = NULL;
 
    // Insert the words in the
    // Ternary Search Tree
    for (string str : S)
        insert(&tree, str);
 
    string pat = "wall";
 
    vector sugg
        = autocomplete(tree, pat);
 
    if (sugg.size() == 0)
        cout << "None";
 
    else
        print(sugg, pat);
 
    return 0;
}


输出
wallmart
wallstreet

时间复杂度: O(L* log N) 其中L是最长单词的长度。
空间与要存储的字符串的长度成正比。
辅助空间: O(N)