📜  压缩特里树(Compressed Trie)

📅  最后修改于: 2021-04-17 12:06:54             🧑  作者: Mango

特里树(Trie)是一种以树形结构存储字符串的数据结构。每个节点的子节点数最多等于字母表的大小。利用特里树可以方便地按字母顺序输出所有字符串,这是哈希无法做到的。

特里的属性

  • 这是一棵多路树。
  • 每个节点有 1 到 N 个子节点。
  • 每个叶节点都对应一个已存储的字符串,该字符串由从根到该叶节点的路径上的字符依次连接而成。

特里的类型

  • 标准特里
  • 后缀特里
  • 压缩特里

压缩特里

压缩特里树是每个内部节点的度数至少为 2 的特里树。它通过压缩标准 Trie 中的节点(把只有一个子节点的节点链合并为一条边)得到,也被称为基数树(Radix Trie),用于实现空间优化。

由于节点经过了压缩,下面直观地比较“标准”特里树和“压缩”特里树的结构,以便更好地理解。在内存方面,压缩特里树使用的节点数量少得多,对于具有较长公共前缀的字符串(尤其是长字符串)有很大的内存优势。在速度方面,标准特里树会稍快一些,因为它的操作不涉及任何字符串操作,只是简单的循环。

在下图中,左侧的树是标准Trie,右侧的树是压缩的Trie。

执行:

一个标准的trie节点如下所示:

Java
/**
 * Node of a standard (uncompressed) trie as presented in the article.
 */
class node {

    // Child links; the array has 26 slots and a null slot means "no child".
    // NOTE(review): presumably one slot per lower-case letter - confirm.
    node[] children;

    // Word-end marker; starts out false.
    boolean isWordEnd;

    /** Creates a node with 26 empty child slots and the word-end flag cleared. */
    node()
    {
        this.children = new node[26];
        this.isWordEnd = false;
    }
}


Java
/**
 * Node of a compressed trie: every child link carries a string label
 * instead of a single character.
 */
class node {

    // Child links; 26 slots, a null slot means "no child".
    node[] children;

    // Label of the edge leading to the child stored in the same slot.
    StringBuilder[] edgeLabel;

    // Word-end marker; starts out false.
    boolean isEnd;

    /** Creates a node with 26 empty child/label slots and the end flag cleared. */
    node()
    {
        this.children = new node[26];
        this.edgeLabel = new StringBuilder[26];
        this.isEnd = false;
    }
}


Java
/**
 * Node of the bitwise compressed trie: an integer key, a bit index and
 * two child links.
 */
class CompressedNode {

    // Key stored in this node.
    int data;

    // Bit index associated with this node.
    int bitNumber;

    // Left link.
    CompressedNode leftChild;

    // Right link.
    CompressedNode rightChild;
}


Java
/**
 * Container for the compressed trie: holds the root link and the basic
 * empty/clear operations.
 */
class CompressedNode {

    // Constant named MaxBits; it is not read anywhere inside this class.
    private static final int MaxBits = 10;

    // Root link; null while the trie is empty.
    private CompressedNode root;

    /** Creates an empty trie. */
    public CompressedNode()
    {
        this.root = null;
    }

    /**
     * @return true when no root has been set
     */
    public boolean isEmpty()
    {
        return this.root == null;
    }

    /** Drops the root link, leaving the trie empty. */
    public void makeEmpty()
    {
        this.root = null;
    }
}


Java
// Function to search a key k
// in the trie
public boolean search(int k)
{
    // Find the number of bits
    int numOfBits = (int)(Math.log(k) / Math.log(2)) + 1;
  
    // If error occurs
    if (numOfBits > MaxBits) {
        System.out.println("Error : Number too large");
        return false;
    }
  
    // Search Node
    CompressedNode searchNode = search(root, k);
  
    // If the data matches
    if (searchNode.data == k)
        return true;
  
    // Else return false
    else
        return false;
}


Java
/**
 * Inserts the key {@code ele} into the trie whose header node is
 * {@code t} and returns the header node.
 *
 * When {@code t} is null a new header node holding {@code ele} is
 * created and returned. When the key is already present an error line
 * is printed and the trie is left unchanged.
 *
 * NOTE(review): relies on the helpers search(CompressedNode, int) and
 * bit(int, int), which are not shown here; presumably search returns
 * the node where the lookup for ele ends and bit(x, i) tests the i-th
 * bit of x - confirm.
 *
 * @param t   header node of the trie, or null for an empty trie
 * @param ele key to insert
 * @return the header node of the trie after the insertion
 */
private CompressedNode insert(
    CompressedNode t, int ele)
{
    CompressedNode current, parent;
    CompressedNode lastNode, newNode;
    int i;

    // Empty trie: create the header node. Its bit number is 0, its
    // left link points back to itself and its right link is null.
    if (t == null) {
        t = new CompressedNode();
        t.bitNumber = 0;
        t.data = ele;
        t.leftChild = t;
        t.rightChild = null;
        return t;
    }

    // Node at which a lookup for ele ends
    lastNode = search(t, ele);

    // Duplicate key: report it and leave the trie unchanged
    if (ele == lastNode.data) {
        System.out.println(
            "Error : key is already present\n");
        return t;
    }

    // Find the first bit index (counting from 1) at which ele and the
    // key found above differ; the keys are known to be different here
    for (i = 1; bit(ele, i) == bit(lastNode.data, i); i++)
        ;

    // Walk down from the header, steering by the bits of ele, while the
    // link leads to a node with a larger bit number than its parent and
    // that bit number is still below i
    current = t.leftChild;
    parent = t;
    while (current.bitNumber > parent.bitNumber
           && current.bitNumber < i) {
        parent = current;
        current = (bit(ele, current.bitNumber))
                      ? current.rightChild
                      : current.leftChild;
    }

    // New node for bit i: the link on the side of ele's own bit points
    // back to the new node itself, the other link points to current
    newNode = new CompressedNode();
    newNode.bitNumber = i;
    newNode.data = ele;
    newNode.leftChild = bit(ele, i) ? current : newNode;
    newNode.rightChild = bit(ele, i) ? newNode : current;

    // Replace the parent's link to current with the new node
    if (current == parent.leftChild)
        parent.leftChild = newNode;
    else
        parent.rightChild = newNode;

    return t;
}


Java
// Java program to implement the
// Compressed Trie
  
/**
 * Compressed trie (radix tree) over an alphabet of consecutive symbols.
 * Each edge carries a string label instead of a single character.
 */
class Trie {

    // Root node; it has no incoming edge
    private final Node root = new Node(false);

    // First symbol of the alphabet: 'a' for lower, 'A' for upper
    private final char CASE;

    /** Creates a trie whose alphabet starts at 'a'. */
    public Trie() { CASE = 'a'; }

    /**
     * Creates a trie whose alphabet starts at the given symbol.
     *
     * @param CASE first symbol of the alphabet
     */
    public Trie(char CASE)
    {
        this.CASE = CASE;
    }

    /**
     * Inserts a word into the compressed trie. Inserting the empty
     * string marks the root as a word end.
     *
     * @param word word to insert
     * @throws IllegalArgumentException if the word contains a character
     *         outside the trie alphabet; the trie is left unchanged
     */
    public void insert(String word)
    {
        // Validate the whole word before touching the trie. Without
        // this, a bad character hidden inside an edge label can make a
        // later insert fail halfway through an edge split.
        requireInAlphabet(word);

        Node trav = root;
        int i = 0;

        while (i < word.length()
               && trav.edgeLabel[word.charAt(i) - CASE] != null) {

            int index = word.charAt(i) - CASE;
            StringBuilder label = trav.edgeLabel[index];
            int j = 0;

            // Consume the common part of the label and the word
            while (j < label.length() && i < word.length()
                   && label.charAt(j) == word.charAt(i)) {
                ++i;
                ++j;
            }

            if (j < label.length()) {

                // The word ends inside the edge (e.g. "face" when
                // "facebook" exists) or diverges from it: split the
                // edge after its first j characters
                boolean wordEndsHere = (i == word.length());
                Node middle = splitEdge(trav, index, j, wordEndsHere);

                // Hang the rest of the word below the split point
                if (!wordEndsHere) {
                    attachLeaf(middle, word, i);
                }
                return;
            }

            // Whole label matched, continue below it
            trav = trav.children[index];
        }

        if (i < word.length()) {
            // No edge starts with the next symbol: add a new leaf
            attachLeaf(trav, word, i);
        }
        else {
            // Word ends exactly at an existing node, e.g. "there"
            // when "therein" and "thereafter" are existing
            trav.isEnd = true;
        }
    }

    /**
     * Splits the edge in the given slot of {@code parent} after
     * {@code at} characters and returns the new intermediate node.
     * The old child is re-attached below it under the rest of the label.
     */
    private Node splitEdge(
        Node parent, int index, int at, boolean endsHere)
    {
        StringBuilder label = parent.edgeLabel[index];
        Node middle = new Node(endsHere);
        int tailSlot = label.charAt(at) - CASE;

        middle.edgeLabel[tailSlot]
            = new StringBuilder(label.substring(at));
        middle.children[tailSlot] = parent.children[index];

        // Shorten the existing label to the common part
        label.setLength(at);
        parent.children[index] = middle;
        return middle;
    }

    /**
     * Adds a word-end leaf below {@code parent}, labelled with the part
     * of {@code word} that starts at {@code from}.
     */
    private void attachLeaf(Node parent, String word, int from)
    {
        int slot = word.charAt(from) - CASE;
        parent.edgeLabel[slot]
            = new StringBuilder(word.substring(from));
        parent.children[slot] = new Node(true);
    }

    /** Throws IllegalArgumentException on the first symbol outside the alphabet. */
    private void requireInAlphabet(String word)
    {
        for (int k = 0; k < word.length(); ++k) {
            int slot = word.charAt(k) - CASE;
            if (slot < 0 || slot >= root.edgeLabel.length) {
                throw new IllegalArgumentException(
                    "Character '" + word.charAt(k) + "' at index "
                    + k + " is outside the trie alphabet: " + word);
            }
        }
    }

    /** Prints every stored word on its own line, in slot (alphabetical) order. */
    public void print()
    {
        printUtil(root, new StringBuilder());
    }

    /**
     * Prints the words stored at or below {@code node}; {@code str}
     * holds the characters on the path from the root to {@code node}.
     */
    private void printUtil(
        Node node, StringBuilder str)
    {
        if (node.isEnd) {
            System.out.println(str);
        }

        for (int i = 0;
             i < node.edgeLabel.length; ++i) {

            if (node.edgeLabel[i] == null) {
                continue;
            }

            // Extend the path, recurse, then restore the path
            int length = str.length();
            str.append(node.edgeLabel[i]);
            printUtil(node.children[i], str);
            str.setLength(length);
        }
    }

    /**
     * Reports whether the exact word was inserted.
     *
     * @param word word to look up; symbols outside the alphabet simply
     *             make the lookup fail
     * @return true when the word is stored in the trie
     */
    public boolean search(String word)
    {
        // A word that ends inside an edge (e.g. "fac") is not stored
        Node reached = locate(word, false);
        return reached != null && reached.isEnd;
    }

    /**
     * Reports whether any stored word starts with the given prefix.
     *
     * @param prefix prefix to look up; symbols outside the alphabet
     *               simply make the lookup fail
     * @return true when the prefix lies on a path of the trie
     */
    public boolean startsWith(String prefix)
    {
        // Ending inside an edge is fine for a prefix
        return locate(prefix, true) != null;
    }

    /**
     * Follows {@code key} from the root.
     *
     * Returns the node at which the key is used up. When the key ends
     * inside an edge, returns the node below that edge if
     * {@code allowMidEdge} is set. Returns null when the key is not on
     * any path of the trie.
     */
    private Node locate(String key, boolean allowMidEdge)
    {
        Node trav = root;
        int i = 0;

        while (i < key.length()) {
            int index = key.charAt(i) - CASE;

            // Symbol outside the alphabet, or no edge for it
            if (index < 0 || index >= trav.edgeLabel.length
                || trav.edgeLabel[index] == null) {
                return null;
            }

            StringBuilder label = trav.edgeLabel[index];
            int j = 0;

            while (i < key.length() && j < label.length()) {

                // Character mismatch
                if (key.charAt(i) != label.charAt(j)) {
                    return null;
                }

                ++i;
                ++j;
            }

            // Key used up before the end of the label
            if (j < label.length()) {
                return allowMidEdge ? trav.children[index] : null;
            }

            // Traverse further
            trav = trav.children[index];
        }

        return trav;
    }
}

/**
 * Node of the compressed trie. Slot i of {@code children} holds the
 * child reached through the edge whose label is in slot i of
 * {@code edgeLabel}.
 */
class Node {

    // Number of symbols
    private final static int SYMBOLS = 26;
    Node[] children = new Node[SYMBOLS];
    StringBuilder[] edgeLabel = new StringBuilder[SYMBOLS];
    boolean isEnd;

    /**
     * Creates a node without children.
     *
     * @param isEnd whether a stored word ends at this node
     */
    public Node(boolean isEnd)
    {
        this.isEnd = isEnd;
    }
}
  
class GFG {
  
    // Driver Code
    public static void main(String[] args)
    {
        Trie trie = new Trie();
  
        // Insert words
        trie.insert("facebook");
        trie.insert("face");
        trie.insert("this");
        trie.insert("there");
        trie.insert("then");
  
        // Print inserted words
        trie.print();
  
        // Check if these words
        // are present or not
        System.out.println(
            trie.search("there"));
        System.out.println(
            trie.search("therein"));
        System.out.println(
            trie.startsWith("th"));
        System.out.println(
            trie.startsWith("fab"));
    }
}


但是对于压缩的特里树,节点的设计需要做如下调整:在标准特里树中,一条边(例如“a”)只对应一个字符,由子节点引用数组中的某个元素表示;而在压缩特里树中,一条边可以对应整个字符串(例如“face”),它同样由引用数组中的某个元素表示,并额外用 edgeLabel 保存该边上的字符串。代码如下:

Java

/**
 * Node of a compressed trie: every child link carries a string label
 * instead of a single character.
 */
class node {

    // Child links; 26 slots, a null slot means "no child".
    node[] children;

    // Label of the edge leading to the child stored in the same slot.
    StringBuilder[] edgeLabel;

    // Word-end marker; starts out false.
    boolean isEnd;

    /** Creates a node with 26 empty child/label slots and the end flag cleared. */
    node()
    {
        this.children = new node[26];
        this.edgeLabel = new StringBuilder[26];
        this.isEnd = false;
    }
}

压缩特里节点:

Java

/**
 * Node of the bitwise compressed trie: an integer key, a bit index and
 * two child links.
 */
class CompressedNode {

    // Key stored in this node.
    int data;

    // Bit index associated with this node.
    int bitNumber;

    // Left link.
    CompressedNode leftChild;

    // Right link.
    CompressedNode rightChild;
}

压缩特里类:

Java

/**
 * Container for the compressed trie: holds the root link and the basic
 * empty/clear operations.
 */
class CompressedNode {

    // Constant named MaxBits; it is not read anywhere inside this class.
    private static final int MaxBits = 10;

    // Root link; null while the trie is empty.
    private CompressedNode root;

    /** Creates an empty trie. */
    public CompressedNode()
    {
        this.root = null;
    }

    /**
     * @return true when no root has been set
     */
    public boolean isEmpty()
    {
        return this.root == null;
    }

    /** Drops the root link, leaving the trie empty. */
    public void makeEmpty()
    {
        this.root = null;
    }
}

在压缩特里中搜索:

在压缩的 Trie 树中搜索与在标准 Trie 树中搜索非常相似。区别在于,这里比较的不是单个字符,而是整段字符串(边标签)。

Java

// Function to search a key k
// in the trie
public boolean search(int k)
{
    // Find the number of bits
    int numOfBits = (int)(Math.log(k) / Math.log(2)) + 1;
  
    // If error occurs
    if (numOfBits > MaxBits) {
        System.out.println("Error : Number too large");
        return false;
    }
  
    // Search Node
    CompressedNode searchNode = search(root, k);
  
    // If the data matches
    if (searchNode.data == k)
        return true;
  
    // Else return false
    else
        return false;
}

在Compressed Trie中插入元素:

Java

/**
 * Inserts the key {@code ele} into the trie whose header node is
 * {@code t} and returns the header node.
 *
 * When {@code t} is null a new header node holding {@code ele} is
 * created and returned. When the key is already present an error line
 * is printed and the trie is left unchanged.
 *
 * NOTE(review): relies on the helpers search(CompressedNode, int) and
 * bit(int, int), which are not shown here; presumably search returns
 * the node where the lookup for ele ends and bit(x, i) tests the i-th
 * bit of x - confirm.
 *
 * @param t   header node of the trie, or null for an empty trie
 * @param ele key to insert
 * @return the header node of the trie after the insertion
 */
private CompressedNode insert(
    CompressedNode t, int ele)
{
    CompressedNode current, parent;
    CompressedNode lastNode, newNode;
    int i;

    // Empty trie: create the header node. Its bit number is 0, its
    // left link points back to itself and its right link is null.
    if (t == null) {
        t = new CompressedNode();
        t.bitNumber = 0;
        t.data = ele;
        t.leftChild = t;
        t.rightChild = null;
        return t;
    }

    // Node at which a lookup for ele ends
    lastNode = search(t, ele);

    // Duplicate key: report it and leave the trie unchanged
    if (ele == lastNode.data) {
        System.out.println(
            "Error : key is already present\n");
        return t;
    }

    // Find the first bit index (counting from 1) at which ele and the
    // key found above differ; the keys are known to be different here
    for (i = 1; bit(ele, i) == bit(lastNode.data, i); i++)
        ;

    // Walk down from the header, steering by the bits of ele, while the
    // link leads to a node with a larger bit number than its parent and
    // that bit number is still below i
    current = t.leftChild;
    parent = t;
    while (current.bitNumber > parent.bitNumber
           && current.bitNumber < i) {
        parent = current;
        current = (bit(ele, current.bitNumber))
                      ? current.rightChild
                      : current.leftChild;
    }

    // New node for bit i: the link on the side of ele's own bit points
    // back to the new node itself, the other link points to current
    newNode = new CompressedNode();
    newNode.bitNumber = i;
    newNode.data = ele;
    newNode.leftChild = bit(ele, i) ? current : newNode;
    newNode.rightChild = bit(ele, i) ? newNode : current;

    // Replace the parent's link to current with the new node
    if (current == parent.leftChild)
        parent.leftChild = newNode;
    else
        parent.rightChild = newNode;

    return t;
}

下面是实现压缩Trie所有功能的程序:

Java

// Java program to implement the
// Compressed Trie
  
/**
 * Compressed trie (radix tree) over an alphabet of consecutive symbols.
 * Each edge carries a string label instead of a single character.
 */
class Trie {

    // Root node; it has no incoming edge
    private final Node root = new Node(false);

    // First symbol of the alphabet: 'a' for lower, 'A' for upper
    private final char CASE;

    /** Creates a trie whose alphabet starts at 'a'. */
    public Trie() { CASE = 'a'; }

    /**
     * Creates a trie whose alphabet starts at the given symbol.
     *
     * @param CASE first symbol of the alphabet
     */
    public Trie(char CASE)
    {
        this.CASE = CASE;
    }

    /**
     * Inserts a word into the compressed trie. Inserting the empty
     * string marks the root as a word end.
     *
     * @param word word to insert
     * @throws IllegalArgumentException if the word contains a character
     *         outside the trie alphabet; the trie is left unchanged
     */
    public void insert(String word)
    {
        // Validate the whole word before touching the trie. Without
        // this, a bad character hidden inside an edge label can make a
        // later insert fail halfway through an edge split.
        requireInAlphabet(word);

        Node trav = root;
        int i = 0;

        while (i < word.length()
               && trav.edgeLabel[word.charAt(i) - CASE] != null) {

            int index = word.charAt(i) - CASE;
            StringBuilder label = trav.edgeLabel[index];
            int j = 0;

            // Consume the common part of the label and the word
            while (j < label.length() && i < word.length()
                   && label.charAt(j) == word.charAt(i)) {
                ++i;
                ++j;
            }

            if (j < label.length()) {

                // The word ends inside the edge (e.g. "face" when
                // "facebook" exists) or diverges from it: split the
                // edge after its first j characters
                boolean wordEndsHere = (i == word.length());
                Node middle = splitEdge(trav, index, j, wordEndsHere);

                // Hang the rest of the word below the split point
                if (!wordEndsHere) {
                    attachLeaf(middle, word, i);
                }
                return;
            }

            // Whole label matched, continue below it
            trav = trav.children[index];
        }

        if (i < word.length()) {
            // No edge starts with the next symbol: add a new leaf
            attachLeaf(trav, word, i);
        }
        else {
            // Word ends exactly at an existing node, e.g. "there"
            // when "therein" and "thereafter" are existing
            trav.isEnd = true;
        }
    }

    /**
     * Splits the edge in the given slot of {@code parent} after
     * {@code at} characters and returns the new intermediate node.
     * The old child is re-attached below it under the rest of the label.
     */
    private Node splitEdge(
        Node parent, int index, int at, boolean endsHere)
    {
        StringBuilder label = parent.edgeLabel[index];
        Node middle = new Node(endsHere);
        int tailSlot = label.charAt(at) - CASE;

        middle.edgeLabel[tailSlot]
            = new StringBuilder(label.substring(at));
        middle.children[tailSlot] = parent.children[index];

        // Shorten the existing label to the common part
        label.setLength(at);
        parent.children[index] = middle;
        return middle;
    }

    /**
     * Adds a word-end leaf below {@code parent}, labelled with the part
     * of {@code word} that starts at {@code from}.
     */
    private void attachLeaf(Node parent, String word, int from)
    {
        int slot = word.charAt(from) - CASE;
        parent.edgeLabel[slot]
            = new StringBuilder(word.substring(from));
        parent.children[slot] = new Node(true);
    }

    /** Throws IllegalArgumentException on the first symbol outside the alphabet. */
    private void requireInAlphabet(String word)
    {
        for (int k = 0; k < word.length(); ++k) {
            int slot = word.charAt(k) - CASE;
            if (slot < 0 || slot >= root.edgeLabel.length) {
                throw new IllegalArgumentException(
                    "Character '" + word.charAt(k) + "' at index "
                    + k + " is outside the trie alphabet: " + word);
            }
        }
    }

    /** Prints every stored word on its own line, in slot (alphabetical) order. */
    public void print()
    {
        printUtil(root, new StringBuilder());
    }

    /**
     * Prints the words stored at or below {@code node}; {@code str}
     * holds the characters on the path from the root to {@code node}.
     */
    private void printUtil(
        Node node, StringBuilder str)
    {
        if (node.isEnd) {
            System.out.println(str);
        }

        for (int i = 0;
             i < node.edgeLabel.length; ++i) {

            if (node.edgeLabel[i] == null) {
                continue;
            }

            // Extend the path, recurse, then restore the path
            int length = str.length();
            str.append(node.edgeLabel[i]);
            printUtil(node.children[i], str);
            str.setLength(length);
        }
    }

    /**
     * Reports whether the exact word was inserted.
     *
     * @param word word to look up; symbols outside the alphabet simply
     *             make the lookup fail
     * @return true when the word is stored in the trie
     */
    public boolean search(String word)
    {
        // A word that ends inside an edge (e.g. "fac") is not stored
        Node reached = locate(word, false);
        return reached != null && reached.isEnd;
    }

    /**
     * Reports whether any stored word starts with the given prefix.
     *
     * @param prefix prefix to look up; symbols outside the alphabet
     *               simply make the lookup fail
     * @return true when the prefix lies on a path of the trie
     */
    public boolean startsWith(String prefix)
    {
        // Ending inside an edge is fine for a prefix
        return locate(prefix, true) != null;
    }

    /**
     * Follows {@code key} from the root.
     *
     * Returns the node at which the key is used up. When the key ends
     * inside an edge, returns the node below that edge if
     * {@code allowMidEdge} is set. Returns null when the key is not on
     * any path of the trie.
     */
    private Node locate(String key, boolean allowMidEdge)
    {
        Node trav = root;
        int i = 0;

        while (i < key.length()) {
            int index = key.charAt(i) - CASE;

            // Symbol outside the alphabet, or no edge for it
            if (index < 0 || index >= trav.edgeLabel.length
                || trav.edgeLabel[index] == null) {
                return null;
            }

            StringBuilder label = trav.edgeLabel[index];
            int j = 0;

            while (i < key.length() && j < label.length()) {

                // Character mismatch
                if (key.charAt(i) != label.charAt(j)) {
                    return null;
                }

                ++i;
                ++j;
            }

            // Key used up before the end of the label
            if (j < label.length()) {
                return allowMidEdge ? trav.children[index] : null;
            }

            // Traverse further
            trav = trav.children[index];
        }

        return trav;
    }
}

/**
 * Node of the compressed trie. Slot i of {@code children} holds the
 * child reached through the edge whose label is in slot i of
 * {@code edgeLabel}.
 */
class Node {

    // Number of symbols
    private final static int SYMBOLS = 26;
    Node[] children = new Node[SYMBOLS];
    StringBuilder[] edgeLabel = new StringBuilder[SYMBOLS];
    boolean isEnd;

    /**
     * Creates a node without children.
     *
     * @param isEnd whether a stored word ends at this node
     */
    public Node(boolean isEnd)
    {
        this.isEnd = isEnd;
    }
}
  
class GFG {
  
    // Driver Code
    public static void main(String[] args)
    {
        Trie trie = new Trie();
  
        // Insert words
        trie.insert("facebook");
        trie.insert("face");
        trie.insert("this");
        trie.insert("there");
        trie.insert("then");
  
        // Print inserted words
        trie.print();
  
        // Check if these words
        // are present or not
        System.out.println(
            trie.search("there"));
        System.out.println(
            trie.search("therein"));
        System.out.println(
            trie.startsWith("th"));
        System.out.println(
            trie.startsWith("fab"));
    }
}
输出:
face
facebook
then
there
this
true
false
true
false