I'm a fan of tries. I can't claim that I've used them in practice, but I wish I had. They're super!
Common uses:
- substring search
- spell check
- autocomplete
This is an implementation I just put together. Enjoy!
Trie API:
- add(word) - Add the given word to the trie.
- contains(word) - Return True if given word exists in the trie.
- suggest(prefix) - Return word suggestions for the given prefix, including prefix if it is a word.
- suffix_count(prefix) - Return the number of words available for the given prefix.
class TrieNode:
"""
A TrieNode represents a letter and its continuing suffixes along
a word path. This implementation keeps track of counts of suffixes.
"""
__slots__ = ['letter', 'ends_word', 'suffixes', 'counts']
def __init__(self, letter):
self.letter = letter
self.ends_word = False
self.suffixes = {}
self.counts = [0] * 26
def get_letter(self):
return self.letter
def suffix_exists(self, letter):
return letter in self.suffixes
def get_suffix(self, letter):
return self.suffixes[letter]
def add_suffix(self, letter):
self.suffixes[letter] = TrieNode(letter)
def get_suffixes(self):
for key in self.suffixes:
yield key
def increment(self, letter):
index = self.letter_to_index(letter)
self.counts[index] += 1
def get_count(self, letter):
index = self.letter_to_index(letter)
return self.counts[index]
def end_word(self):
self.ends_word = True
def ends(self):
return self.ends_word
def letter_to_index(self, letter):
return ord(letter) - ord('a')
class Trie:
"""
A Trie stores letters/symbols for a set of words for fast
word, prefix, and suffix search.
"""
def __init__(self):
self.root = TrieNode('')
def add(self, word):
"""
Add the given word to the trie.
:param word:
:return: None
"""
current = self.root
for c in word.lower():
if not current.suffix_exists(c):
current.add_suffix(c)
current.increment(c)
current = current.get_suffix(c)
current.end_word()
def contains(self, word):
"""
Return True if given word exists in the trie.
:param word:
:return: bool
"""
end_node = self._traverse(word.lower(), True)
return end_node is not None
def suggest(self, prefix):
"""
Return word suggestions for the given prefix, including prefix
if it is a word.
:param prefix:
:return: list
"""
prefix = prefix.lower()
prefix_node = self._traverse(prefix, False)
suggestions = []
word = list(prefix[:-1])
# now dfs (iterated), finding all ending words
stack = []
stack.append(prefix_node)
while stack:
node = stack.pop()
if node is None:
word.pop()
continue
word.append(node.get_letter())
if node.ends():
suggestions.append(''.join(word))
for c in node.get_suffixes():
stack.append(None)
stack.append(node.get_suffix(c))
return suggestions
def word_count(self, prefix):
"""
Return the number of words available for the given prefix.
:param prefix:
:return: int
"""
prefix = prefix.lower()
prefix_node = self._traverse(prefix[:-1], False)
return prefix_node.get_count(prefix[-1])
def _traverse(self, prefix, ends):
current = self.root
for c in prefix:
if current.suffix_exists(c):
current = current.get_suffix(c)
else:
return None
if ends and not current.ends():
return None
return current
Is that code too long?
Here's a much shorter version, optimized for only adding words (in all lowercase) and telling you how many words it contains with a given prefix. This is much faster.
class TrieNode:
__slots__ = ['letters', 'word_count', 'ends_word']
def __init__(self):
self.letters = {}
self.word_count = 0
self.ends_word = False
class Contacts:
def __init__(self):
self.root = TrieNode()
def add(self, word):
current = self.root
for ch in word:
current.word_count += 1
if ch not in current.letters:
current.letters[ch] = TrieNode()
current = current.letters[ch]
current.word_count += 1
current.ends_word = True
def partial(self, prefix):
"""
Returns the number of words containing this prefix.
:param prefix:
:return:
"""
current = self.root
for ch in prefix:
if ch in current.letters:
current = current.letters[ch]
else:
# prefix was not found
return 0
return current.word_count