-
Notifications
You must be signed in to change notification settings - Fork 0
/
Reversed_Tree.py
71 lines (60 loc) · 2.35 KB
/
Reversed_Tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import chardet
import time
import string
import re
import os
class Node:
def __init__(self, char, list_of_files=None):
self.letter = char
self.files = list_of_files
self.children = {}
def __str__(self):
return self.children
class Reversed_Tree:
@staticmethod
def process_word(word):
return word.rstrip().strip(string.punctuation).lower()[::-1]
def __add_word_to_vocabulary(self, word, file_name):
if not word or not len(word):
return
if word[0] not in self.nodes.keys():
self.nodes[word[0]] = Node(word[0])
cur_node = self.nodes[word[0]]
for i in range(1, len(word)):
if word[i] not in cur_node.children.keys():
cur_node.children[word[i]] = Node(word[i])
cur_node = cur_node.children[word[i]]
if not cur_node.files:
cur_node.files = {file_name}
else:
cur_node.files |= {file_name}
def is_present(self, word):
if not word or not len(word):
return False
if word[0] not in self.nodes.keys():
self.nodes[word[0]] = Node(word[0])
cur_node = self.nodes[word[0]]
for i in range(1, len(word)):
if word[i] not in cur_node.children.keys():
return False
cur_node = cur_node.children[word[i]]
return cur_node.files
def __init__(self, file_folder):
list_of_files = [f for f in os.listdir(file_folder) if os.path.isfile(os.path.join(file_folder, f))]
self.nodes = {}
for file_name in list_of_files:
start = time.process_time()
encoding = chardet.detect(open(os.path.join(file_folder, file_name), "rb").read())['encoding']
with open(os.path.join(file_folder, file_name), "r", encoding=encoding) as file:
words = {Reversed_Tree.process_word(w) for w in re.split("\W+", file.read().rstrip())}
for w in words:
self.__add_word_to_vocabulary(w, file_name)
end = time.process_time()
print(file_name, "done in", end - start, "s")
if __name__ == "__main__":
rt = Reversed_Tree("text")
while True:
query = input("\n\nEnter TWD search request: ")
if query.lower() == "exit":
break
print(rt.is_present(Reversed_Tree.process_word(query)))