-
Notifications
You must be signed in to change notification settings - Fork 0
/
InvertedPageIndex.java
79 lines (72 loc) · 2.17 KB
/
InvertedPageIndex.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/**
 * Inverted index over web pages: maps each word to its {@link WordEntry} so that
 * the pages containing a word, all of several words, any of several words, or an
 * exact phrase can be looked up.
 *
 * <p>All storage is delegated to a {@code MyHashTable}; this class only walks the
 * project's linked lists and combines {@code MySet} results.
 */
public class InvertedPageIndex
{
    /** Word-to-entry table backing the index; assigned once in the constructor. */
    private final MyHashTable hashTable;

    /** Creates an empty index. */
    public InvertedPageIndex()
    {
        hashTable = new MyHashTable();
    }

    /**
     * Adds a new page entry to the inverted page index by registering every word
     * entry of the page's own index with the hash table.
     *
     * @param p the page whose word entries are added
     */
    public void addPage(PageEntry p)
    {
        for (MyLinkedList<WordEntry>.Node node = p.getPageIndex().getWordEntries().head;
             node != null;
             node = node.next)
        {
            hashTable.addPositionsForWord(node.obj);
        }
    }

    /**
     * Looks up the entry stored for a word.
     *
     * @param str the word to search for
     * @return whatever {@code MyHashTable.searchWord} yields for {@code str};
     *         the other methods of this class treat {@code null} as "no entry"
     */
    public WordEntry getEntryFromWord(String str)
    {
        return hashTable.searchWord(str);
    }

    /**
     * Returns the set of page entries of the web pages which contain the word.
     *
     * @param str the word to search for; {@code null} yields an empty set
     * @return a new set of pages; empty when the word has no entry
     */
    public MySet<PageEntry> getPagesWhichContainWord(String str)
    {
        MySet<PageEntry> pages = new MySet<PageEntry>();
        if (str == null)
        {
            // A null word cannot match anything; answer like an unknown word.
            return pages;
        }

        WordEntry entry = getEntryFromWord(str);
        if (entry == null)
        {
            return pages;
        }

        for (MyLinkedList<Position>.Node node = entry.getAllPositionsForThisWord().head;
             node != null;
             node = node.next)
        {
            try
            {
                pages.addElement(node.obj.getPageEntry());
            }
            catch (RuntimeException ignored)
            {
                // Deliberately best-effort: a failure to add one position's page
                // must not abort the scan of the remaining positions.
                // NOTE(review): presumably MySet.addElement rejects a page that is
                // already present (one page holds many positions of a word) --
                // confirm, and narrow this catch to that exception type if so.
            }
        }
        return pages;
    }

    /**
     * Returns the pages that contain every one of the given words (AND query).
     *
     * @param words the words to search for; {@code null} or empty yields an empty set
     * @return the intersection of the per-word page sets
     */
    public MySet<PageEntry> getPagesWhichContainAllWords(String[] words)
    {
        if (words == null || words.length == 0)
        {
            return new MySet<PageEntry>();
        }

        MySet<PageEntry> common = getPagesWhichContainWord(words[0]);
        for (int i = 1; i < words.length; i++)
        {
            common = common.intersection(getPagesWhichContainWord(words[i]));
            if (common.size() == 0)
            {
                // Nothing is left to intersect with; stop early.
                return common;
            }
        }
        return common;
    }

    /**
     * Returns the pages that contain at least one of the given words (OR query).
     *
     * @param words the words to search for; {@code null} or empty yields an empty set
     * @return the union of the per-word page sets
     */
    public MySet<PageEntry> getPagesWhichContainAnyOfTheseWords(String[] words)
    {
        if (words == null || words.length == 0)
        {
            return new MySet<PageEntry>();
        }

        MySet<PageEntry> combined = getPagesWhichContainWord(words[0]);
        for (int i = 1; i < words.length; i++)
        {
            combined = combined.union(getPagesWhichContainWord(words[i]));
        }
        return combined;
    }

    /**
     * Returns the pages that contain the given words as a phrase.
     *
     * <p>Candidates are first narrowed to pages containing all of the words, then
     * each candidate is kept only if its {@code containsPhrase} result is positive.
     *
     * @param str the words of the phrase, in order; {@code null} or empty yields
     *            an empty set
     * @return the set of pages for which {@code containsPhrase(str) > 0}
     */
    public MySet<PageEntry> getPagesWhichContainPhrase(String[] str)
    {
        MySet<PageEntry> candidates = getPagesWhichContainAllWords(str);
        if (candidates.size() == 0)
        {
            return candidates;
        }

        MySet<PageEntry> phrasePages = new MySet<PageEntry>();
        for (MyLinkedList<PageEntry>.Node node = candidates.getElements().head;
             node != null;
             node = node.next)
        {
            if (node.obj.containsPhrase(str) > 0)
            {
                phrasePages.addElement(node.obj);
            }
        }
        return phrasePages;
    }
}