From 2a1585f97dbbfe109f70d3b3b55098e4b5d793c9 Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Thu, 14 Mar 2019 08:46:52 +0100
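Subject: [PATCH 1/6] Upgrade to Lucene 8.

Bump lucene.version from 7.6.0 to 8.0.0 and adapt callers to the
changed APIs:

* Analyzers: drop StandardFilter and take the default stop words from
  EnglishAnalyzer.ENGLISH_STOP_WORDS_SET instead of
  StandardAnalyzer.STOP_WORDS_SET.
* Index access: replace MultiFields with MultiTerms, MultiBits and
  FieldInfos.getMergedFieldInfos, and IndexWriter.maxDoc() with
  getDocStats().maxDoc.
* Search: read hit counts via TopDocs.totalHits.value, build the empty
  TopDocs from a TotalHits, and drop the extra boolean argument from
  the sorted IndexSearcher.search calls.
* DFR: BasicModelP is replaced by BasicModelIn, so the -pl2 and -pl2.c
  options are renamed to -inl2 and -inl2.c (user-visible change).
* AxiomaticSimilarity: port to the single scorer(...) entry point and
  remove the OLD_LENGTH_TABLE norm decoding that was only used for
  indexes created before version 7.
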
---
 pom.xml                                       |   2 +-
 .../analysis/EnglishStemmingAnalyzer.java     |  11 +-
 .../anserini/analysis/FreebaseAnalyzer.java   |  28 ++---
 .../io/anserini/index/IndexCollection.java    |   2 +-
 .../java/io/anserini/index/IndexUtils.java    |  13 +-
 .../java/io/anserini/kg/IndexFreebase.java    |   2 +-
 .../io/anserini/kg/LookupFreebaseNodes.java   |   4 +-
 .../io/anserini/ltr/BaseFeatureExtractor.java |  11 +-
 .../feature/base/BM25FeatureExtractor.java    |   4 +-
 .../ltr/feature/base/PMIFeatureExtractor.java |   6 +-
 .../io/anserini/rerank/lib/AxiomReranker.java |   6 +-
 .../anserini/rerank/lib/RankLibReranker.java  |   4 +-
 .../io/anserini/rerank/lib/Rm3Reranker.java   |   4 +-
 .../java/io/anserini/search/SearchArgs.java   |   8 +-
 .../io/anserini/search/SearchCollection.java  |  19 ++-
 .../io/anserini/search/SimpleSearcher.java    |  10 +-
 .../similarity/AxiomaticSimilarity.java       | 116 +++++-------------
 .../search/similarity/RankLibSimilarity.java  |   9 +-
 .../io/anserini/util/ExtractTopDfTerms.java   |   4 +-
 .../io/anserini/integration/IndexerTest.java  |   2 +-
 .../integration/MultiThreadingSearchTest.java |   3 +-
 .../integration/TrecEndToEndTest.java         |   3 +-
 .../integration/TweetEndToEndTest.java        |   3 +-
 23 files changed, 103 insertions(+), 171 deletions(-)
diff --git a/pom.xml b/pom.xml
index 37c54f0fa2..a2996778e6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -33,7 +33,7 @@
   </developers>
 
   <properties>
-    <lucene.version>7.6.0</lucene.version>
+    <lucene.version>8.0.0</lucene.version>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
   </properties>
 
diff --git a/src/main/java/io/anserini/analysis/EnglishStemmingAnalyzer.java b/src/main/java/io/anserini/analysis/EnglishStemmingAnalyzer.java
index 1ed4055d96..5e8be821c6 100644
--- a/src/main/java/io/anserini/analysis/EnglishStemmingAnalyzer.java
+++ b/src/main/java/io/anserini/analysis/EnglishStemmingAnalyzer.java
@@ -17,12 +17,11 @@
 package io.anserini.analysis;
 
 import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
 import org.apache.lucene.analysis.en.KStemFilter;
 import org.apache.lucene.analysis.en.PorterStemFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 public class EnglishStemmingAnalyzer extends StopwordAnalyzerBase {
@@ -30,11 +29,11 @@ public class EnglishStemmingAnalyzer extends StopwordAnalyzerBase {
   private final CharArraySet stemExclusionSet;
   
   public EnglishStemmingAnalyzer() {
-    this("", StandardAnalyzer.STOP_WORDS_SET);
+    this("", EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
   }
   
   public EnglishStemmingAnalyzer(String stemmer) {
-    this(stemmer, StandardAnalyzer.STOP_WORDS_SET, CharArraySet.EMPTY_SET);
+    this(stemmer, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, CharArraySet.EMPTY_SET);
   }
   
   public EnglishStemmingAnalyzer(CharArraySet stopwords) {
@@ -54,7 +53,7 @@ public EnglishStemmingAnalyzer(String stemmer, CharArraySet stopwords, CharArray
   protected TokenStreamComponents createComponents(String fieldName) {
     Tokenizer source = new StandardTokenizer();
     TokenStream result = null;
-    result = new StandardFilter(source);
+    result = source;
     result = new EnglishPossessiveFilter(result);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, this.stopwords);
@@ -72,7 +71,7 @@ protected TokenStreamComponents createComponents(String fieldName) {
   }
   
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
+    TokenStream result = in;
     result = new LowerCaseFilter(result);
     return result;
   }
diff --git a/src/main/java/io/anserini/analysis/FreebaseAnalyzer.java b/src/main/java/io/anserini/analysis/FreebaseAnalyzer.java
index f53237aec2..31eae0065d 100644
--- a/src/main/java/io/anserini/analysis/FreebaseAnalyzer.java
+++ b/src/main/java/io/anserini/analysis/FreebaseAnalyzer.java
@@ -21,13 +21,11 @@
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
 import org.apache.lucene.analysis.en.PorterStemFilter;
 import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /* ASCIIFoldingFilter is used for accent folding. This will normalize the characters
@@ -54,23 +52,23 @@ public FreebaseAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
 
   protected TokenStreamComponents createComponents(String fieldName) {
     StandardTokenizer source = new StandardTokenizer();
-    StandardFilter result = new StandardFilter(source);
-    EnglishPossessiveFilter result2 = new EnglishPossessiveFilter(result);
-    LowerCaseFilter result3 = new LowerCaseFilter(result2);
-    Object result4 = new StopFilter(result3, this.stopwords);
-    result4 = new ASCIIFoldingFilter((TokenStream) result4);
+    TokenStream result = source;
+    result = new EnglishPossessiveFilter(result);
+    result = new LowerCaseFilter(result);
+    result = new StopFilter(result, this.stopwords);
+    result = new ASCIIFoldingFilter(result);
     if(!this.stemExclusionSet.isEmpty()) {
-      result4 = new SetKeywordMarkerFilter((TokenStream)result4, this.stemExclusionSet);
+      result = new SetKeywordMarkerFilter(result, this.stemExclusionSet);
     }
 
-    PorterStemFilter result1 = new PorterStemFilter((TokenStream)result4);
-    return new TokenStreamComponents(source, result1);
+    result = new PorterStemFilter(result);
+    return new TokenStreamComponents(source, result);
   }
 
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    StandardFilter result = new StandardFilter(in);
-    LowerCaseFilter result1 = new LowerCaseFilter(result);
-    return result1;
+    TokenStream result = in;
+    result = new LowerCaseFilter(result);
+    return result;
   }
 
   private static class DefaultSetHolder {
@@ -80,7 +78,7 @@ private DefaultSetHolder() {
     }
 
     static {
-      DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
+      DEFAULT_STOP_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
     }
   }
 }
\ No newline at end of file
diff --git a/src/main/java/io/anserini/index/IndexCollection.java b/src/main/java/io/anserini/index/IndexCollection.java
index 24aecd6d44..6b5d221f50 100644
--- a/src/main/java/io/anserini/index/IndexCollection.java
+++ b/src/main/java/io/anserini/index/IndexCollection.java
@@ -594,7 +594,7 @@ public void run() throws IOException {
     if (args.solr) {
       numIndexed = counters.indexed.get();
     } else {
-      numIndexed = args.dryRun ? counters.indexed.get() : writer.maxDoc();
+      numIndexed = args.dryRun ? counters.indexed.get() : writer.getDocStats().maxDoc;
     }
 
     // Do a final commit
diff --git a/src/main/java/io/anserini/index/IndexUtils.java b/src/main/java/io/anserini/index/IndexUtils.java
index 1544510059..2bd7036073 100755
--- a/src/main/java/io/anserini/index/IndexUtils.java
+++ b/src/main/java/io/anserini/index/IndexUtils.java
@@ -45,7 +45,6 @@
 import java.io.*;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
-import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
@@ -147,8 +146,7 @@ public InputStream getReadFileStream(String path) throws IOException {
   }
 
   void printIndexStats() throws IOException {
-    Fields fields = MultiFields.getFields(reader);
-    Terms terms = fields.terms(LuceneDocumentGenerator.FIELD_BODY);
+    Terms terms = MultiTerms.getTerms(reader, LuceneDocumentGenerator.FIELD_BODY);
 
     System.out.println("Index statistics");
     System.out.println("----------------");
@@ -159,10 +157,9 @@ void printIndexStats() throws IOException {
 
     System.out.println("stored fields:");
 
-    FieldInfos fieldInfos = MultiFields.getMergedFieldInfos(reader);
-    for (String fd : fields) {
-      FieldInfo fi = fieldInfos.fieldInfo(fd);
-      System.out.println("  " + fd + " (" + "indexOption: " + fi.getIndexOptions() +
+    FieldInfos fieldInfos = FieldInfos.getMergedFieldInfos(reader);
+    for (FieldInfo fi : fieldInfos) {
+      System.out.println("  " + fi.name + " (" + "indexOption: " + fi.getIndexOptions() +
           ", hasVectors: " + fi.hasVectors() + ")");
     }
   }
@@ -178,7 +175,7 @@ public void printTermCounts(String termStr) throws IOException, ParseException {
     System.out.println("collection frequency: " + reader.totalTermFreq(t));
     System.out.println("document frequency:   " + reader.docFreq(t));
 
-    PostingsEnum postingsEnum = MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY, t.bytes());
+    PostingsEnum postingsEnum = MultiTerms.getTermPostingsEnum(reader, LuceneDocumentGenerator.FIELD_BODY, t.bytes());
     System.out.println("postings:\n");
     while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
       System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq());
diff --git a/src/main/java/io/anserini/kg/IndexFreebase.java b/src/main/java/io/anserini/kg/IndexFreebase.java
index d163c8e448..5732f050e1 100644
--- a/src/main/java/io/anserini/kg/IndexFreebase.java
+++ b/src/main/java/io/anserini/kg/IndexFreebase.java
@@ -161,7 +161,7 @@ public void run() throws IOException {
 
     LOG.info(String.format("%,d triples indexed.", triplesCount.get()));
     LOG.info(String.format("%,d documents added.", docCount.get()));
-    int numIndexed = writer.maxDoc();
+    int numIndexed = writer.getDocStats().maxDoc;
 
     try {
       writer.commit();
diff --git a/src/main/java/io/anserini/kg/LookupFreebaseNodes.java b/src/main/java/io/anserini/kg/LookupFreebaseNodes.java
index 4ac64e11b9..52ebb40d88 100644
--- a/src/main/java/io/anserini/kg/LookupFreebaseNodes.java
+++ b/src/main/java/io/anserini/kg/LookupFreebaseNodes.java
@@ -104,11 +104,11 @@ public Document lookupMid(String mid) throws IOException {
     TermQuery query = new TermQuery(new Term(IndexFreebase.FIELD_ID, mid));
 
     TopDocs topDocs = searcher.search(query, 1);
-    if (topDocs.totalHits == 0) {
+    if (topDocs.totalHits.value == 0) {
       System.err.println("Error: mid not found!");
       return null;
     }
-    if (topDocs.totalHits > 1) {
+    if (topDocs.totalHits.value > 1) {
       System.err.println("Error: more than one matching mid found. This shouldn't happen!");
       return null;
     }
diff --git a/src/main/java/io/anserini/ltr/BaseFeatureExtractor.java b/src/main/java/io/anserini/ltr/BaseFeatureExtractor.java
index 92d384ae6f..e2e0077bed 100644
--- a/src/main/java/io/anserini/ltr/BaseFeatureExtractor.java
+++ b/src/main/java/io/anserini/ltr/BaseFeatureExtractor.java
@@ -25,7 +25,8 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiBits;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -168,7 +169,7 @@ private void printHeader(PrintStream out, FeatureExtractors extractors) {
     public void printFeatureForAllDocs(PrintStream out) throws IOException {
       Map<String, RerankerContext<K>> queryContextMap = buildRerankerContextMap();
       FeatureExtractors extractors = getExtractors();
-      Bits liveDocs = MultiFields.getLiveDocs(reader);
+      Bits liveDocs = MultiBits.getLiveDocs(reader);
       Set<String> fieldsToLoad = getFieldsToLoad();
 
       this.printHeader(out, extractors);
@@ -183,7 +184,7 @@ public void printFeatureForAllDocs(PrintStream out) throws IOException {
         String docIdString = doc.get(getIdField());
         // NOTE doc frequencies should not be retrieved from here, term vector returned is as if on single document
         // index
-        Terms terms = MultiFields.getTerms(reader, getTermVectorField());//reader.getTermVector(docId, getTermVectorField());
+        Terms terms = MultiTerms.getTerms(reader, getTermVectorField());//reader.getTermVector(docId, getTermVectorField());
 
         if (terms == null) {
           continue;
@@ -207,7 +208,7 @@ public void printFeatureForAllDocs(PrintStream out) throws IOException {
     public void printFeatures(PrintStream out) throws IOException {
       Map<String, RerankerContext<K>> queryContextMap = buildRerankerContextMap();
       FeatureExtractors extractors = getExtractors();
-      Bits liveDocs = MultiFields.getLiveDocs(reader);
+      Bits liveDocs = MultiBits.getLiveDocs(reader);
       Set<String> fieldsToLoad = getFieldsToLoad();
 
       // We need to open a searcher
@@ -227,7 +228,7 @@ public void printFeatures(PrintStream out) throws IOException {
           int qrelScore = entry.getValue();
           // We issue a specific query
           TopDocs topDocs = searcher.search(docIdQuery(docId), 1);
-          if (topDocs.totalHits == 0) {
+          if (topDocs.totalHits.value == 0) {
             LOG.warn(String.format("Document Id %s expected but not found in index, skipping...", docId));
             continue;
           }
diff --git a/src/main/java/io/anserini/ltr/feature/base/BM25FeatureExtractor.java b/src/main/java/io/anserini/ltr/feature/base/BM25FeatureExtractor.java
index e0bee3295d..be6c845176 100644
--- a/src/main/java/io/anserini/ltr/feature/base/BM25FeatureExtractor.java
+++ b/src/main/java/io/anserini/ltr/feature/base/BM25FeatureExtractor.java
@@ -23,7 +23,7 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -83,7 +83,7 @@ private double computeAvgFL(long sumTermFreqs, long maxDocs) {
   private long getSumTermFrequency(IndexReader reader, String fieldName) {
     Terms collectionTermVector = null;
     try {
-      collectionTermVector = MultiFields.getTerms(reader, fieldName);
+      collectionTermVector = MultiTerms.getTerms(reader, fieldName);
       long totalTermFreq = collectionTermVector.getSumTotalTermFreq();
       return totalTermFreq;
     } catch (IOException e) {
diff --git a/src/main/java/io/anserini/ltr/feature/base/PMIFeatureExtractor.java b/src/main/java/io/anserini/ltr/feature/base/PMIFeatureExtractor.java
index 15385d9d49..80067b6ed4 100644
--- a/src/main/java/io/anserini/ltr/feature/base/PMIFeatureExtractor.java
+++ b/src/main/java/io/anserini/ltr/feature/base/PMIFeatureExtractor.java
@@ -21,7 +21,7 @@
 import io.anserini.rerank.RerankerContext;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
@@ -100,8 +100,8 @@ public float extract(Document doc, Terms terms, RerankerContext<T> context) {
           for (int j = i +1; j < queryTokens.size(); j++) {
             pairsComputed ++;
             String secondToken = queryTokens.get(j);
-            PostingsEnum firstEnum = MultiFields.getTermDocsEnum(reader,LuceneDocumentGenerator.FIELD_BODY, new BytesRef(firstToken));
-            PostingsEnum secondEnum = MultiFields.getTermDocsEnum(reader,LuceneDocumentGenerator.FIELD_BODY, new BytesRef(secondToken));
+            PostingsEnum firstEnum = MultiTerms.getTermPostingsEnum(reader,LuceneDocumentGenerator.FIELD_BODY, new BytesRef(firstToken));
+            PostingsEnum secondEnum = MultiTerms.getTermPostingsEnum(reader,LuceneDocumentGenerator.FIELD_BODY, new BytesRef(secondToken));
             int intersect;
             if (firstEnum == null || secondEnum == null) {
               intersect = 0;
diff --git a/src/main/java/io/anserini/rerank/lib/AxiomReranker.java b/src/main/java/io/anserini/rerank/lib/AxiomReranker.java
index 342afe7b7f..414c118856 100644
--- a/src/main/java/io/anserini/rerank/lib/AxiomReranker.java
+++ b/src/main/java/io/anserini/rerank/lib/AxiomReranker.java
@@ -192,9 +192,9 @@ private ScoredDocuments searchTopDocs(Query query, RerankerContext<T> context) t
     if (context.getSearchArgs().arbitraryScoreTieBreak) {
       rs = searcher.search(finalQuery, context.getSearchArgs().hits);
     } else if (context.getSearchArgs().searchtweets) {
-      rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_TWEETID, true, true);
+      rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_TWEETID, true);
     } else {
-      rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_DOCID, true, true);
+      rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_DOCID, true);
     }
 
     return ScoredDocuments.fromTopDocs(rs, searcher);
@@ -262,7 +262,7 @@ private ScoredDocuments processExternalContext(ScoredDocuments docs, RerankerCon
       }
       IndexReader reader = DirectoryReader.open(FSDirectory.open(indexPath));
       IndexSearcher searcher = new IndexSearcher(reader);
-      searcher.setSimilarity(context.getIndexSearcher().getSimilarity(true));
+      searcher.setSimilarity(context.getIndexSearcher().getSimilarity());
 
       SearchArgs args = new SearchArgs();
       args.hits = this.R;
diff --git a/src/main/java/io/anserini/rerank/lib/RankLibReranker.java b/src/main/java/io/anserini/rerank/lib/RankLibReranker.java
index ab5d42d302..55475415fb 100644
--- a/src/main/java/io/anserini/rerank/lib/RankLibReranker.java
+++ b/src/main/java/io/anserini/rerank/lib/RankLibReranker.java
@@ -28,7 +28,7 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Terms;
 
 import java.io.IOException;
@@ -49,7 +49,7 @@ public class RankLibReranker<T> implements Reranker<T> {
   private DataPoint convertToDataPoint(Document doc, RerankerContext<T> context) {
     Terms terms = null;
     try {
-      terms = MultiFields.getTerms(context.getIndexSearcher().getIndexReader(), this.termsField);
+      terms = MultiTerms.getTerms(context.getIndexSearcher().getIndexReader(), this.termsField);
     } catch (IOException e) {
       LOG.error("Unable to retrieve term vectors");
     }
diff --git a/src/main/java/io/anserini/rerank/lib/Rm3Reranker.java b/src/main/java/io/anserini/rerank/lib/Rm3Reranker.java
index 5e0d014a6e..bc54a54b19 100644
--- a/src/main/java/io/anserini/rerank/lib/Rm3Reranker.java
+++ b/src/main/java/io/anserini/rerank/lib/Rm3Reranker.java
@@ -107,9 +107,9 @@ public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) {
       if (context.getSearchArgs().arbitraryScoreTieBreak) {
         rs = searcher.search(finalQuery, context.getSearchArgs().hits);
       } else if (context.getSearchArgs().searchtweets) {
-        rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_TWEETID, true, true);
+        rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_TWEETID, true);
       } else {
-        rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_DOCID, true, true);
+        rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_DOCID, true);
       }
     } catch (IOException e) {
       e.printStackTrace();
diff --git a/src/main/java/io/anserini/search/SearchArgs.java b/src/main/java/io/anserini/search/SearchArgs.java
index 74f8a12172..9aa0bf787a 100644
--- a/src/main/java/io/anserini/search/SearchArgs.java
+++ b/src/main/java/io/anserini/search/SearchArgs.java
@@ -126,11 +126,11 @@ public class SearchArgs {
   @Option(name = "-b", handler = StringArrayOptionHandler.class, usage = "BM25 b parameter")
   public String[] b = new String[] {"0.4"};
   
-  @Option(name = "-pl2", usage = "use PL2 scoring model")
-  public boolean pl2 = false;
+  @Option(name = "-inl2", usage = "use I(n)L2 scoring model")
+  public boolean inl2 = false;
   
-  @Option(name = "-pl2.c", metaVar = "[value]", usage = "PL2 c parameter")
-  public String[] pl2_c = new String[] {"0.1"};
+  @Option(name = "-inl2.c", metaVar = "[value]", usage = "I(n)L2 c parameter")
+  public String[] inl2_c = new String[] {"0.1"};
 
   @Option(name = "-spl", usage = "use SPL scoring model")
   public boolean spl = false;
diff --git a/src/main/java/io/anserini/search/SearchCollection.java b/src/main/java/io/anserini/search/SearchCollection.java
index a4bd424238..664b1a0004 100644
--- a/src/main/java/io/anserini/search/SearchCollection.java
+++ b/src/main/java/io/anserini/search/SearchCollection.java
@@ -44,7 +44,6 @@
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
 import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
 import org.apache.lucene.search.*;
@@ -229,9 +228,9 @@ public List<TaggedSimilarity> constructSimiliries() {
           similarities.add(new TaggedSimilarity(new BM25Similarity(Float.valueOf(k1), Float.valueOf(b)), "k1:"+k1+",b:"+b));
         }
       }
-    } else if (args.pl2) {
-      for (String c : args.pl2_c) {
-        similarities.add(new TaggedSimilarity(new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH2(Float.valueOf(c))), "c:"+c));
+    } else if (args.inl2) {
+      for (String c : args.inl2_c) {
+        similarities.add(new TaggedSimilarity(new DFRSimilarity(new BasicModelIn(), new AfterEffectL(), new NormalizationH2(Float.valueOf(c))), "c:"+c));
       };
     } else if (args.spl) {
       for (String c : args.spl_c) {
@@ -351,12 +350,12 @@ public<K> ScoredDocuments search(IndexSearcher searcher, K qid, String queryStri
       query = new BagOfWordsQueryGenerator().buildQuery(FIELD_BODY, analyzer, queryString);
     }
 
-    TopDocs rs = new TopDocs(0, new ScoreDoc[]{}, Float.NaN);
+    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[]{});
     if (!(isRerank && args.rerankcutoff <= 0)) {
       if (args.arbitraryScoreTieBreak) {// Figure out how to break the scoring ties.
         rs = searcher.search(query, isRerank ? args.rerankcutoff : args.hits);
       } else {
-        rs = searcher.search(query, isRerank ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true, true);
+        rs = searcher.search(query, isRerank ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true);
       }
     }
 
@@ -396,12 +395,12 @@ public<K> ScoredDocuments searchBackgroundLinking(IndexSearcher searcher, K qid,
       builder.add(q, BooleanClause.Occur.MUST);
       query = builder.build();
       
-      TopDocs rs = new TopDocs(0, new ScoreDoc[]{}, Float.NaN);
+      TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[]{});
       if (!(isRerank && args.rerankcutoff <= 0)) {
         if (args.arbitraryScoreTieBreak) {// Figure out how to break the scoring ties.
           rs = searcher.search(query, isRerank ? args.rerankcutoff : args.hits);
         } else {
-          rs = searcher.search(query, isRerank ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true, true);
+          rs = searcher.search(query, isRerank ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true);
         }
       }
       
@@ -464,12 +463,12 @@ public<K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String que
     Query compositeQuery = builder.build();
 
 
-    TopDocs rs = new TopDocs(0, new ScoreDoc[]{}, Float.NaN);
+    TopDocs rs = new TopDocs(new TotalHits(0,TotalHits.Relation.EQUAL_TO), new ScoreDoc[]{});
     if (!(isRerank && args.rerankcutoff <= 0)) {
       if (args.arbitraryScoreTieBreak) {// Figure out how to break the scoring ties.
         rs = searcher.search(compositeQuery, isRerank ? args.rerankcutoff : args.hits);
       } else {
-        rs = searcher.search(compositeQuery, isRerank ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_TWEETID, true, true);
+        rs = searcher.search(compositeQuery, isRerank ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_TWEETID, true);
       }
     }
 
diff --git a/src/main/java/io/anserini/search/SimpleSearcher.java b/src/main/java/io/anserini/search/SimpleSearcher.java
index 8ab4876f98..1cc37b57fb 100644
--- a/src/main/java/io/anserini/search/SimpleSearcher.java
+++ b/src/main/java/io/anserini/search/SimpleSearcher.java
@@ -132,7 +132,7 @@ public void setBM25Similarity(float k1, float b) {
   }
 
   public void setDFRSimilarity(float c) {
-    this.similarity = new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH2(c));
+    this.similarity = new DFRSimilarity(new BasicModelIn(), new AfterEffectL(), new NormalizationH2(c));
   }
 
   public void setIBSimilarity(float c) {
@@ -171,7 +171,7 @@ public Result[] search(String q, int k, long t) throws IOException {
     searchArgs.hits = k;
     searchArgs.searchtweets = searchtweets;
 
-    TopDocs rs = new TopDocs(0, new ScoreDoc[]{}, Float.NaN);
+    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[]{});
     RerankerContext context;
     if (searchtweets) {
       if (t > 0) {
@@ -183,14 +183,14 @@ public Result[] search(String q, int k, long t) throws IOException {
         builder.add(filter, BooleanClause.Occur.FILTER);
         builder.add(query, BooleanClause.Occur.MUST);
         Query compositeQuery = builder.build();
-        rs = searcher.search(compositeQuery, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_TWEETID, true, true);
+        rs = searcher.search(compositeQuery, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_TWEETID, true);
         context = new RerankerContext<>(searcher, null, compositeQuery, null, q, queryTokens, filter, searchArgs);
       } else {
-        rs = searcher.search(query, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_TWEETID, true, true);
+        rs = searcher.search(query, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_TWEETID, true);
         context = new RerankerContext<>(searcher, null, query, null, q, queryTokens, null, searchArgs);
       }
     } else {
-      rs = searcher.search(query, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_DOCID, true, true);
+      rs = searcher.search(query, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_DOCID, true);
         context = new RerankerContext<>(searcher, null, query, null, q, queryTokens, null, searchArgs);
     }
 
diff --git a/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java b/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java
index 36a4bf1858..d691f32a98 100644
--- a/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java
+++ b/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java
@@ -16,12 +16,9 @@
 
 package io.anserini.search.similarity;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.TermStatistics;
@@ -37,16 +34,9 @@
 public abstract class AxiomaticSimilarity extends Similarity {
   protected final float s;
   /** Cache of decoded bytes. */
-  protected static final float[] OLD_LENGTH_TABLE = new float[256];
   protected static final float[] LENGTH_TABLE = new float[256];
   
   static {
-    for (int i = 1; i < 256; i++) {
-      float f = SmallFloat.byte315ToFloat((byte)i);
-      OLD_LENGTH_TABLE[i] = 1.0f / (f*f);
-    }
-    OLD_LENGTH_TABLE[0] = 1.0f / OLD_LENGTH_TABLE[255]; // otherwise inf
-    
     for (int i = 0; i < 256; i++) {
       LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
     }
@@ -226,28 +216,20 @@ public Explanation idfExplain(CollectionStatistics collectionStats, TermStatisti
   }
   
   @Override
-  public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+  public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
     Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
     float avgdl = avgFieldLength(collectionStats);
   
-    float[] oldCache = new float[256];
     float[] cache = new float[256];
     for (int i = 0; i < cache.length; i++) {
-      oldCache[i] = s + s * OLD_LENGTH_TABLE[i] / avgdl;
       cache[i] = s + s * LENGTH_TABLE[i] / avgdl;
     }
-    return new Stats(collectionStats.field(), boost, idf, avgdl, oldCache, cache);
-  }
-  
-  
-  @Override
-  public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
-    Stats axStats = (Stats) stats;
-    return new AxDocScorer(axStats, context.reader().getMetaData().getCreatedVersionMajor(), context.reader().getNormValues(axStats.field));
+    Stats axStats = new Stats(collectionStats.field(), boost, idf, avgdl, cache);
+    return new AxDocScorer(axStats);
   }
   
   /** DocumentCollection statistics for the F2Log model. */
-  static class Stats extends SimWeight {
+  static class Stats {
     /** F2Log's idf */
     public final Explanation idf;
     /** The average document length. */
@@ -259,15 +241,14 @@ static class Stats extends SimWeight {
     /** field name, for pulling norms */
     public final String field;
     /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl)
-     *  for both OLD_LENGTH_TABLE and LENGTH_TABLE */
-    private final float[] oldCache, cache;
+     *  for LENGTH_TABLE */
+    private final float[] cache;
     
-    Stats(String field, float boost, Explanation idf, float avgdl, float[] oldCache, float[] cache) {
+    Stats(String field, float boost, Explanation idf, float avgdl, float[] cache) {
       this.field = field;
       this.idf = idf;
       this.avgdl = avgdl;
-      this.weight = idf.getValue() * boost;
-      this.oldCache = oldCache;
+      this.weight = (float) (idf.getValue().doubleValue() * boost);
       this.cache = cache;
     }
   }
@@ -275,23 +256,13 @@ static class Stats extends SimWeight {
   class AxDocScorer extends SimScorer {
     private final Stats stats;
     private final float weightValue; // boost * idf
-    private final NumericDocValues norms;
-    /** precomputed cache for all length values */
-    private final float[] lengthCache;
     /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
     private final float[] cache;
   
-    AxDocScorer(Stats stats, int indexCreatedVersionMajor, NumericDocValues norms) throws IOException {
+    AxDocScorer(Stats stats) {
       this.stats = stats;
       this.weightValue = stats.weight;
-      this.norms = norms;
-      if (indexCreatedVersionMajor >= 7) {
-        lengthCache = LENGTH_TABLE;
-        cache = stats.cache;
-      } else {
-        lengthCache = OLD_LENGTH_TABLE;
-        cache = stats.oldCache;
-      }
+      cache = stats.cache;
     }
     
     /* Score function is:
@@ -302,73 +273,44 @@ class AxDocScorer extends SimScorer {
        </pre>
      */
     @Override
-    public float score(int doc, float freq) throws IOException {
+    public float score(float freq, long encodedNorm) {
       // if there are no norms, we act as if b=0
-      float norm;
-      if (norms == null) {
-        norm = 0.0f;
-      } else {
-        if (norms.advanceExact(doc)) {
-          norm = cache[((byte) norms.longValue()) & 0xFF];
-        } else {
-          norm = cache[0];
-        }
-      }
-      return weightValue * freq / (freq + norm);
+      double norm = cache[((byte) encodedNorm) & 0xFF];
+      return weightValue * (float) (freq / (freq + norm));
     }
     
     @Override
-    public Explanation explain(int doc, Explanation freq) throws IOException {
-      return explainScore(doc, freq, stats, norms, lengthCache);
-    }
-    
-    @Override
-    public float computeSlopFactor(int distance) {
-      return sloppyFreq(distance);
-    }
-    
-    @Override
-    public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
-      return scorePayload(doc, start, end, payload);
+    public Explanation explain(Explanation freq, long encodedNorm) {
+      return explainScore(freq, encodedNorm, stats);
     }
   }
   
-  private Explanation explainTFNorm(int doc, Explanation freq, Stats stats, NumericDocValues norms, float[] lengthCache) throws IOException {
+  private Explanation explainTFNorm(Explanation freq, long encodedNorm, Stats stats) {
     List<Explanation> subs = new ArrayList<>();
     subs.add(freq);
     subs.add(Explanation.match(s, "parameter s"));
-    if (norms == null) {
-      subs.add(Explanation.match(0, "norm"));
-      return Explanation.match(1,
-          "tfNorm, computed as constant from:", subs);
-    } else {
-      byte norm;
-      if (norms.advanceExact(doc)) {
-        norm = (byte) norms.longValue();
-      } else {
-        norm = 0;
-      }
-      float doclen = lengthCache[norm & 0xff];
-      subs.add(Explanation.match(stats.avgdl, "avgFieldLength"));
-      subs.add(Explanation.match(doclen, "fieldLength"));
-      return Explanation.match(
-          (freq.getValue() / (freq.getValue() + s + s * doclen/stats.avgdl)),
-          "tfNorm, computed as (freq / (freq + s + s * fieldLength / avgFieldLength) from:", subs);
-    }
+
+    byte norm = (byte) encodedNorm;
+    float doclen = LENGTH_TABLE[norm & 0xff];
+    subs.add(Explanation.match(stats.avgdl, "avgFieldLength"));
+    subs.add(Explanation.match(doclen, "fieldLength"));
+    return Explanation.match(
+        (freq.getValue().floatValue() / (freq.getValue().floatValue() + s + s * doclen/stats.avgdl)),
+        "tfNorm, computed as (freq / (freq + s + s * fieldLength / avgFieldLength) from:", subs);
   }
   
   
-  private Explanation explainScore(int doc, Explanation freq, Stats stats, NumericDocValues norms, float[] lengthCache) throws IOException {
+  private Explanation explainScore(Explanation freq, long encodedNorm, Stats stats) {
     Explanation boostExpl = Explanation.match(stats.boost, "boost");
     List<Explanation> subs = new ArrayList<>();
-    if (boostExpl.getValue() != 1.0f)
+    if (boostExpl.getValue().floatValue() != 1.0f)
       subs.add(boostExpl);
     subs.add(stats.idf);
-    Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms, lengthCache);
+    Explanation tfNormExpl = explainTFNorm(freq, encodedNorm, stats);
     subs.add(tfNormExpl);
     return Explanation.match(
-        boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue(),
-        "score(doc="+doc+",freq="+freq+"), product of:", subs);
+        boostExpl.getValue().floatValue() * stats.idf.getValue().floatValue() * tfNormExpl.getValue().floatValue(),
+        "score(freq="+freq+", length=" + LENGTH_TABLE[Byte.toUnsignedInt((byte) encodedNorm)] + "), product of:", subs);
   }
   
   @Override
diff --git a/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java b/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java
index 9adae2ed86..4fd20480bf 100644
--- a/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java
+++ b/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java
@@ -17,13 +17,10 @@
 package io.anserini.search.similarity;
 
 import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.search.similarities.Similarity;
 
-import java.io.IOException;
-
 /**
  * Similarity that uses a Ranklib ranker to compute the score
  */
@@ -34,12 +31,8 @@ public long computeNorm(FieldInvertState fieldInvertState) {
   }
 
   @Override
-  public SimWeight computeWeight(float boost, CollectionStatistics collectionStatistics, TermStatistics... termStatistics) {
+  public SimScorer scorer(float boost, CollectionStatistics collectionStatistics, TermStatistics... termStatistics) {
     return null;
   }
 
-  @Override
-  public SimScorer simScorer(SimWeight simWeight, LeafReaderContext leafReaderContext) throws IOException {
-    return null;
-  }
 }
diff --git a/src/main/java/io/anserini/util/ExtractTopDfTerms.java b/src/main/java/io/anserini/util/ExtractTopDfTerms.java
index adae2cd631..cf72d6c344 100644
--- a/src/main/java/io/anserini/util/ExtractTopDfTerms.java
+++ b/src/main/java/io/anserini/util/ExtractTopDfTerms.java
@@ -20,7 +20,7 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -95,7 +95,7 @@ public int compare(Pair p1, Pair p2) {
     PriorityQueue<Pair> queue = new PriorityQueue<Pair>(myArgs.topK, comp);
 
     LOG.info("Starting to iterate through all terms...");
-    Terms terms = MultiFields.getFields(reader).terms(myArgs.field);
+    Terms terms = MultiTerms.getTerms(reader, myArgs.field);
     TermsEnum termsEnum = terms.iterator();
     BytesRef text;
     int cnt = 0;
diff --git a/src/test/java/io/anserini/integration/IndexerTest.java b/src/test/java/io/anserini/integration/IndexerTest.java
index c37c9c69f6..61e9503245 100644
--- a/src/test/java/io/anserini/integration/IndexerTest.java
+++ b/src/test/java/io/anserini/integration/IndexerTest.java
@@ -217,7 +217,7 @@ public void testIterateThroughDocumentVectorComputeBM25() throws Exception {
         TopDocs rs = searcher.search(finalQuery, 1);                 // issue the query
 
         // The BM25 weight is the maxScore
-        System.out.println(term + " " + tf + " " + rs.getMaxScore());
+        System.out.println(term + " " + tf + " " + (rs.scoreDocs.length == 0 ? Float.NaN : rs.scoreDocs[0].score));
       }
     }
   }
diff --git a/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java b/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java
index 076d2f4854..a16ae32953 100644
--- a/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java
+++ b/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java
@@ -35,8 +35,9 @@ protected void init() {
     termIndexStatusTermCount = 12; // Please note that standard analyzer ignores stopwords.
                                    // Also, this includes docids
     termIndexStatusTotFreq = 17;  //
-    termIndexStatusTotPos = 16;   // only "text" fields are indexed with position so we have 16
     storedFieldStatusTotalDocCounts = 3;
+    // 16 positions for text fields, plus 1 for each document because of id
+    termIndexStatusTotPos = 16 + storedFieldStatusTotalDocCounts;
     storedFieldStatusTotFields = 9;  // 3 docs * (1 id + 1 text + 1 raw)
   }
   
diff --git a/src/test/java/io/anserini/integration/TrecEndToEndTest.java b/src/test/java/io/anserini/integration/TrecEndToEndTest.java
index c5c83c26a2..f1e11885c6 100644
--- a/src/test/java/io/anserini/integration/TrecEndToEndTest.java
+++ b/src/test/java/io/anserini/integration/TrecEndToEndTest.java
@@ -28,8 +28,9 @@ protected void init() {
     fieldNormStatusTotalFields = 1;  // text
     termIndexStatusTermCount = 12;   // Note that standard analyzer ignores stopwords; includes docids.
     termIndexStatusTotFreq = 17;
-    termIndexStatusTotPos = 16;      // Only "text" fields are indexed with position so we have 16.
     storedFieldStatusTotalDocCounts = 3;
+    // 16 positions for text fields, plus 1 for each document because of id
+    termIndexStatusTotPos = 16 + storedFieldStatusTotalDocCounts;
     storedFieldStatusTotFields = 9;  // 3 docs * (1 id + 1 text + 1 raw)
 
     // The search output should be as follows (for Lucene 7.5):
diff --git a/src/test/java/io/anserini/integration/TweetEndToEndTest.java b/src/test/java/io/anserini/integration/TweetEndToEndTest.java
index 247f127244..0c8eb3c2e5 100644
--- a/src/test/java/io/anserini/integration/TweetEndToEndTest.java
+++ b/src/test/java/io/anserini/integration/TweetEndToEndTest.java
@@ -30,8 +30,9 @@ protected void init() {
     // We set that retweets and the tweets with ids larger than tweetMaxId will NOT be indexed!
     termIndexStatusTermCount = 32; // other indexable fields: 4 doc ids + 4 "lang" fields + 4 "screen_name" fields
     termIndexStatusTotFreq = 36;
-    termIndexStatusTotPos = 24;   // only "text" fields are indexed with positions
     storedFieldStatusTotalDocCounts = 4;
+    // 24 positions for text fields, plus 3 for each document because of id, screen_name and lang
+    termIndexStatusTotPos = 24 + 3 * storedFieldStatusTotalDocCounts;
     storedFieldStatusTotFields = 12;  // 4 tweets * (1 id + 1 text + 1 raw)
 
     // The search output should be as follows (for Lucene 7.5):

From 3b44a7e17195d755f53382d969b1238e6cd68139 Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Thu, 14 Mar 2019 08:48:57 +0100
Subject: [PATCH 2/6] Use Lucene's Axiomatic similarity.

---
 .../io/anserini/search/SearchCollection.java  |   6 +-
 .../similarity/AxiomaticSimilarity.java       | 330 ------------------
 .../search/similarity/F2ExpSimilarity.java    |  59 ----
 .../search/similarity/F2LogSimilarity.java    |  48 ---
 4 files changed, 2 insertions(+), 441 deletions(-)
 delete mode 100644 src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java
 delete mode 100644 src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java
 delete mode 100644 src/main/java/io/anserini/search/similarity/F2LogSimilarity.java

diff --git a/src/main/java/io/anserini/search/SearchCollection.java b/src/main/java/io/anserini/search/SearchCollection.java
index 664b1a0004..0b2f868a92 100644
--- a/src/main/java/io/anserini/search/SearchCollection.java
+++ b/src/main/java/io/anserini/search/SearchCollection.java
@@ -29,8 +29,6 @@
 import io.anserini.rerank.lib.ScoreTiesAdjusterReranker;
 import io.anserini.search.query.BagOfWordsQueryGenerator;
 import io.anserini.search.query.SdmQueryGenerator;
-import io.anserini.search.similarity.F2ExpSimilarity;
-import io.anserini.search.similarity.F2LogSimilarity;
 import io.anserini.search.similarity.TaggedSimilarity;
 import io.anserini.search.topicreader.NewsBackgroundLinkingTopicReader;
 import io.anserini.search.topicreader.TopicReader;
@@ -238,11 +236,11 @@ public List<TaggedSimilarity> constructSimiliries() {
       }
     } else if (args.f2exp) {
       for (String s : args.f2exp_s) {
-        similarities.add(new TaggedSimilarity(new F2ExpSimilarity(Float.valueOf(s)), "s:"+s));
+        similarities.add(new TaggedSimilarity(new AxiomaticF2EXP(Float.valueOf(s)), "s:"+s));
       }
     } else if (args.f2log) {
       for (String s : args.f2log_s) {
-        similarities.add(new TaggedSimilarity(new F2LogSimilarity(Float.valueOf(s)), "s:"+s));
+        similarities.add(new TaggedSimilarity(new AxiomaticF2LOG(Float.valueOf(s)), "s:"+s));
       }
     } else {
       throw new IllegalArgumentException("Error: Must specify scoring model!");
diff --git a/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java b/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java
deleted file mode 100644
index d691f32a98..0000000000
--- a/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java
+++ /dev/null
@@ -1,330 +0,0 @@
-/**
- * Anserini: An information retrieval toolkit built on Lucene
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.anserini.search.similarity;
-
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.search.CollectionStatistics;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.TermStatistics;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.SmallFloat;
-
-/**
- * Hui Fang and ChengXiang Zhai. 2005. An exploration of axiomatic approaches to information retrieval.
- * In Proceedings of the 28th annual international ACM SIGIR conference on Research and development in
- * information retrieval (SIGIR '05). ACM, New York, NY, USA, 480-487.
- */
-public abstract class AxiomaticSimilarity extends Similarity {
-  protected final float s;
-  /** Cache of decoded bytes. */
-  protected static final float[] LENGTH_TABLE = new float[256];
-  
-  static {
-    for (int i = 0; i < 256; i++) {
-      LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
-    }
-  }
-  
-  /**
-   * @param s Generic parater s
-   * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is
-   *         not within the range {@code [0..1]}
-   */
-  AxiomaticSimilarity(float s) {
-    if (Float.isNaN(s) || s < 0 || s > 1) {
-      throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1");
-    }
-    this.s = s;
-  }
-  
-  /** Default parameter:
-   * <ul>
-   *   <li>{@code s = 0.5}</li>
-   * </ul>
-   */
-  AxiomaticSimilarity() {
-    this(0.5f);
-  }
-  
-  /** Implemented as <code>log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5))</code>.
-   *
-   * @param docFreq terms's document frequency
-   * @param docCount total document count in the index
-   * @return inverted document frequency
-   * */
-  float idf(long docFreq, long docCount) {
-    throw new UnsupportedOperationException();
-  }
-  
-  /** Implemented as <code>1 / (distance + 1)</code>.
-   *
-   * @param distance distance
-   * @return sloppy frequency
-   * */
-  float sloppyFreq(int distance) {
-    return 1.0f / (distance + 1);
-  }
-  
-  /** The default implementation returns <code>1</code>
-   *
-   * @param doc doc
-   * @param start start
-   * @param end end
-   * @param payload payload
-   * @return 1
-   * */
-  float scorePayload(int doc, int start, int end, BytesRef payload) {
-    return 1;
-  }
-  
-  /** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code>,
-   * or returns <code>1</code> if the index does not store sumTotalTermFreq:
-   * any field that omits frequency information).
-   *
-   * @param collectionStats collection-wide statistics
-   * @return average document length of FIELD_BODY
-   * */
-  float avgFieldLength(CollectionStatistics collectionStats) {
-    final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
-    if (sumTotalTermFreq <= 0) {
-      return 1f;       // field does not exist, or stat is unsupported
-    } else {
-      final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
-      return (float) (sumTotalTermFreq / (double) docCount);
-    }
-  }
-  
-  /**
-   * True if overlap tokens (tokens with a position of increment of zero) are
-   * discounted from the document's length.
-   */
-  boolean discountOverlaps = true;
-  
-  /** Sets whether overlap tokens (Tokens with 0 position increment) are
-   *  ignored when computing norm.  By default this is true, meaning overlap
-   *  tokens do not count when computing norms.
-   *
-   * @param v v
-   *  */
-  public void setDiscountOverlaps(boolean v) {
-    discountOverlaps = v;
-  }
-  
-  /**
-   * Returns true if overlap tokens are discounted from the document's length.
-   * @see #setDiscountOverlaps
-   *
-   * @return discountOverlaps
-   */
-  public boolean getDiscountOverlaps() {
-    return discountOverlaps;
-  }
-  
-  /** Cache of decoded bytes. */
-  private static final float[] NORM_TABLE = new float[256];
-  
-  static {
-    for (int i = 1; i < 256; i++) {
-      float f = SmallFloat.byte315ToFloat((byte)i);
-      NORM_TABLE[i] = 1.0f / (f*f);
-    }
-    NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // otherwise inf
-  }
-  
-  
-  @Override
-  public final long computeNorm(FieldInvertState state) {
-    final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
-    int indexCreatedVersionMajor = state.getIndexCreatedVersionMajor();
-    if (indexCreatedVersionMajor >= 7) {
-      return SmallFloat.intToByte4(numTerms);
-    } else {
-      return SmallFloat.floatToByte315((float) (1 / Math.sqrt(numTerms)));
-    }
-  }
-  
-  /**
-   * Computes a score factor for a simple term and returns an explanation
-   * for that score factor.
-   *
-   * <p>
-   * The default implementation uses:
-   *
-   * <pre class="prettyprint">
-   * idf(docFreq, docCount);
-   * </pre>
-   *
-   * Note that {@link CollectionStatistics#docCount()} is used instead of
-   * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()} because also
-   * {@link TermStatistics#docFreq()} is used, and when the latter
-   * is inaccurate, so is {@link CollectionStatistics#docCount()}, and in the same direction.
-   * In addition, {@link CollectionStatistics#docCount()} does not skew when fields are sparse.
-   *
-   * @param collectionStats collection-level statistics
-   * @param termStats term-level statistics for the term
-   * @return an Explain object that includes both an idf score factor
-  and an explanation for the term.
-   */
-  public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
-    final long df = termStats.docFreq();
-    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
-    final float idf = idf(df, docCount);
-    return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
-  }
-  
-  /**
-   * Computes a score factor for a phrase.
-   *
-   * <p>
-   * The default implementation sums the idf factor for
-   * each term in the phrase.
-   *
-   * @param collectionStats collection-level statistics
-   * @param termStats term-level statistics for the terms in the phrase
-   * @return an Explain object that includes both an idf
-   *         score factor for the phrase and an explanation
-   *         for each term.
-   */
-  public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
-    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
-    double idf = 0d;
-    List<Explanation> details = new ArrayList<>();
-    for (final TermStatistics stat : termStats ) {
-      final long df = stat.docFreq();
-      final float termIdf = idf(df, docCount);
-      details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
-      idf += termIdf;
-    }
-    return Explanation.match((float)idf, "idf(), sum of:", details);
-  }
-  
-  @Override
-  public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
-    Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
-    float avgdl = avgFieldLength(collectionStats);
-  
-    float[] cache = new float[256];
-    for (int i = 0; i < cache.length; i++) {
-      cache[i] = s + s * LENGTH_TABLE[i] / avgdl;
-    }
-    Stats axStats = new Stats(collectionStats.field(), boost, idf, avgdl, cache);
-    return new AxDocScorer(axStats);
-  }
-  
-  /** DocumentCollection statistics for the F2Log model. */
-  static class Stats {
-    /** F2Log's idf */
-    public final Explanation idf;
-    /** The average document length. */
-    public final float avgdl;
-    /** query boost */
-    public float boost;
-    /** weight (idf * boost) */
-    public float weight;
-    /** field name, for pulling norms */
-    public final String field;
-    /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl)
-     *  for LENGTH_TABLE */
-    private final float[] cache;
-    
-    Stats(String field, float boost, Explanation idf, float avgdl, float[] cache) {
-      this.field = field;
-      this.idf = idf;
-      this.avgdl = avgdl;
-      this.weight = (float) (idf.getValue().doubleValue() * boost);
-      this.cache = cache;
-    }
-  }
-  
-  class AxDocScorer extends SimScorer {
-    private final Stats stats;
-    private final float weightValue; // boost * idf
-    /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */
-    private final float[] cache;
-  
-    AxDocScorer(Stats stats) {
-      this.stats = stats;
-      this.weightValue = stats.weight;
-      cache = stats.cache;
-    }
-    
-    /* Score function is:
-     * <pre class="prettyprint">
-                                                     occurrences
-      score = termWeight * IDF * ---------------------------------------------------------
-                                 occurrences + s + documentLength * s / avgDocLength
-       </pre>
-     */
-    @Override
-    public float score(float freq, long encodedNorm) {
-      // if there are no norms, we act as if b=0
-      double norm = cache[((byte) encodedNorm) & 0xFF];
-      return weightValue * (float) (freq / (freq + norm));
-    }
-    
-    @Override
-    public Explanation explain(Explanation freq, long encodedNorm) {
-      return explainScore(freq, encodedNorm, stats);
-    }
-  }
-  
-  private Explanation explainTFNorm(Explanation freq, long encodedNorm, Stats stats) {
-    List<Explanation> subs = new ArrayList<>();
-    subs.add(freq);
-    subs.add(Explanation.match(s, "parameter s"));
-
-    byte norm = (byte) encodedNorm;
-    float doclen = LENGTH_TABLE[norm & 0xff];
-    subs.add(Explanation.match(stats.avgdl, "avgFieldLength"));
-    subs.add(Explanation.match(doclen, "fieldLength"));
-    return Explanation.match(
-        (freq.getValue().floatValue() / (freq.getValue().floatValue() + s + s * doclen/stats.avgdl)),
-        "tfNorm, computed as (freq / (freq + s + s * fieldLength / avgFieldLength) from:", subs);
-  }
-  
-  
-  private Explanation explainScore(Explanation freq, long encodedNorm, Stats stats) {
-    Explanation boostExpl = Explanation.match(stats.boost, "boost");
-    List<Explanation> subs = new ArrayList<>();
-    if (boostExpl.getValue().floatValue() != 1.0f)
-      subs.add(boostExpl);
-    subs.add(stats.idf);
-    Explanation tfNormExpl = explainTFNorm(freq, encodedNorm, stats);
-    subs.add(tfNormExpl);
-    return Explanation.match(
-        boostExpl.getValue().floatValue() * stats.idf.getValue().floatValue() * tfNormExpl.getValue().floatValue(),
-        "score(freq="+freq+", length=" + LENGTH_TABLE[Byte.toUnsignedInt((byte) encodedNorm)] + "), product of:", subs);
-  }
-  
-  @Override
-  public String toString() {
-    throw new UnsupportedOperationException();
-  }
-  
-  /**
-   * Returns the <code>b</code> parameter
-   * @see #AxiomaticSimilarity(float)
-   *
-   * @return s
-   */
-  public float getS() {
-    return s;
-  }
-}
diff --git a/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java b/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java
deleted file mode 100644
index c7a2394f71..0000000000
--- a/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Anserini: An information retrieval toolkit built on Lucene
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.anserini.search.similarity;
-
-public class F2ExpSimilarity extends AxiomaticSimilarity {
-  private final float k = 0.35f;
-  
-  /**
-   * F2Exp with the supplied parameter values.
-   * @param s Controls to what degree document length normalizes tf values.
-   * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is
-   *         not within the range {@code [0..1]}
-   */
-  public F2ExpSimilarity(float s) {
-    super(s);
-  }
-  
-  /** F2Exp with these default values:
-   * <ul>
-   *   <li>{@code k = 0.35}</li>
-   * </ul>
-   */
-  public F2ExpSimilarity() {
-    this(0.5f);
-  }
-  
-  @Override
-  float idf(long docFreq, long docCount) {
-    return (float) Math.pow((docCount + 1.0) / docFreq, this.k);
-  }
-  
-  @Override
-  public String toString() {
-    return "F2Exp(s=" + s +")";
-  }
-  
-  /**
-   * Returns the <code>k</code> parameter
-   * @see #F2ExpSimilarity(float)
-   * @return k
-   */
-  public float getK() {
-    return k;
-  }
-}
diff --git a/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java b/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java
deleted file mode 100644
index 5e59f665fc..0000000000
--- a/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Anserini: An information retrieval toolkit built on Lucene
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.anserini.search.similarity;
-
-public class F2LogSimilarity extends AxiomaticSimilarity {
-  /**
-   * F2Log with the supplied parameter values.
-   * @param s Controls to what degree document length normalizes tf values.
-   * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is
-   *         not within the range {@code [0..1]}
-   */
-  public F2LogSimilarity(float s) {
-    super(s);
-  }
-  
-  /** F2Log with these default values:
-   * <ul>
-   *   <li>{@code s = 0.5}</li>
-   * </ul>
-   */
-  public F2LogSimilarity() {
-    this(0.5f);
-  }
-  
-  @Override
-  float idf(long docFreq, long docCount) {
-    return (float) Math.log((1.0f + docCount) / docFreq);
-  }
-  
-  @Override
-  public String toString() {
-    return "F2Log(s=" + s +")";
-  }
-}

From 813e5923aa146d162c0022504c334ef04c05b404 Mon Sep 17 00:00:00 2001
From: Jimmy Lin <jimmylin@uwaterloo.ca>
Date: Tue, 23 Apr 2019 12:52:59 -0400
Subject: [PATCH 3/6] Fixed all regressions for Lucene8 (#596)

---
 docs/experiments-car17.md                   |  2 +-
 docs/experiments-core17.md                  |  2 +-
 docs/experiments-core18.md                  |  2 +-
 docs/experiments-cw09b.md                   | 24 +++----
 docs/experiments-cw12.md                    | 16 ++---
 docs/experiments-cw12b13.md                 | 16 ++---
 docs/experiments-gov2.md                    |  8 +--
 docs/experiments-mb11.md                    |  2 +-
 docs/experiments-robust04.md                |  2 +-
 docs/experiments-robust05.md                |  2 +-
 docs/experiments-wt10g.md                   |  2 +-
 src/main/resources/regression/car17.yaml    |  6 +-
 src/main/resources/regression/core17.yaml   |  4 +-
 src/main/resources/regression/core18.yaml   |  4 +-
 src/main/resources/regression/cw09b.yaml    | 78 ++++++++++-----------
 src/main/resources/regression/cw12.yaml     | 50 ++++++-------
 src/main/resources/regression/cw12b13.yaml  | 62 ++++++++--------
 src/main/resources/regression/gov2.yaml     | 10 +--
 src/main/resources/regression/mb11.yaml     |  2 +-
 src/main/resources/regression/robust04.yaml |  4 +-
 src/main/resources/regression/robust05.yaml |  4 +-
 src/main/resources/regression/wt10g.yaml    |  8 +--
 22 files changed, 155 insertions(+), 155 deletions(-)

diff --git a/docs/experiments-car17.md b/docs/experiments-car17.md
index 495e6a8482..c89f471617 100644
--- a/docs/experiments-car17.md
+++ b/docs/experiments-car17.md
@@ -63,7 +63,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-All Topics                              | 0.1689    | 0.1287    | 0.1355    | 0.1516    | 0.1173    | 0.1082    |
+All Topics                              | 0.1689    | 0.1286    | 0.1355    | 0.1516    | 0.1173    | 0.1082    |
 
 
 RECIP_RANK                              | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
diff --git a/docs/experiments-core17.md b/docs/experiments-core17.md
index aecc04bdde..4e25a8079b 100644
--- a/docs/experiments-core17.md
+++ b/docs/experiments-core17.md
@@ -64,7 +64,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-All Topics                              | 0.1977    | 0.2682    | 0.2700    | 0.1913    | 0.2485    | 0.2514    |
+All Topics                              | 0.1977    | 0.2682    | 0.2701    | 0.1913    | 0.2485    | 0.2514    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
diff --git a/docs/experiments-core18.md b/docs/experiments-core18.md
index 474d2a1fd2..2b7c9091d9 100644
--- a/docs/experiments-core18.md
+++ b/docs/experiments-core18.md
@@ -64,7 +64,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-All Topics                              | 0.2491    | 0.3147    | 0.2921    | 0.2522    | 0.3064    | 0.2975    |
+All Topics                              | 0.2491    | 0.3147    | 0.2926    | 0.2522    | 0.3064    | 0.2975    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
diff --git a/docs/experiments-cw09b.md b/docs/experiments-cw09b.md
index 9bac861ada..cbf0331ada 100644
--- a/docs/experiments-cw09b.md
+++ b/docs/experiments-cw09b.md
@@ -110,29 +110,29 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-TREC 2010 Web Track: Topics 51-100      | 0.1126    | 0.0933    | 0.0928    | 0.1060    | 0.1019    | 0.1086    |
-TREC 2011 Web Track: Topics 101-150     | 0.1094    | 0.1081    | 0.0974    | 0.0958    | 0.0837    | 0.0879    |
-TREC 2012 Web Track: Topics 151-200     | 0.1106    | 0.1107    | 0.1315    | 0.1069    | 0.1059    | 0.1212    |
+TREC 2010 Web Track: Topics 51-100      | 0.1126    | 0.0933    | 0.0929    | 0.1060    | 0.1019    | 0.1086    |
+TREC 2011 Web Track: Topics 101-150     | 0.1094    | 0.1085    | 0.0975    | 0.0958    | 0.0839    | 0.0879    |
+TREC 2012 Web Track: Topics 151-200     | 0.1105    | 0.1107    | 0.1315    | 0.1069    | 0.1058    | 0.1212    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-TREC 2010 Web Track: Topics 51-100      | 0.2681    | 0.2389    | 0.2354    | 0.2431    | 0.2312    | 0.2618    |
-TREC 2011 Web Track: Topics 101-150     | 0.2513    | 0.2467    | 0.2393    | 0.2147    | 0.2067    | 0.2167    |
-TREC 2012 Web Track: Topics 151-200     | 0.2167    | 0.1920    | 0.2553    | 0.2080    | 0.1980    | 0.2140    |
+TREC 2010 Web Track: Topics 51-100      | 0.2694    | 0.2389    | 0.2354    | 0.2431    | 0.2312    | 0.2618    |
+TREC 2011 Web Track: Topics 101-150     | 0.2513    | 0.2480    | 0.2387    | 0.2147    | 0.2047    | 0.2173    |
+TREC 2012 Web Track: Topics 151-200     | 0.2167    | 0.1920    | 0.2553    | 0.2080    | 0.1980    | 0.2147    |
 
 
 NDCG20                                  | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-TREC 2010 Web Track: Topics 51-100      | 0.1354    | 0.1369    | 0.1637    | 0.1143    | 0.1185    | 0.1454    |
-TREC 2011 Web Track: Topics 101-150     | 0.1890    | 0.1916    | 0.1833    | 0.1619    | 0.1447    | 0.1509    |
-TREC 2012 Web Track: Topics 151-200     | 0.1014    | 0.0917    | 0.1441    | 0.0868    | 0.0896    | 0.1030    |
+TREC 2010 Web Track: Topics 51-100      | 0.1354    | 0.1369    | 0.1632    | 0.1143    | 0.1182    | 0.1454    |
+TREC 2011 Web Track: Topics 101-150     | 0.1890    | 0.1916    | 0.1835    | 0.1619    | 0.1449    | 0.1517    |
+TREC 2012 Web Track: Topics 151-200     | 0.1014    | 0.0918    | 0.1441    | 0.0868    | 0.0896    | 0.1037    |
 
 
 ERR20                                   | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-TREC 2010 Web Track: Topics 51-100      | 0.0733    | 0.0747    | 0.0981    | 0.0599    | 0.0592    | 0.0742    |
-TREC 2011 Web Track: Topics 101-150     | 0.0959    | 0.0960    | 0.1091    | 0.0849    | 0.0786    | 0.0820    |
-TREC 2012 Web Track: Topics 151-200     | 0.1304    | 0.1493    | 0.2355    | 0.1305    | 0.1334    | 0.1558    |
+TREC 2010 Web Track: Topics 51-100      | 0.0733    | 0.0747    | 0.0977    | 0.0599    | 0.0592    | 0.0742    |
+TREC 2011 Web Track: Topics 101-150     | 0.0959    | 0.0960    | 0.1091    | 0.0849    | 0.0787    | 0.0821    |
+TREC 2012 Web Track: Topics 151-200     | 0.1303    | 0.1494    | 0.2355    | 0.1305    | 0.1334    | 0.1558    |
 
 
diff --git a/docs/experiments-cw12.md b/docs/experiments-cw12.md
index cec82a6012..a7ff8120c3 100644
--- a/docs/experiments-cw12.md
+++ b/docs/experiments-cw12.md
@@ -73,25 +73,25 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | QL        | QL+RM3    |
 :---------------------------------------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1695    | 0.1464    | 0.1493    | 0.1291    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2469    | 0.2325    | 0.2467    | 0.2168    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1694    | 0.1464    | 0.1494    | 0.1290    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2469    | 0.2324    | 0.2466    | 0.2177    |
 
 
 P30                                     | BM25      | BM25+RM3  | QL        | QL+RM3    |
 :---------------------------------------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2767    | 0.2387    | 0.2613    | 0.2347    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.4533    | 0.4073    | 0.4380    | 0.3793    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2773    | 0.2393    | 0.2607    | 0.2347    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.4547    | 0.4080    | 0.4380    | 0.3800    |
 
 
 NDCG20                                  | BM25      | BM25+RM3  | QL        | QL+RM3    |
 :---------------------------------------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2086    | 0.2033    | 0.1993    | 0.1725    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2578    | 0.2530    | 0.2228    | 0.2066    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2088    | 0.2033    | 0.1993    | 0.1725    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2572    | 0.2530    | 0.2218    | 0.2083    |
 
 
 ERR20                                   | BM25      | BM25+RM3  | QL        | QL+RM3    |
 :---------------------------------------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1284    | 0.1264    | 0.1232    | 0.1008    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1630    | 0.1655    | 0.1321    | 0.1218    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1284    | 0.1264    | 0.1233    | 0.1008    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1616    | 0.1655    | 0.1322    | 0.1245    |
 
 
diff --git a/docs/experiments-cw12b13.md b/docs/experiments-cw12b13.md
index 4dfe6f754e..d6d7729f3e 100644
--- a/docs/experiments-cw12b13.md
+++ b/docs/experiments-cw12b13.md
@@ -88,25 +88,25 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0468    | 0.0412    | 0.0435    | 0.0397    | 0.0322    | 0.0359    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.0224    | 0.0210    | 0.0180    | 0.0235    | 0.0203    | 0.0186    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0468    | 0.0408    | 0.0435    | 0.0397    | 0.0322    | 0.0358    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.0224    | 0.0210    | 0.0180    | 0.0235    | 0.0203    | 0.0183    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2113    | 0.1713    | 0.1840    | 0.1767    | 0.1507    | 0.1513    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1273    | 0.1207    | 0.1107    | 0.1373    | 0.1173    | 0.1167    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2113    | 0.1673    | 0.1833    | 0.1780    | 0.1513    | 0.1507    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1273    | 0.1207    | 0.1107    | 0.1373    | 0.1173    | 0.1147    |
 
 
 NDCG20                                  | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1286    | 0.1129    | 0.1287    | 0.1107    | 0.0920    | 0.1143    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1185    | 0.1080    | 0.0964    | 0.1177    | 0.1003    | 0.1001    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1286    | 0.1119    | 0.1287    | 0.1106    | 0.0920    | 0.1141    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1183    | 0.1081    | 0.0963    | 0.1177    | 0.1004    | 0.0989    |
 
 
 ERR20                                   | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0838    | 0.0763    | 0.0943    | 0.0769    | 0.0553    | 0.0780    |
-[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1201    | 0.1065    | 0.0929    | 0.1091    | 0.0929    | 0.0896    |
+[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0838    | 0.0753    | 0.0941    | 0.0768    | 0.0553    | 0.0780    |
+[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1201    | 0.1066    | 0.0928    | 0.1092    | 0.0928    | 0.0900    |
 
 
diff --git a/docs/experiments-gov2.md b/docs/experiments-gov2.md
index 859b7490e7..6a998a8d77 100644
--- a/docs/experiments-gov2.md
+++ b/docs/experiments-gov2.md
@@ -90,14 +90,14 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.2689    | 0.2844    | 0.2665    | 0.2681    | 0.2708    | 0.2666    |
-[TREC 2005 Terabyte Track: Topics 751-800](http://trec.nist.gov/data/terabyte05.html)| 0.3390    | 0.3820    | 0.3664    | 0.3303    | 0.3559    | 0.3646    |
-[TREC 2006 Terabyte Track: Topics 801-850](http://trec.nist.gov/data/terabyte06.html)| 0.3080    | 0.3377    | 0.3069    | 0.2996    | 0.3154    | 0.3084    |
+[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.2689    | 0.2844    | 0.2669    | 0.2681    | 0.2708    | 0.2666    |
+[TREC 2005 Terabyte Track: Topics 751-800](http://trec.nist.gov/data/terabyte05.html)| 0.3390    | 0.3820    | 0.3666    | 0.3303    | 0.3559    | 0.3646    |
+[TREC 2006 Terabyte Track: Topics 801-850](http://trec.nist.gov/data/terabyte06.html)| 0.3080    | 0.3377    | 0.3069    | 0.2997    | 0.3154    | 0.3084    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.4864    | 0.5190    | 0.4986    | 0.4755    | 0.4925    | 0.4932    |
+[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.4864    | 0.5190    | 0.4993    | 0.4755    | 0.4925    | 0.4932    |
 [TREC 2005 Terabyte Track: Topics 751-800](http://trec.nist.gov/data/terabyte05.html)| 0.5540    | 0.5920    | 0.5933    | 0.5347    | 0.5620    | 0.5840    |
 [TREC 2006 Terabyte Track: Topics 801-850](http://trec.nist.gov/data/terabyte06.html)| 0.4907    | 0.5160    | 0.5033    | 0.4720    | 0.4847    | 0.4920    |
 
diff --git a/docs/experiments-mb11.md b/docs/experiments-mb11.md
index 9805271083..fd77fac0a1 100644
--- a/docs/experiments-mb11.md
+++ b/docs/experiments-mb11.md
@@ -95,6 +95,6 @@ MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
 [TREC 2011 Microblog Track](http://trec.nist.gov/data/microblog2011.html)| 0.3959    | 0.4170    | 0.4612    | 0.4061    | 0.4435    | 0.4408    |
-[TREC 2012 Microblog Track](http://trec.nist.gov/data/microblog2012.html)| 0.3316    | 0.3463    | 0.3554    | 0.3333    | 0.3520    | 0.3842    |
+[TREC 2012 Microblog Track](http://trec.nist.gov/data/microblog2012.html)| 0.3316    | 0.3463    | 0.3554    | 0.3333    | 0.3514    | 0.3842    |
 
 
diff --git a/docs/experiments-robust04.md b/docs/experiments-robust04.md
index 69aca1b3ab..d600518575 100644
--- a/docs/experiments-robust04.md
+++ b/docs/experiments-robust04.md
@@ -63,7 +63,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-All Topics                              | 0.2531    | 0.2903    | 0.2895    | 0.2467    | 0.2747    | 0.2774    |
+All Topics                              | 0.2531    | 0.2903    | 0.2896    | 0.2467    | 0.2747    | 0.2774    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
diff --git a/docs/experiments-robust05.md b/docs/experiments-robust05.md
index 96cad0889e..9c50869363 100644
--- a/docs/experiments-robust05.md
+++ b/docs/experiments-robust05.md
@@ -62,7 +62,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-All Topics                              | 0.2031    | 0.2602    | 0.2584    | 0.2028    | 0.2491    | 0.2476    |
+All Topics                              | 0.2032    | 0.2602    | 0.2587    | 0.2028    | 0.2491    | 0.2476    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
diff --git a/docs/experiments-wt10g.md b/docs/experiments-wt10g.md
index cff0a0bf1d..21a24798f2 100644
--- a/docs/experiments-wt10g.md
+++ b/docs/experiments-wt10g.md
@@ -69,6 +69,6 @@ Wt10g: Topics 451-550                   | 0.1992    | 0.2276    | 0.2200    | 0.
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-Wt10g: Topics 451-550                   | 0.2218    | 0.2398    | 0.2483    | 0.2180    | 0.2310    | 0.2517    |
+Wt10g: Topics 451-550                   | 0.2214    | 0.2398    | 0.2483    | 0.2180    | 0.2310    | 0.2514    |
 
 
diff --git a/src/main/resources/regression/car17.yaml b/src/main/resources/regression/car17.yaml
index c2b08c8da8..56b46e2371 100644
--- a/src/main/resources/regression/car17.yaml
+++ b/src/main/resources/regression/car17.yaml
@@ -21,8 +21,8 @@ index_path: indexes/lucene-index.car17.pos+docvectors+rawdocs # path to the exis
 collection: CarCollection
 index_stats:
   documents: 29678360
-  documents (non-empty): 29674409
-  total terms: 1257896158
+  documents (non-empty): 29674425
+  total terms: 1257909884
 topics:
   - name: "All Topics"
     path: topics.car17.test200.txt
@@ -59,7 +59,7 @@ models:
       - -rm3
     results:
       map:
-        - 0.1287
+        - 0.1286
       recip_rank:
         - 0.1788
   - name: bm25+ax
diff --git a/src/main/resources/regression/core17.yaml b/src/main/resources/regression/core17.yaml
index bd928d6f2b..922b779cc0 100644
--- a/src/main/resources/regression/core17.yaml
+++ b/src/main/resources/regression/core17.yaml
@@ -22,7 +22,7 @@ collection: NewYorkTimesCollection
 index_stats:
   documents: 1831109
   documents (non-empty): 1831109
-  total terms: 720510677
+  total terms: 720510680
 topics:
   - name: "All Topics"
     path: topics.core17.txt
@@ -70,7 +70,7 @@ models:
       - -axiom.deterministic
     results:
       map:
-        - 0.2700
+        - 0.2701
       p30:
         - 0.4927
   - name: ql
diff --git a/src/main/resources/regression/core18.yaml b/src/main/resources/regression/core18.yaml
index af8fb27aad..6bb5eb33d9 100644
--- a/src/main/resources/regression/core18.yaml
+++ b/src/main/resources/regression/core18.yaml
@@ -22,7 +22,7 @@ collection: WashingtonPostCollection
 index_stats:
   documents: 595037
   documents (non-empty): 595037
-  total terms: 317882653
+  total terms: 317898812
 topics:
   - name: "All Topics"
     path: topics.core18.txt
@@ -70,7 +70,7 @@ models:
       - -axiom.deterministic
     results:
       map:
-        - 0.2921
+        - 0.2926
       p30:
         - 0.4007
   - name: ql
diff --git a/src/main/resources/regression/cw09b.yaml b/src/main/resources/regression/cw09b.yaml
index f8901e6ff4..054b3449bc 100644
--- a/src/main/resources/regression/cw09b.yaml
+++ b/src/main/resources/regression/cw09b.yaml
@@ -22,7 +22,7 @@ topic_reader: Webxml
 index_stats:
   documents: 50220189
   documents (non-empty): 50220159
-  total terms: 31270685466
+  total terms: 31302554269
 topics:
   - name: "TREC 2010 Web Track: Topics 51-100"
     path: topics.web.51-100.txt
@@ -70,19 +70,19 @@ models:
       map:
         - 0.1126
         - 0.1094
-        - 0.1106
+        - 0.1105
       p30:
-        - 0.2681
+        - 0.2694
         - 0.2513
         - 0.2167
       ndcg20:
-        - 0.13539
-        - 0.18901
-        - 0.10141
+        - 0.13537
+        - 0.18900
+        - 0.10139
       err20:
         - 0.07335
         - 0.09592
-        - 0.13036
+        - 0.13031
   - name: bm25+rm3
     params:
       - -bm25
@@ -90,20 +90,20 @@ models:
     results:
       map:
         - 0.0933
-        - 0.1081
+        - 0.1085
         - 0.1107
       p30:
         - 0.2389
-        - 0.2467
+        - 0.2480
         - 0.1920
       ndcg20:
-        - 0.13690
-        - 0.19164
-        - 0.09170
+        - 0.13693
+        - 0.19160
+        - 0.09182
       err20:
-        - 0.07470
-        - 0.09597
-        - 0.14933
+        - 0.07473
+        - 0.09596
+        - 0.14936
   - name: bm25+ax
     params:
       - -bm25
@@ -113,21 +113,21 @@ models:
       - -axiom.beta 0.1
     results:
       map:
-        - 0.0928
-        - 0.0974
+        - 0.0929
+        - 0.0975
         - 0.1315
       p30:
         - 0.2354
-        - 0.2393
+        - 0.2387
         - 0.2553
       ndcg20:
-        - 0.16375
-        - 0.18330
+        - 0.16319
+        - 0.18348
         - 0.14413
       err20:
-        - 0.09815
-        - 0.10909
-        - 0.23554
+        - 0.09771
+        - 0.10912
+        - 0.23551
   - name: ql
     params:
       - -ql
@@ -141,12 +141,12 @@ models:
         - 0.2147
         - 0.2080
       ndcg20:
-        - 0.11431
-        - 0.16192
+        - 0.11432
+        - 0.16191
         - 0.08682
       err20:
         - 0.05994
-        - 0.08487
+        - 0.08486
         - 0.13052
   - name: ql+rm3
     params:
@@ -155,19 +155,19 @@ models:
     results:
       map:
         - 0.1019
-        - 0.0837
-        - 0.1059
+        - 0.0839
+        - 0.1058
       p30:
         - 0.2312
-        - 0.2067
+        - 0.2047
         - 0.1980
       ndcg20:
-        - 0.11852
-        - 0.14469
+        - 0.11823
+        - 0.14487
         - 0.08959
       err20:
-        - 0.05920
-        - 0.07861
+        - 0.05917
+        - 0.07872
         - 0.13336
   - name: ql+ax
     params:
@@ -183,13 +183,13 @@ models:
         - 0.1212
       p30:
         - 0.2618
-        - 0.2167
-        - 0.2140
+        - 0.2173
+        - 0.2147
       ndcg20:
         - 0.14541
-        - 0.15091
-        - 0.10296
+        - 0.15174
+        - 0.10373
       err20:
         - 0.07424
-        - 0.08203
-        - 0.15575
+        - 0.08205
+        - 0.15577
diff --git a/src/main/resources/regression/cw12.yaml b/src/main/resources/regression/cw12.yaml
index 2e221ee26a..1afdf07f67 100644
--- a/src/main/resources/regression/cw12.yaml
+++ b/src/main/resources/regression/cw12.yaml
@@ -21,8 +21,8 @@ index_options:
 topic_reader: Webxml
 index_stats:
   documents: 731705088
-  documents (non-empty): 731556725
-  total terms: 428628865985
+  documents (non-empty): 731556853
+  total terms: 429328271635
 topics:
   - name: "[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)"
     path: topics.web.201-250.txt
@@ -65,17 +65,17 @@ models:
       - -bm25
     results:
       map:
-        - 0.1695
+        - 0.1694
         - 0.2469
       p30:
-        - 0.2767
-        - 0.4533
+        - 0.2773
+        - 0.4547
       ndcg20:
-        - 0.20858
-        - 0.25776
+        - 0.20881
+        - 0.25719
       err20:
-        - 0.12835
-        - 0.16305
+        - 0.12838
+        - 0.16162
   - name: bm25+rm3
     params:
       - -bm25
@@ -83,13 +83,13 @@ models:
     results:
       map:
         - 0.1464
-        - 0.2325
+        - 0.2324
       p30:
-        - 0.2387
-        - 0.4073
+        - 0.2393
+        - 0.4080
       ndcg20:
         - 0.20327
-        - 0.25304
+        - 0.25303
       err20:
         - 0.12637
         - 0.16550
@@ -98,32 +98,32 @@ models:
       - -ql
     results:
       map:
-        - 0.1493
-        - 0.2467
+        - 0.1494
+        - 0.2466
       p30:
-        - 0.2613
+        - 0.2607
         - 0.4380
       ndcg20:
         - 0.19935
-        - 0.22282
+        - 0.22184
       err20:
-        - 0.12319
-        - 0.13211
+        - 0.12325
+        - 0.13218
   - name: ql+rm3
     params:
       - -ql
       - -rm3
     results:
       map:
-        - 0.1291
-        - 0.2168
+        - 0.1290
+        - 0.2177
       p30:
         - 0.2347
-        - 0.3793
+        - 0.3800
       ndcg20:
         - 0.17253
-        - 0.20662
+        - 0.20829
       err20:
-        - 0.10084
-        - 0.12179
+        - 0.10083
+        - 0.12450
 
diff --git a/src/main/resources/regression/cw12b13.yaml b/src/main/resources/regression/cw12b13.yaml
index 584fd76759..6de24bb7d5 100644
--- a/src/main/resources/regression/cw12b13.yaml
+++ b/src/main/resources/regression/cw12b13.yaml
@@ -21,8 +21,8 @@ index_options:
 topic_reader: Webxml
 index_stats:
   documents: 52249039
-  documents (non-empty): 52238521
-  total terms: 30617038149
+  documents (non-empty): 52238526
+  total terms: 30666923268
 topics:
   - name: "[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)"
     path: topics.web.201-250.txt
@@ -72,27 +72,27 @@ models:
         - 0.1273
       ndcg20:
         - 0.12862
-        - 0.11849
+        - 0.11835
       err20:
-        - 0.08379
-        - 0.12013
+        - 0.08378
+        - 0.12006
   - name: bm25+rm3
     params:
       - -bm25
       - -rm3
     results:
       map:
-        - 0.0412
+        - 0.0408
         - 0.0210
       p30:
-        - 0.1713
+        - 0.1673
         - 0.1207
       ndcg20:
-        - 0.11293
-        - 0.10796
+        - 0.11192
+        - 0.10809
       err20:
-        - 0.07629
-        - 0.10653
+        - 0.07530
+        - 0.10662
   - name: bm25+ax
     params:
       - -bm25
@@ -105,14 +105,14 @@ models:
         - 0.0435
         - 0.0180
       p30:
-        - 0.1840
+        - 0.1833
         - 0.1107
       ndcg20:
-        - 0.12875
-        - 0.09637
+        - 0.12867
+        - 0.09627
       err20:
-        - 0.09430
-        - 0.09289
+        - 0.09413
+        - 0.09285
   - name: ql
     params:
       - -ql
@@ -121,14 +121,14 @@ models:
         - 0.0397
         - 0.0235
       p30:
-        - 0.1767
+        - 0.1780
         - 0.1373
       ndcg20:
-        - 0.11067
+        - 0.11059
         - 0.11765
       err20:
-        - 0.07689
-        - 0.10908
+        - 0.07679
+        - 0.10917
   - name: ql+rm3
     params:
       - -ql
@@ -138,14 +138,14 @@ models:
         - 0.0322
         - 0.0203
       p30:
-        - 0.1507
+        - 0.1513
         - 0.1173
       ndcg20:
         - 0.09199
-        - 0.10035
+        - 0.10036
       err20:
         - 0.05525
-        - 0.09289
+        - 0.09284
   - name: ql+ax
     params:
       - -ql
@@ -155,14 +155,14 @@ models:
       - -axiom.beta 0.1
     results:
       map:
-        - 0.0359
-        - 0.0186
+        - 0.0358
+        - 0.0183
       p30:
-        - 0.1513
-        - 0.1167
+        - 0.1507
+        - 0.1147
       ndcg20:
-        - 0.11435
-        - 0.10013
+        - 0.11407
+        - 0.09891
       err20:
-        - 0.07800
-        - 0.08965
+        - 0.07803
+        - 0.09002
diff --git a/src/main/resources/regression/gov2.yaml b/src/main/resources/regression/gov2.yaml
index f0c23ec988..f5f9917c8a 100644
--- a/src/main/resources/regression/gov2.yaml
+++ b/src/main/resources/regression/gov2.yaml
@@ -39,7 +39,7 @@ evals:
 index_stats:
   documents: 25172934
   documents (non-empty): 25170664
-  total terms: 17343119816
+  total terms: 17345062322
 topics:
   - name: "[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)"
     path: topics.701-750.txt
@@ -85,11 +85,11 @@ models:
       - -axiom.deterministic
     results:
       map:
-        - 0.2665
-        - 0.3664
+        - 0.2669
+        - 0.3666
         - 0.3069
       p30:
-        - 0.4986
+        - 0.4993
         - 0.5933
         - 0.5033
   - name: ql
@@ -99,7 +99,7 @@ models:
       map:
         - 0.2681
         - 0.3303
-        - 0.2996
+        - 0.2997
       p30:
         - 0.4755
         - 0.5347
diff --git a/src/main/resources/regression/mb11.yaml b/src/main/resources/regression/mb11.yaml
index 173b3b48a3..76cbd7dec7 100644
--- a/src/main/resources/regression/mb11.yaml
+++ b/src/main/resources/regression/mb11.yaml
@@ -112,7 +112,7 @@ models:
         - 0.2389
       p30:
         - 0.4435
-        - 0.3520
+        - 0.3514
   - name: ql+ax
     params:
       - -searchtweets
diff --git a/src/main/resources/regression/robust04.yaml b/src/main/resources/regression/robust04.yaml
index 5193b946cb..6186e90db3 100644
--- a/src/main/resources/regression/robust04.yaml
+++ b/src/main/resources/regression/robust04.yaml
@@ -40,7 +40,7 @@ index_path: indexes/lucene-index.robust04.pos+docvectors+rawdocs # path to the e
 index_stats:
   documents: 528030
   documents (non-empty): 528030
-  total terms: 174540587
+  total terms: 174540872
 topics:
   - name: "All Topics"
     path: topics.robust04.301-450.601-700.txt
@@ -71,7 +71,7 @@ models:
       - -axiom.deterministic
     results:
       map:
-        - 0.2895
+        - 0.2896
       p30:
         - 0.3333
   - name: ql
diff --git a/src/main/resources/regression/robust05.yaml b/src/main/resources/regression/robust05.yaml
index 500a386f39..3d61dbc57f 100644
--- a/src/main/resources/regression/robust05.yaml
+++ b/src/main/resources/regression/robust05.yaml
@@ -51,7 +51,7 @@ models:
       - -bm25
     results:
       map:
-        - 0.2031
+        - 0.2032
       p30:
         - 0.3693
   - name: bm25+rm3
@@ -71,7 +71,7 @@ models:
       - -axiom.deterministic
     results:
       map:
-        - 0.2584
+        - 0.2587
       p30:
         - 0.4120
   - name: ql
diff --git a/src/main/resources/regression/wt10g.yaml b/src/main/resources/regression/wt10g.yaml
index fceec4ffac..6db937ec82 100644
--- a/src/main/resources/regression/wt10g.yaml
+++ b/src/main/resources/regression/wt10g.yaml
@@ -39,8 +39,8 @@ input: collections/web/wt10g/
 index_path: indexes/lucene-index.wt10g.pos+docvectors+rawdocs # path to the existing index, used in regression test if `--index` option is absent
 index_stats:
   documents: 1688402
-  documents (non-empty): 1688290
-  total terms: 752326031
+  documents (non-empty): 1688291
+  total terms: 752790242
 topics:
   - name: "Wt10g: Topics 451-550"
     path: topics.451-550.txt
@@ -53,7 +53,7 @@ models:
       map:
         - 0.1992
       p30:
-        - 0.2218
+        - 0.2214
   - name: bm25+rm3
     params:
       - -bm25
@@ -103,4 +103,4 @@ models:
       map:
         - 0.2275
       p30:
-        - 0.2517
+        - 0.2514

From 5b4c78585bcffdeddd0e52cd4f762cb48233e140 Mon Sep 17 00:00:00 2001
From: lintool <jimmylin@uwaterloo.ca>
Date: Fri, 7 Jun 2019 11:41:57 -0400
Subject: [PATCH 4/6] Fixed compiler error and regressions.

---
 src/main/java/io/anserini/search/SimpleSearcher.java | 6 +++---
 src/main/resources/regression/car17v2.0.yaml         | 4 ++--
 src/main/resources/regression/core17.yaml            | 4 ++--
 src/main/resources/regression/core18.yaml            | 8 ++++----
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/main/java/io/anserini/search/SimpleSearcher.java b/src/main/java/io/anserini/search/SimpleSearcher.java
index 6642fdb71e..c15221f2e6 100644
--- a/src/main/java/io/anserini/search/SimpleSearcher.java
+++ b/src/main/java/io/anserini/search/SimpleSearcher.java
@@ -189,14 +189,14 @@ protected Result[] search(Query query, List<String> queryTokens, String queryStr
         builder.add(query, BooleanClause.Occur.MUST);
         Query compositeQuery = builder.build();
         rs = searcher.search(compositeQuery, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_TWEETID, true);
-        context = new RerankerContext<>(searcher, null, compositeQuery, null, q, queryTokens, filter, searchArgs);
+        context = new RerankerContext<>(searcher, null, compositeQuery, null, queryString, queryTokens, filter, searchArgs);
       } else {
         rs = searcher.search(query, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_TWEETID, true);
-        context = new RerankerContext<>(searcher, null, query, null, q, queryTokens, null, searchArgs);
+        context = new RerankerContext<>(searcher, null, query, null, queryString, queryTokens, null, searchArgs);
       }
     } else {
       rs = searcher.search(query, isRerank ? searchArgs.rerankcutoff : k, BREAK_SCORE_TIES_BY_DOCID, true);
-        context = new RerankerContext<>(searcher, null, query, null, q, queryTokens, null, searchArgs);
+        context = new RerankerContext<>(searcher, null, query, null, queryString, queryTokens, null, searchArgs);
     }
 
     ScoredDocuments hits = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);
diff --git a/src/main/resources/regression/car17v2.0.yaml b/src/main/resources/regression/car17v2.0.yaml
index 551eb028c0..f1ab5cfcb5 100644
--- a/src/main/resources/regression/car17v2.0.yaml
+++ b/src/main/resources/regression/car17v2.0.yaml
@@ -21,8 +21,8 @@ index_path: indexes/lucene-index.car17v2.0.pos+docvectors+rawdocs
 collection: CarCollection
 index_stats:
   documents: 29794689
-  documents (non-empty): 29791041
-  total terms: 1249740109
+  documents (non-empty): 29791059
+  total terms: 1249754054
 topics:
   - name: "benchmarkY1test"
     path: topics.car17v2.0.benchmarkY1test.txt
diff --git a/src/main/resources/regression/core17.yaml b/src/main/resources/regression/core17.yaml
index bd20578ba8..f9c022d9d1 100644
--- a/src/main/resources/regression/core17.yaml
+++ b/src/main/resources/regression/core17.yaml
@@ -22,7 +22,7 @@ collection: NewYorkTimesCollection
 index_stats:
   documents: 1855649
   documents (non-empty): 1855649
-  total terms: 751034051
+  total terms: 751034054
 topics:
   - name: "All Topics"
     path: topics.core17.txt
@@ -70,7 +70,7 @@ models:
       - -axiom.deterministic
     results:
       map:
-        - 0.2787
+        - 0.2788
       p30:
         - 0.4980
   - name: ql
diff --git a/src/main/resources/regression/core18.yaml b/src/main/resources/regression/core18.yaml
index d36cdda743..2ff64295cb 100644
--- a/src/main/resources/regression/core18.yaml
+++ b/src/main/resources/regression/core18.yaml
@@ -21,8 +21,8 @@ index_path: indexes/lucene-index.core18.pos+docvectors+rawdocs # path to the exi
 collection: WashingtonPostCollection
 index_stats:
   documents: 595037
-  documents (non-empty): 595037
-  total terms: 317898812
+  documents (non-empty): 595030
+  total terms: 318219945
 topics:
   - name: "All Topics"
     path: topics.core18.txt
@@ -59,7 +59,7 @@ models:
       - -rm3
     results:
       map:
-        - 0.3136
+        - 0.3135
       p30:
         - 0.4200
   - name: bm25+ax
@@ -70,7 +70,7 @@ models:
       - -axiom.deterministic
     results:
       map:
-        - 0.2920
+        - 0.2925
       p30:
         - 0.4027
   - name: ql

From bebbda0c5dd0a0b02e7c6946007591482674194a Mon Sep 17 00:00:00 2001
From: lintool <jimmylin@uwaterloo.ca>
Date: Fri, 7 Jun 2019 20:20:12 -0400
Subject: [PATCH 5/6] Tweaks.

---
 docs/experiments-core17.md | 2 +-
 docs/experiments-core18.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/experiments-core17.md b/docs/experiments-core17.md
index 6acda9e0c1..9dbf1bdd5b 100644
--- a/docs/experiments-core17.md
+++ b/docs/experiments-core17.md
@@ -64,7 +64,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-All Topics                              | 0.2087    | 0.2823    | 0.2787    | 0.2032    | 0.2606    | 0.2613    |
+All Topics                              | 0.2087    | 0.2823    | 0.2788    | 0.2032    | 0.2606    | 0.2613    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
diff --git a/docs/experiments-core18.md b/docs/experiments-core18.md
index fafa740668..38330b6170 100644
--- a/docs/experiments-core18.md
+++ b/docs/experiments-core18.md
@@ -64,7 +64,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-All Topics                              | 0.2495    | 0.3136    | 0.2920    | 0.2526    | 0.3073    | 0.2966    |
+All Topics                              | 0.2495    | 0.3135    | 0.2925    | 0.2526    | 0.3073    | 0.2966    |
 
 
 P30                                     | BM25      | BM25+RM3  | BM25+AX   | QL        | QL+RM3    | QL+AX     |

From b485d236246532d5186902deece179f0ef65f2c8 Mon Sep 17 00:00:00 2001
From: lintool <jimmylin@uwaterloo.ca>
Date: Tue, 11 Jun 2019 23:06:10 -0400
Subject: [PATCH 6/6] Fixed broken regressions.

---
 docs/regressions-car17v1.5.md                      | 4 ++--
 docs/regressions-msmarco-doc.md                    | 4 ++--
 docs/regressions-msmarco-passage.md                | 2 +-
 src/main/resources/regression/car17v1.5.yaml       | 4 ++--
 src/main/resources/regression/msmarco-doc.yaml     | 8 ++++----
 src/main/resources/regression/msmarco-passage.yaml | 6 +++---
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/docs/regressions-car17v1.5.md b/docs/regressions-car17v1.5.md
index b73e776dfc..b5873f528e 100644
--- a/docs/regressions-car17v1.5.md
+++ b/docs/regressions-car17v1.5.md
@@ -64,11 +64,11 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | +RM3      | +Ax       | QL        | +RM3      | +Ax       |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2017 CAR: benchmarkY1test (v1.5)](http://trec-car.cs.unh.edu/datareleases/)| 0.1563    | 0.1295    | 0.1358    | 0.1386    | 0.1080    | 0.1048    |
+[TREC 2017 CAR: benchmarkY1test (v1.5)](http://trec-car.cs.unh.edu/datareleases/)| 0.1562    | 0.1295    | 0.1358    | 0.1386    | 0.1080    | 0.1048    |
 
 
 RECIP_RANK                              | BM25      | +RM3      | +Ax       | QL        | +RM3      | +Ax       |
 :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
-[TREC 2017 CAR: benchmarkY1test (v1.5)](http://trec-car.cs.unh.edu/datareleases/)| 0.2336    | 0.1923    | 0.1949    | 0.2037    | 0.1599    | 0.1524    |
+[TREC 2017 CAR: benchmarkY1test (v1.5)](http://trec-car.cs.unh.edu/datareleases/)| 0.2331    | 0.1923    | 0.1949    | 0.2037    | 0.1599    | 0.1524    |
 
 
diff --git a/docs/regressions-msmarco-doc.md b/docs/regressions-msmarco-doc.md
index 4ce79b929d..6aaf2966d0 100644
--- a/docs/regressions-msmarco-doc.md
+++ b/docs/regressions-msmarco-doc.md
@@ -47,11 +47,11 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25      | +RM3      |
 :---------------------------------------|-----------|-----------|
-[MS MARCO Document Ranking: Dev Queries](https://github.com/microsoft/TREC-2019-Deep-Learning)| 0.2308    | 0.1631    |
+[MS MARCO Document Ranking: Dev Queries](https://github.com/microsoft/TREC-2019-Deep-Learning)| 0.2310    | 0.1632    |
 
 
 R@1000                                  | BM25      | +RM3      |
 :---------------------------------------|-----------|-----------|
-[MS MARCO Document Ranking: Dev Queries](https://github.com/microsoft/TREC-2019-Deep-Learning)| 0.8856    | 0.8787    |
+[MS MARCO Document Ranking: Dev Queries](https://github.com/microsoft/TREC-2019-Deep-Learning)| 0.8856    | 0.8785    |
 
 
diff --git a/docs/regressions-msmarco-passage.md b/docs/regressions-msmarco-passage.md
index 202a3fb53d..d8b29f8b6d 100644
--- a/docs/regressions-msmarco-passage.md
+++ b/docs/regressions-msmarco-passage.md
@@ -56,7 +56,7 @@ With the above commands, you should be able to replicate the following results:
 
 MAP                                     | BM25 (Default)| +RM3      | BM25 (Tuned)| +RM3      |
 :---------------------------------------|-----------|-----------|-----------|-----------|
-[MS MARCO Passage Ranking: Dev Queries](https://github.com/microsoft/MSMARCO-Passage-Ranking)| 0.1924    | 0.1661    | 0.1956    | 0.1766    |
+[MS MARCO Passage Ranking: Dev Queries](https://github.com/microsoft/MSMARCO-Passage-Ranking)| 0.1926    | 0.1661    | 0.1957    | 0.1766    |
 
 
 R@1000                                  | BM25 (Default)| +RM3      | BM25 (Tuned)| +RM3      |
diff --git a/src/main/resources/regression/car17v1.5.yaml b/src/main/resources/regression/car17v1.5.yaml
index ed6b77f4d2..e13410f6ea 100644
--- a/src/main/resources/regression/car17v1.5.yaml
+++ b/src/main/resources/regression/car17v1.5.yaml
@@ -51,9 +51,9 @@ models:
       - -bm25
     results:
       map:
-        - 0.1563
+        - 0.1562
       recip_rank:
-        - 0.2336
+        - 0.2331
   - name: bm25+rm3
     display: +RM3
     params:
diff --git a/src/main/resources/regression/msmarco-doc.yaml b/src/main/resources/regression/msmarco-doc.yaml
index dcf364fb78..7fdf92776f 100644
--- a/src/main/resources/regression/msmarco-doc.yaml
+++ b/src/main/resources/regression/msmarco-doc.yaml
@@ -42,7 +42,7 @@ index_path: indexes/lucene-index.msmarco-doc.pos+docvectors+rawdocs
 index_stats:
   documents: 3213835
   documents (non-empty): 3213835
-  total terms: 2746735247
+  total terms: 2748636047
 topics:
   - name: "[MS MARCO Document Ranking: Dev Queries](https://github.com/microsoft/TREC-2019-Deep-Learning)"
     path: topics.msmarco-doc.dev.txt
@@ -54,7 +54,7 @@ models:
       - -bm25
     results:
       map:
-        - 0.2308
+        - 0.2310
       R@1000:
         - 0.8856
   - name: bm25+rm3
@@ -64,6 +64,6 @@ models:
       - -rm3
     results:
       map:
-        - 0.1631
+        - 0.1632
       R@1000:
-        - 0.8787
+        - 0.8785
diff --git a/src/main/resources/regression/msmarco-passage.yaml b/src/main/resources/regression/msmarco-passage.yaml
index 8dc22f5af6..c7d46f6380 100644
--- a/src/main/resources/regression/msmarco-passage.yaml
+++ b/src/main/resources/regression/msmarco-passage.yaml
@@ -42,7 +42,7 @@ index_path: indexes/lucene-index.msmarco-passage.pos+docvectors+rawdocs
 index_stats:
   documents: 8841823
   documents (non-empty): 8841823
-  total terms: 352122244
+  total terms: 352316036
 topics:
   - name: "[MS MARCO Passage Ranking: Dev Queries](https://github.com/microsoft/MSMARCO-Passage-Ranking)"
     path: topics.msmarco-passage.dev-subset.txt
@@ -54,7 +54,7 @@ models:
       - -bm25
     results:
       map:
-        - 0.1924
+        - 0.1926
       R@1000:
         - 0.8526
   - name: bm25-default+rm3
@@ -75,7 +75,7 @@ models:
       - -b 0.72
     results:
       map:
-        - 0.1956
+        - 0.1957
       R@1000:
         - 0.8578
   - name: bm25-tuned+rm3