log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5))
.
- *
- * @param docFreq terms's document frequency
- * @param docCount total document count in the index
- * @return inverted document frequency
- * */
- float idf(long docFreq, long docCount) {
- throw new UnsupportedOperationException();
- }
-
- /** Implemented as <code>1 / (distance + 1)</code>.
- *
- * @param distance distance
- * @return sloppy frequency
- * */
- float sloppyFreq(int distance) {
- return 1.0f / (distance + 1);
- }
-
- /** The default implementation returns 1
- *
- * @param doc doc
- * @param start start
- * @param end end
- * @param payload payload
- * @return 1
- * */
- float scorePayload(int doc, int start, int end, BytesRef payload) {
- return 1;
- }
-
- /** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code>,
- * or returns <code>1</code> if the index does not store sumTotalTermFreq:
- * any field that omits frequency information).
- *
- * @param collectionStats collection-wide statistics
- * @return average document length of FIELD_BODY
- * */
- float avgFieldLength(CollectionStatistics collectionStats) {
- final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
- if (sumTotalTermFreq <= 0) {
- return 1f; // field does not exist, or stat is unsupported
- } else {
- final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
- return (float) (sumTotalTermFreq / (double) docCount);
- }
- }
-
- /** The default implementation encodes boost / sqrt(length)
- * with {@link SmallFloat#floatToByte315(float)}. This is compatible with
- * Lucene's default implementation. If you change this, then you should
- * change {@link #decodeNormValue(byte)} to match.
- *
- * @param boost boost
- * @param fieldLength fieldLength
- * @return encoded document lengths
- * */
- byte encodeNormValue(float boost, int fieldLength) {
- return SmallFloat.floatToByte315(boost / (float) Math.sqrt(fieldLength));
- }
-
- /** The default implementation returns <code>1 / f<sup>2</sup></code>
- * where <code>f</code> is {@link SmallFloat#byte315ToFloat(byte)}.
- *
- * @param b encoded document length
- * @return decoded document length
- * */
- float decodeNormValue(byte b) {
- return NORM_TABLE[b & 0xFF];
- }
-
- /**
- * True if overlap tokens (tokens with a position of increment of zero) are
- * discounted from the document's length.
- */
- boolean discountOverlaps = true;
-
- /** Sets whether overlap tokens (Tokens with 0 position increment) are
- * ignored when computing norm. By default this is true, meaning overlap
- * tokens do not count when computing norms.
- *
- * @param v v
- * */
- public void setDiscountOverlaps(boolean v) {
- discountOverlaps = v;
- }
-
- /**
- * Returns true if overlap tokens are discounted from the document's length.
- * @see #setDiscountOverlaps
- *
- * @return discountOverlaps
- */
- public boolean getDiscountOverlaps() {
- return discountOverlaps;
- }
-
- /** Cache of decoded bytes. */
- private static final float[] NORM_TABLE = new float[256];
-
- static {
- for (int i = 1; i < 256; i++) {
- float f = SmallFloat.byte315ToFloat((byte)i);
- NORM_TABLE[i] = 1.0f / (f*f);
- }
- NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // otherwise inf
- }
-
-
- @Override
- public long computeNorm(FieldInvertState state) {
- final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
- return encodeNormValue(state.getBoost(), numTerms);
- }
-
- /**
- * Computes a score factor for a simple term and returns an explanation
- * for that score factor.
- *
- * <p>
- * The default implementation uses:
- *
- * <pre class="prettyprint">
- * idf(docFreq, docCount);
- * </pre>
- *
- * Note that {@link CollectionStatistics#docCount()} is used instead of
- * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()} because also
- * {@link TermStatistics#docFreq()} is used, and when the latter
- * is inaccurate, so is {@link CollectionStatistics#docCount()}, and in the same direction.
- * In addition, {@link CollectionStatistics#docCount()} does not skew when fields are sparse.
- *
- * @param collectionStats collection-level statistics
- * @param termStats term-level statistics for the term
- * @return an Explain object that includes both an idf score factor
- and an explanation for the term.
- */
- public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
- final long df = termStats.docFreq();
- final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
- final float idf = idf(df, docCount);
- return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
- }
-
- /**
- * Computes a score factor for a phrase.
- *
- * <p>
- * The default implementation sums the idf factor for
- * each term in the phrase.
- *
- * @param collectionStats collection-level statistics
- * @param termStats term-level statistics for the terms in the phrase
- * @return an Explain object that includes both an idf
- * score factor for the phrase and an explanation
- * for each term.
- */
- public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
- final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
- float idf = 0.0f;
- List
- occurrences
- score = termWeight * IDF * ---------------------------------------------------------
- occurrences + s + documentLength * ( s / avgDocLength )
-
- */
- @Override
- public float score(int doc, float freq) {
- // if there are no norms, we act as if b=0
- float norm = norms == null ? 1.0f : cache[(byte)norms.get(doc) & 0xFF];
- return weightValue * freq / (freq + norm);
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return explainScore(doc, freq, stats, norms);
- }
-
- @Override
- public float computeSlopFactor(int distance) {
- return sloppyFreq(distance);
- }
-
- @Override
- public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
- return scorePayload(doc, start, end, payload);
- }
- }
-
- Explanation explainTFNorm(int doc, Explanation freq, Stats stats, NumericDocValues norms) {
- Listb
parameter
- * @see #AxiomaticSimilarity(float)
- *
- * @return s
- */
- public float getS() {
- return s;
- }
-}
diff --git a/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java b/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java
deleted file mode 100644
index 25fd6e0cae..0000000000
--- a/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Anserini: A toolkit for reproducible information retrieval research built on Lucene
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.anserini.search.similarity;
-
-public class F2ExpSimilarity extends AxiomaticSimilarity {
- private final float k = 0.35f;
-
- /**
- * F2Exp with the supplied parameter values.
- * @param s Controls to what degree document length normalizes tf values.
- * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is
- * not within the range {@code [0..1]}
- */
- public F2ExpSimilarity(float s) {
- super(s);
- }
-
- /** F2Exp with these default values:
- * <ul>
- * <li>{@code s = 0.5}</li>
- * </ul>
- */
- public F2ExpSimilarity() {
- this(0.5f);
- }
-
- @Override
- float idf(long docFreq, long docCount) {
- return (float) Math.pow((docCount + 1.0) / docFreq, this.k);
- }
-
- @Override
- public String toString() {
- return "F2Exp(s=" + s +")";
- }
-
- /**
- * Returns the <code>k</code> parameter
- * @see #F2ExpSimilarity(float)
- * @return k
- */
- public float getK() {
- return k;
- }
-}
diff --git a/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java b/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java
deleted file mode 100644
index f95386b045..0000000000
--- a/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Anserini: A toolkit for reproducible information retrieval research built on Lucene
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package io.anserini.search.similarity;
-
-public class F2LogSimilarity extends AxiomaticSimilarity {
- /**
- * F2Log with the supplied parameter values.
- * @param s Controls to what degree document length normalizes tf values.
- * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is
- * not within the range {@code [0..1]}
- */
- public F2LogSimilarity(float s) {
- super(s);
- }
-
- /** F2Log with these default values:
- * <ul>
- * <li>{@code s = 0.5}</li>
- * </ul>
- */
- public F2LogSimilarity() {
- this(0.5f);
- }
-
- @Override
- float idf(long docFreq, long docCount) {
- return (float) Math.log((1.0f + docCount) / docFreq);
- }
-
- @Override
- public String toString() {
- return "F2Log(s=" + s +")";
- }
-}
diff --git a/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java b/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java
index 5e2795b253..9adae2ed86 100644
--- a/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java
+++ b/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java
@@ -34,7 +34,7 @@ public long computeNorm(FieldInvertState fieldInvertState) {
}
@Override
- public SimWeight computeWeight(CollectionStatistics collectionStatistics, TermStatistics... termStatisticses) {
+ public SimWeight computeWeight(float boost, CollectionStatistics collectionStatistics, TermStatistics... termStatistics) {
return null;
}
diff --git a/src/main/resources/fine_tuning/models.yaml b/src/main/resources/fine_tuning/models.yaml
index 28eb306881..c8c61ca44d 100644
--- a/src/main/resources/fine_tuning/models.yaml
+++ b/src/main/resources/fine_tuning/models.yaml
@@ -12,20 +12,20 @@ models:
expected:
robust04:
map:
- best_avg: 0.2496
- oracles_per_topic: 0.2703
- 2-fold: 0.2496
- 5-fold: 0.2481
+ best_avg: 0.2514
+ oracles_per_topic: 0.2721
+ 2-fold: 0.2509
+ 5-fold: 0.2486
P_20:
- best_avg: 0.3572
+ best_avg: 0.3610
oracles_per_topic: 0.4008
- 2-fold: 0.3543
- 5-fold: 0.3517
+ 2-fold: 0.3610
+ 5-fold: 0.3573
ndcg20:
- best_avg: 0.40703
- oracles_per_topic: 0.45610
- 2-fold: 0.4042
- 5-fold: 0.4005
+ best_avg: 0.41022
+ oracles_per_topic: 0.45820
+ 2-fold: 0.4083
+ 5-fold: 0.4059
bm25:
name: bm25
fixed_params: "-inmem -skipexists"
@@ -43,20 +43,20 @@ models:
expected:
robust04:
map:
- best_avg: 0.2532
- oracles_per_topic: 0.2921
- 2-fold: 0.2526
- 5-fold: 0.2528
+ best_avg: 0.2543
+ oracles_per_topic: 0.2935
+ 2-fold: 0.2539
+ 5-fold: 0.2530
P_20:
- best_avg: 0.3614
- oracles_per_topic: 0.4426
- 2-fold: 0.3604
- 5-fold: 0.3598
+ best_avg: 0.3631
+ oracles_per_topic: 0.4424
+ 2-fold: 0.3594
+ 5-fold: 0.3620
ndcg20:
- best_avg: 0.41659
- oracles_per_topic: 0.5028
- 2-fold: 0.4108
- 5-fold: 0.4157
+ best_avg: 0.41862
+ oracles_per_topic: 0.5031
+ 2-fold: 0.4144
+ 5-fold: 0.4160
axiom:
name: axiom
fixed_params: "-inmem -skipexists -axiom.n 30 -axiom.deterministic -rerankCutoff 50"
@@ -161,20 +161,20 @@ models:
expected:
robust04:
map:
- best_avg: 0.3009
- oracles_per_topic: 0.4158
- 2-fold: 0.2954
- 5-fold: 0.2991
+ best_avg: 0.3017
+ oracles_per_topic: 0.4153
+ 2-fold: 0.2943
+ 5-fold: 0.2982
P_20:
- best_avg: 0.3994
- oracles_per_topic: 0.5731
- 2-fold: 0.3795
- 5-fold: 0.3901
+ best_avg: 0.4016
+ oracles_per_topic: 0.5709
+ 2-fold: 0.3940
+ 5-fold: 0.3947
ndcg20:
- best_avg: 0.44631
- oracles_per_topic: 0.6335
- 2-fold: 0.4295
- 5-fold: 0.4348
+ best_avg: 0.44877
+ oracles_per_topic: 0.6332
+ 2-fold: 0.4411
+ 5-fold: 0.4412
bm25+rm3:
name: rm3
fixed_params: "-inmem -skipexists -rerankCutoff 50"
@@ -207,17 +207,17 @@ models:
expected:
robust04:
map:
- best_avg: 0.3020
- oracles_per_topic: 0.4402
- 2-fold: 0.2973
- 5-fold: 0.2956
+ best_avg: 0.3058
+ oracles_per_topic: 0.4375
+ 2-fold: 0.2987
+ 5-fold: 0.3033
P_20:
- best_avg: 0.4012
- oracles_per_topic: 0.6054
- 2-fold: 0.3871
- 5-fold: 0.3931
+ best_avg: 0.4024
+ oracles_per_topic: 0.5994
+ 2-fold: 0.3913
+ 5-fold: 0.3937
ndcg20:
- best_avg: 0.44958
- oracles_per_topic: 0.6702
- 2-fold: 0.4358
- 5-fold: 0.4402
+ best_avg: 0.44979
+ oracles_per_topic: 0.6653
+ 2-fold: 0.4321
+ 5-fold: 0.4426
diff --git a/src/main/resources/jdiq2018/models.yaml b/src/main/resources/jdiq2018/models.yaml
index f30507edbc..a7d7a4a618 100644
--- a/src/main/resources/jdiq2018/models.yaml
+++ b/src/main/resources/jdiq2018/models.yaml
@@ -13,57 +13,57 @@ models:
expected:
disk12:
map:
- topics.51-100.txt: 0.2262
- topics.101-150.txt: 0.2062
- topics.151-200.txt: 0.2605
+ topics.51-100.txt: 0.2274
+ topics.101-150.txt: 0.2071
+ topics.151-200.txt: 0.2614
robust04:
map:
- topics.robust04.301-450.601-700.txt: 0.2532
+ topics.robust04.301-450.601-700.txt: 0.2543
robust05:
map:
- topics.robust05.txt: 0.2090
+ topics.robust05.txt: 0.2097
core17:
map:
- topics.core17.txt: 0.2047
+ topics.core17.txt: 0.2052
wt10g:
map:
- topics.451-550.txt: 0.2012
+ topics.451-550.txt: 0.2005
gov2:
map:
- topics.701-750.txt: 0.2684
- topics.751-800.txt: 0.3392
- topics.801-850.txt: 0.3080
+ topics.701-750.txt: 0.2702
+ topics.751-800.txt: 0.3394
+ topics.801-850.txt: 0.3085
cw09b:
map:
- topics.web.51-100.txt: 0.1147
- topics.web.101-150.txt: 0.1117
- topics.web.151-200.txt: 0.1202
+ topics.web.51-100.txt: 0.1165
+ topics.web.101-150.txt: 0.1104
+ topics.web.151-200.txt: 0.1226
ndcg20:
- topics.web.51-100.txt: 0.14592
- topics.web.101-150.txt: 0.19374
- topics.web.151-200.txt: 0.10379
+ topics.web.51-100.txt: 0.14868
+ topics.web.101-150.txt: 0.19270
+ topics.web.151-200.txt: 0.10895
err20:
- topics.web.51-100.txt: 0.07644
- topics.web.101-150.txt: 0.10229
- topics.web.151-200.txt: 0.14715
+ topics.web.51-100.txt: 0.07743
+ topics.web.101-150.txt: 0.09808
+ topics.web.151-200.txt: 0.1524
cw12b13:
map:
- topics.web.201-250.txt: 0.0475
- topics.web.251-300.txt: 0.0238
+ topics.web.201-250.txt: 0.0481
+ topics.web.251-300.txt: 0.0237
ndcg20:
- topics.web.201-250.txt: 0.13862
- topics.web.251-300.txt: 0.12366
+ topics.web.201-250.txt: 0.13843
+ topics.web.251-300.txt: 0.12466
err20:
- topics.web.201-250.txt: 0.0959
- topics.web.251-300.txt: 0.12708
+ topics.web.201-250.txt: 0.09928
+ topics.web.251-300.txt: 0.12236
mb11:
map:
- topics.microblog2011.txt: 0.3683
+ topics.microblog2011.txt: 0.3643
topics.microblog2012.txt: 0.2083
mb13:
map:
- topics.microblog2013.txt: 0.2599
- topics.microblog2014.txt: 0.4203
+ topics.microblog2013.txt: 0.2600
+ topics.microblog2014.txt: 0.4195
ql:
params:
mu:
@@ -73,57 +73,57 @@ models:
expected:
disk12:
map:
- topics.51-100.txt: 0.2210
- topics.101-150.txt: 0.2017
- topics.151-200.txt: 0.2544
+ topics.51-100.txt: 0.2226
+ topics.101-150.txt: 0.2015
+ topics.151-200.txt: 0.2558
robust04:
map:
- topics.robust04.301-450.601-700.txt: 0.2496
+ topics.robust04.301-450.601-700.txt: 0.2514
robust05:
map:
- topics.robust05.txt: 0.2026
+ topics.robust05.txt: 0.2030
core17:
map:
- topics.core17.txt: 0.1951
+ topics.core17.txt: 0.1943
wt10g:
map:
- topics.451-550.txt: 0.2034
+ topics.451-550.txt: 0.2021
gov2:
map:
- topics.701-750.txt: 0.2636
- topics.751-800.txt: 0.3267
- topics.801-850.txt: 0.2957
+ topics.701-750.txt: 0.2700
+ topics.751-800.txt: 0.3303
+ topics.801-850.txt: 0.3013
cw09b:
map:
- topics.web.51-100.txt: 0.104
- topics.web.101-150.txt: 0.1002
- topics.web.151-200.txt: 0.1091
+ topics.web.51-100.txt: 0.1060
+ topics.web.101-150.txt: 0.1004
+ topics.web.151-200.txt: 0.1113
ndcg20:
- topics.web.51-100.txt: 0.11701
- topics.web.101-150.txt: 0.16868
- topics.web.151-200.txt: 0.09965
+ topics.web.51-100.txt: 0.11845
+ topics.web.101-150.txt: 0.17012
+ topics.web.151-200.txt: 0.09778
err20:
- topics.web.51-100.txt: 0.06455
- topics.web.101-150.txt: 0.08608
- topics.web.151-200.txt: 0.14886
+ topics.web.51-100.txt: 0.06431
+ topics.web.101-150.txt: 0.08684
+ topics.web.151-200.txt: 0.14839
cw12b13:
map:
- topics.web.201-250.txt: 0.0392
- topics.web.251-300.txt: 0.0241
+ topics.web.201-250.txt: 0.0398
+ topics.web.251-300.txt: 0.0246
ndcg20:
- topics.web.201-250.txt: 0.1168
- topics.web.251-300.txt: 0.11883
+ topics.web.201-250.txt: 0.11675
+ topics.web.251-300.txt: 0.12088
err20:
- topics.web.201-250.txt: 0.0883
- topics.web.251-300.txt: 0.1088
+ topics.web.201-250.txt: 0.08977
+ topics.web.251-300.txt: 0.1108
mb11:
map:
- topics.microblog2011.txt: 0.3635
- topics.microblog2012.txt: 0.2120
+ topics.microblog2011.txt: 0.3607
+ topics.microblog2012.txt: 0.2121
mb13:
map:
- topics.microblog2013.txt: 0.2613
- topics.microblog2014.txt: 0.4201
+ topics.microblog2013.txt: 0.2615
+ topics.microblog2014.txt: 0.4200
pl2:
params:
pl2.c:
@@ -133,57 +133,57 @@ models:
expected:
disk12:
map:
- topics.51-100.txt: 0.2213
- topics.101-150.txt: 0.1952
- topics.151-200.txt: 0.2524
+ topics.51-100.txt: 0.2226
+ topics.101-150.txt: 0.1967
+ topics.151-200.txt: 0.2544
robust04:
map:
- topics.robust04.301-450.601-700.txt: 0.2521
+ topics.robust04.301-450.601-700.txt: 0.2531
robust05:
map:
- topics.robust05.txt: 0.2006
+ topics.robust05.txt: 0.2021
core17:
map:
- topics.core17.txt: 0.2005
+ topics.core17.txt: 0.2019
wt10g:
map:
- topics.451-550.txt: 0.1889
+ topics.451-550.txt: 0.1880
gov2:
map:
- topics.701-750.txt: 0.2696
- topics.751-800.txt: 0.3428
- topics.801-850.txt: 0.3084
+ topics.701-750.txt: 0.2726
+ topics.751-800.txt: 0.3439
+ topics.801-850.txt: 0.3088
cw09b:
map:
- topics.web.51-100.txt: 0.1085
- topics.web.101-150.txt: 0.1075
- topics.web.151-200.txt: 0.1135
+ topics.web.51-100.txt: 0.1103
+ topics.web.101-150.txt: 0.1067
+ topics.web.151-200.txt: 0.1170
ndcg20:
- topics.web.51-100.txt: 0.12131
- topics.web.101-150.txt: 0.17742
- topics.web.151-200.txt: 0.09281
+ topics.web.51-100.txt: 0.12168
+ topics.web.101-150.txt: 0.17652
+ topics.web.151-200.txt: 0.09274
err20:
- topics.web.51-100.txt: 0.06348
- topics.web.101-150.txt: 0.09095
- topics.web.151-200.txt: 0.14314
+ topics.web.51-100.txt: 0.06346
+ topics.web.101-150.txt: 0.08923
+ topics.web.151-200.txt: 0.14389
cw12b13:
map:
- topics.web.201-250.txt: 0.0416
- topics.web.251-300.txt: 0.0239
+ topics.web.201-250.txt: 0.0419
+ topics.web.251-300.txt: 0.0242
ndcg20:
- topics.web.201-250.txt: 0.12392
- topics.web.251-300.txt: 0.11768
+ topics.web.201-250.txt: 0.12465
+ topics.web.251-300.txt: 0.12127
err20:
- topics.web.201-250.txt: 0.09066
- topics.web.251-300.txt: 0.10751
+ topics.web.201-250.txt: 0.09331
+ topics.web.251-300.txt: 0.11086
mb11:
map:
- topics.microblog2011.txt: 0.3572
- topics.microblog2012.txt: 0.2032
+ topics.microblog2011.txt: 0.3537
+ topics.microblog2012.txt: 0.2046
mb13:
map:
- topics.microblog2013.txt: 0.2519
- topics.microblog2014.txt: 0.4115
+ topics.microblog2013.txt: 0.2524
+ topics.microblog2014.txt: 0.4132
spl:
params:
spl.c:
@@ -193,57 +193,57 @@ models:
expected:
disk12:
map:
- topics.51-100.txt: 0.2189
- topics.101-150.txt: 0.1819
- topics.151-200.txt: 0.2448
+ topics.51-100.txt: 0.2201
+ topics.101-150.txt: 0.1840
+ topics.151-200.txt: 0.2459
robust04:
map:
- topics.robust04.301-450.601-700.txt: 0.2502
+ topics.robust04.301-450.601-700.txt: 0.2509
robust05:
map:
- topics.robust05.txt: 0.1969
+ topics.robust05.txt: 0.1980
core17:
map:
- topics.core17.txt: 0.1981
+ topics.core17.txt: 0.1999
wt10g:
map:
- topics.451-550.txt: 0.1726
+ topics.451-550.txt: 0.1704
gov2:
map:
- topics.701-750.txt: 0.2687
- topics.751-800.txt: 0.3386
- topics.801-850.txt: 0.3140
+ topics.701-750.txt: 0.2734
+ topics.751-800.txt: 0.3393
+ topics.801-850.txt: 0.3139
cw09b:
map:
- topics.web.51-100.txt: 0.1077
- topics.web.101-150.txt: 0.1066
- topics.web.151-200.txt: 0.1131
+ topics.web.51-100.txt: 0.1099
+ topics.web.101-150.txt: 0.1063
+ topics.web.151-200.txt: 0.1163
ndcg20:
- topics.web.51-100.txt: 0.12324
- topics.web.101-150.txt: 0.17621
- topics.web.151-200.txt: 0.09311
+ topics.web.51-100.txt: 0.12515
+ topics.web.101-150.txt: 0.17576
+ topics.web.151-200.txt: 0.09332
err20:
- topics.web.51-100.txt: 0.06653
- topics.web.101-150.txt: 0.09082
- topics.web.151-200.txt: 0.14348
+ topics.web.51-100.txt: 0.06589
+ topics.web.101-150.txt: 0.08926
+ topics.web.151-200.txt: 0.14448
cw12b13:
map:
- topics.web.201-250.txt: 0.0412
- topics.web.251-300.txt: 0.0238
+ topics.web.201-250.txt: 0.0418
+ topics.web.251-300.txt: 0.0240
ndcg20:
- topics.web.201-250.txt: 0.12534
- topics.web.251-300.txt: 0.11788
+ topics.web.201-250.txt: 0.12579
+ topics.web.251-300.txt: 0.12128
err20:
- topics.web.201-250.txt: 0.09046
- topics.web.251-300.txt: 0.109
+ topics.web.201-250.txt: 0.09396
+ topics.web.251-300.txt: 0.11347
mb11:
map:
- topics.microblog2011.txt: 0.3601
- topics.microblog2012.txt: 0.2050
+ topics.microblog2011.txt: 0.3567
+ topics.microblog2012.txt: 0.2055
mb13:
map:
- topics.microblog2013.txt: 0.2536
- topics.microblog2014.txt: 0.4132
+ topics.microblog2013.txt: 0.2530
+ topics.microblog2014.txt: 0.4147
f2exp:
params:
f2exp.s:
@@ -253,57 +253,57 @@ models:
expected:
disk12:
map:
- topics.51-100.txt: 0.2216
- topics.101-150.txt: 0.1997
- topics.151-200.txt: 0.2474
+ topics.51-100.txt: 0.2245
+ topics.101-150.txt: 0.2035
+ topics.151-200.txt: 0.2512
robust04:
map:
- topics.robust04.301-450.601-700.txt: 0.2491
+ topics.robust04.301-450.601-700.txt: 0.2516
robust05:
map:
- topics.robust05.txt: 0.1960
+ topics.robust05.txt: 0.1998
core17:
map:
- topics.core17.txt: 0.1986
+ topics.core17.txt: 0.2005
wt10g:
map:
- topics.451-550.txt: 0.1972
+ topics.451-550.txt: 0.1996
gov2:
map:
- topics.701-750.txt: 0.2535
- topics.751-800.txt: 0.3156
- topics.801-850.txt: 0.2845
+ topics.701-750.txt: 0.2592
+ topics.751-800.txt: 0.3195
+ topics.801-850.txt: 0.2900
cw09b:
map:
- topics.web.51-100.txt: 0.1067
- topics.web.101-150.txt: 0.1067
- topics.web.151-200.txt: 0.1042
+ topics.web.51-100.txt: 0.1111
+ topics.web.101-150.txt: 0.1081
+ topics.web.151-200.txt: 0.1089
ndcg20:
- topics.web.51-100.txt: 0.13895
- topics.web.101-150.txt: 0.18424
- topics.web.151-200.txt: 0.08933
+ topics.web.51-100.txt: 0.14176
+ topics.web.101-150.txt: 0.18778
+ topics.web.151-200.txt: 0.09333
err20:
- topics.web.51-100.txt: 0.07512
- topics.web.101-150.txt: 0.09258
- topics.web.151-200.txt: 0.12932
+ topics.web.51-100.txt: 0.07756
+ topics.web.101-150.txt: 0.09354
+ topics.web.151-200.txt: 0.13872
cw12b13:
map:
- topics.web.201-250.txt: 0.0434
- topics.web.251-300.txt: 0.0201
+ topics.web.201-250.txt: 0.0450
+ topics.web.251-300.txt: 0.0205
ndcg20:
- topics.web.201-250.txt: 0.12254
- topics.web.251-300.txt: 0.11349
+ topics.web.201-250.txt: 0.12218
+ topics.web.251-300.txt: 0.11593
err20:
- topics.web.201-250.txt: 0.08114
- topics.web.251-300.txt: 0.11991
+ topics.web.201-250.txt: 0.07970
+ topics.web.251-300.txt: 0.12031
mb11:
map:
- topics.microblog2011.txt: 0.3770
- topics.microblog2012.txt: 0.2098
+ topics.microblog2011.txt: 0.3769
+ topics.microblog2012.txt: 0.2107
mb13:
map:
- topics.microblog2013.txt: 0.2541
- topics.microblog2014.txt: 0.3844
+ topics.microblog2013.txt: 0.2531
+ topics.microblog2014.txt: 0.3854
f2log:
params:
f2log.s:
@@ -313,55 +313,55 @@ models:
expected:
disk12:
map:
- topics.51-100.txt: 0.2230
- topics.101-150.txt: 0.1992
- topics.151-200.txt: 0.2531
+ topics.51-100.txt: 0.2260
+ topics.101-150.txt: 0.2031
+ topics.151-200.txt: 0.2571
robust04:
map:
- topics.robust04.301-450.601-700.txt: 0.2500
+ topics.robust04.301-450.601-700.txt: 0.2523
robust05:
map:
- topics.robust05.txt: 0.1976
+ topics.robust05.txt: 0.2023
core17:
map:
- topics.core17.txt: 0.2041
+ topics.core17.txt: 0.2050
wt10g:
map:
- topics.451-550.txt: 0.1923
+ topics.451-550.txt: 0.1938
gov2:
map:
- topics.701-750.txt: 0.2627
- topics.751-800.txt: 0.3298
- topics.801-850.txt: 0.2970
+ topics.701-750.txt: 0.2689
+ topics.751-800.txt: 0.3342
+ topics.801-850.txt: 0.3026
cw09b:
map:
- topics.web.51-100.txt: 0.107
- topics.web.101-150.txt: 0.1108
- topics.web.151-200.txt: 0.1046
+ topics.web.51-100.txt: 0.1110
+ topics.web.101-150.txt: 0.1104
+ topics.web.151-200.txt: 0.1091
ndcg20:
- topics.web.51-100.txt: 0.13495
- topics.web.101-150.txt: 0.19114
- topics.web.151-200.txt: 0.09591
+ topics.web.51-100.txt: 0.13763
+ topics.web.101-150.txt: 0.19169
+ topics.web.151-200.txt: 0.09859
err20:
- topics.web.51-100.txt: 0.07234
- topics.web.101-150.txt: 0.09381
- topics.web.151-200.txt: 0.14312
+ topics.web.51-100.txt: 0.07245
+ topics.web.101-150.txt: 0.09435
+ topics.web.151-200.txt: 0.15240
cw12b13:
map:
- topics.web.201-250.txt: 0.0446
- topics.web.251-300.txt: 0.0212
+ topics.web.201-250.txt: 0.0454
+ topics.web.251-300.txt: 0.0213
ndcg20:
- topics.web.201-250.txt: 0.12442
- topics.web.251-300.txt: 0.11743
+ topics.web.201-250.txt: 0.12473
+ topics.web.251-300.txt: 0.11891
err20:
- topics.web.201-250.txt: 0.08356
- topics.web.251-300.txt: 0.12344
+ topics.web.201-250.txt: 0.08210
+ topics.web.251-300.txt: 0.12094
mb11:
map:
topics.microblog2011.txt: 0.3823
- topics.microblog2012.txt: 0.2018
+ topics.microblog2012.txt: 0.2033
mb13:
map:
topics.microblog2013.txt: 0.2622
- topics.microblog2014.txt: 0.4104
+ topics.microblog2014.txt: 0.4121
diff --git a/src/main/resources/regression/cacm.yaml b/src/main/resources/regression/cacm.yaml
index 1aba4802a9..eda72978cb 100644
--- a/src/main/resources/regression/cacm.yaml
+++ b/src/main/resources/regression/cacm.yaml
@@ -52,18 +52,18 @@ models:
- -bm25
results:
map:
- - 0.3102
+ - 0.3123
p30:
- - 0.1936
+ - 0.1942
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.3698
+ - 0.3688
p30:
- - 0.2301
+ - 0.2295
- name: bm25+ax
params:
- -bm25
@@ -72,9 +72,9 @@ models:
- -axiom.deterministic
results:
map:
- - 0.3002
+ - 0.3077
p30:
- - 0.1974
+ - 0.1955
- name: ql
params:
- -ql
@@ -89,9 +89,9 @@ models:
- -rm3
results:
map:
- - 0.3768
+ - 0.3818
p30:
- - 0.2250
+ - 0.2237
- name: ql+ax
params:
- -ql
@@ -100,6 +100,6 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2894
+ - 0.2907
p30:
- - 0.1795
+ - 0.1840
diff --git a/src/main/resources/regression/car17.yaml b/src/main/resources/regression/car17.yaml
index 737bbfa0f1..23cbcd6e7b 100644
--- a/src/main/resources/regression/car17.yaml
+++ b/src/main/resources/regression/car17.yaml
@@ -50,18 +50,18 @@ models:
- -bm25
results:
map:
- - 0.1650
+ - 0.1689
recip_rank:
- - 0.2270
+ - 0.2321
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.1343
+ - 0.1386
recip_rank:
- - 0.1852
+ - 0.1907
- name: bm25+ax
params:
- -bm25
@@ -70,15 +70,15 @@ models:
- -axiom.deterministic
results:
map:
- - 0.1318
+ - 0.1355
recip_rank:
- - 0.1817
+ - 0.1857
- name: ql
params:
- -ql
results:
map:
- - 0.1515
+ - 0.1516
recip_rank:
- 0.2085
- name: ql+rm3
@@ -87,9 +87,9 @@ models:
- -rm3
results:
map:
- - 0.1211
+ - 0.1198
recip_rank:
- - 0.1672
+ - 0.1653
- name: ql+ax
params:
- -ql
@@ -98,6 +98,6 @@ models:
- -axiom.deterministic
results:
map:
- - 0.1083
+ - 0.1082
recip_rank:
- - 0.1503
+ - 0.1501
diff --git a/src/main/resources/regression/core17.yaml b/src/main/resources/regression/core17.yaml
index dd1707deb8..42ca9c6f26 100644
--- a/src/main/resources/regression/core17.yaml
+++ b/src/main/resources/regression/core17.yaml
@@ -50,18 +50,18 @@ models:
- -bm25
results:
map:
- - 0.1996
+ - 0.1977
p30:
- - 0.4207
+ - 0.4160
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.2639
+ - 0.2596
p30:
- - 0.4880
+ - 0.4820
- name: bm25+ax
params:
- -bm25
@@ -70,26 +70,26 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2719
+ - 0.2700
p30:
- - 0.4900
+ - 0.4927
- name: ql
params:
- -ql
results:
map:
- - 0.1928
+ - 0.1913
p30:
- - 0.4327
+ - 0.4373
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.2427
+ - 0.2405
p30:
- - 0.4640
+ - 0.4580
- name: ql+ax
params:
- -ql
@@ -98,6 +98,6 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2498
+ - 0.2514
p30:
- - 0.4813
+ - 0.4827
diff --git a/src/main/resources/regression/core18.yaml b/src/main/resources/regression/core18.yaml
index 4f363b1a79..7ad9953fae 100644
--- a/src/main/resources/regression/core18.yaml
+++ b/src/main/resources/regression/core18.yaml
@@ -50,18 +50,18 @@ models:
- -bm25
results:
map:
- - 0.2487
+ - 0.2491
p30:
- - 0.3640
+ - 0.3580
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.2911
+ - 0.2952
p30:
- - 0.4087
+ - 0.4200
- name: bm25+ax
params:
- -bm25
@@ -70,26 +70,26 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2919
+ - 0.2921
p30:
- - 0.4033
+ - 0.4007
- name: ql
params:
- -ql
results:
map:
- - 0.2504
+ - 0.2522
p30:
- - 0.3620
+ - 0.3627
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.2754
+ - 0.2759
p30:
- - 0.3773
+ - 0.3753
- name: ql+ax
params:
- -ql
@@ -98,6 +98,6 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2976
+ - 0.2975
p30:
- - 0.4067
+ - 0.4073
diff --git a/src/main/resources/regression/cw09b.yaml b/src/main/resources/regression/cw09b.yaml
index bb018d4af7..e121457ac8 100644
--- a/src/main/resources/regression/cw09b.yaml
+++ b/src/main/resources/regression/cw09b.yaml
@@ -68,42 +68,42 @@ models:
- -bm25
results:
map:
+ - 0.1126
- 0.1094
- - 0.1095
- - 0.1072
+ - 0.1106
p30:
- - 0.2653
- - 0.2540
- - 0.2180
+ - 0.2681
+ - 0.2513
+ - 0.2167
ndcg20:
- - 0.13280
- - 0.19143
- - 0.09764
+ - 0.13539
+ - 0.18901
+ - 0.10141
err20:
- - 0.07167
- - 0.09470
- - 0.13823
+ - 0.07335
+ - 0.09592
+ - 0.13036
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.1075
- - 0.1146
- - 0.1318
+ - 0.1171
+ - 0.1142
+ - 0.1382
p30:
- - 0.2604
- - 0.2713
- - 0.2387
+ - 0.2819
+ - 0.2700
+ - 0.2473
ndcg20:
- - 0.14228
- - 0.18613
- - 0.13083
+ - 0.15446
+ - 0.18227
+ - 0.13294
err20:
- - 0.07842
- - 0.10805
- - 0.21787
+ - 0.08653
+ - 0.10422
+ - 0.22241
- name: bm25+ax
params:
- -bm25
@@ -113,62 +113,62 @@ models:
- -axiom.beta 0.1
results:
map:
- - 0.0966
- - 0.0996
- - 0.1242
+ - 0.0928
+ - 0.0974
+ - 0.1315
p30:
- - 0.2521
- - 0.2420
- - 0.2313
+ - 0.2354
+ - 0.2393
+ - 0.2553
ndcg20:
- - 0.17151
- - 0.18775
- - 0.11873
+ - 0.16375
+ - 0.18330
+ - 0.14413
err20:
- - 0.10073
- - 0.10645
- - 0.19208
+ - 0.09815
+ - 0.10909
+ - 0.23554
- name: ql
params:
- -ql
results:
map:
- - 0.1027
- - 0.0971
- - 0.1035
+ - 0.1060
+ - 0.0958
+ - 0.1069
p30:
- - 0.2417
- - 0.2220
- - 0.2013
+ - 0.2431
+ - 0.2147
+ - 0.2080
ndcg20:
- - 0.11319
- - 0.16347
- - 0.08620
+ - 0.11431
+ - 0.16192
+ - 0.08682
err20:
- - 0.05863
- - 0.08419
- - 0.13155
+ - 0.05994
+ - 0.08487
+ - 0.13052
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.1060
- - 0.0961
- - 0.1132
+ - 0.1117
+ - 0.0964
+ - 0.1167
p30:
- - 0.2507
- - 0.2207
- - 0.2040
+ - 0.2611
+ - 0.2147
+ - 0.2053
ndcg20:
- - 0.13136
- - 0.16075
- - 0.10750
+ - 0.13618
+ - 0.15199
+ - 0.10590
err20:
- - 0.06493
- - 0.09210
- - 0.15740
+ - 0.06486
+ - 0.08655
+ - 0.14750
- name: ql+ax
params:
- -ql
@@ -178,18 +178,18 @@ models:
- -axiom.beta 0.1
results:
map:
- - 0.1088
- - 0.0914
- - 0.1215
+ - 0.1086
+ - 0.0879
+ - 0.1212
p30:
- 0.2618
- - 0.2267
- - 0.2100
+ - 0.2167
+ - 0.2140
ndcg20:
- - 0.14695
- - 0.15916
- - 0.10551
+ - 0.14541
+ - 0.15091
+ - 0.10296
err20:
- - 0.08023
- - 0.08791
- - 0.15829
+ - 0.07424
+ - 0.08203
+ - 0.15575
diff --git a/src/main/resources/regression/cw12.yaml b/src/main/resources/regression/cw12.yaml
index 532f1894e5..219e7b7c38 100644
--- a/src/main/resources/regression/cw12.yaml
+++ b/src/main/resources/regression/cw12.yaml
@@ -65,64 +65,65 @@ models:
- -bm25
results:
map:
- - 0.1673
- - 0.2432
+ - 0.1695
+ - 0.2469
p30:
- - 0.2827
- - 0.4500
+ - 0.2767
+ - 0.4533
ndcg20:
- - 0.20662
- - 0.26458
+ - 0.20858
+ - 0.25776
err20:
- - 0.12126
- - 0.17373
+ - 0.12835
+ - 0.16305
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.1489
- - 0.2468
+ - 0.1498
+ - 0.2496
p30:
- - 0.2347
- - 0.4200
+ - 0.2407
+ - 0.4180
ndcg20:
- - 0.17566
- - 0.24349
+ - 0.18362
+ - 0.24303
err20:
- - 0.09148
- - 0.17411
+ - 0.09742
+ - 0.17134
- name: ql
params:
- -ql
results:
map:
- - 0.1438
- - 0.2401
+ - 0.1493
+ - 0.2467
p30:
- - 0.2507
- - 0.4367
+ - 0.2613
+ - 0.4380
ndcg20:
- - 0.19046
- - 0.23273
+ - 0.19935
+ - 0.22282
err20:
- - 0.11694
- - 0.14512
+ - 0.12319
+ - 0.13211
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.1235
- - 0.2331
+ - 0.1280
+ - 0.2383
p30:
- - 0.2047
- - 0.4013
+ - 0.2207
+ - 0.4107
ndcg20:
- - 0.15570
- - 0.21679
+ - 0.16115
+ - 0.22580
err20:
- - 0.08588
- - 0.13441
+ - 0.09129
+ - 0.14066
+
diff --git a/src/main/resources/regression/cw12b13.yaml b/src/main/resources/regression/cw12b13.yaml
index 0e6b636389..de5a60229b 100644
--- a/src/main/resources/regression/cw12b13.yaml
+++ b/src/main/resources/regression/cw12b13.yaml
@@ -65,34 +65,34 @@ models:
- -bm25
results:
map:
- - 0.0457
- - 0.0219
+ - 0.0468
+ - 0.0224
p30:
- - 0.2000
- - 0.1293
+ - 0.2113
+ - 0.1273
ndcg20:
- - 0.12419
- - 0.11900
+ - 0.12862
+ - 0.11849
err20:
- - 0.08205
- - 0.12373
+ - 0.08379
+ - 0.12013
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.0440
- - 0.0192
+ - 0.0450
+ - 0.0189
p30:
- - 0.1767
- - 0.1113
+ - 0.1787
+ - 0.1133
ndcg20:
- - 0.11974
- - 0.10017
+ - 0.12284
+ - 0.10124
err20:
- - 0.07771
- - 0.10139
+ - 0.08793
+ - 0.10390
- name: bm25+ax
params:
- -bm25
@@ -102,50 +102,50 @@ models:
- -axiom.beta 0.1
results:
map:
- - 0.0411
- - 0.0177
+ - 0.0435
+ - 0.0180
p30:
- - 0.1800
- - 0.1173
+ - 0.1840
+ - 0.1107
ndcg20:
- - 0.12449
- - 0.09690
+ - 0.12875
+ - 0.09637
err20:
- - 0.09151
- - 0.09588
+ - 0.09430
+ - 0.09289
- name: ql
params:
- -ql
results:
map:
- - 0.0389
- - 0.0228
+ - 0.0397
+ - 0.0235
p30:
- - 0.1720
- - 0.1313
+ - 0.1767
+ - 0.1373
ndcg20:
- - 0.11584
- - 0.11327
+ - 0.11067
+ - 0.11765
err20:
- - 0.07636
- - 0.10398
+ - 0.07689
+ - 0.10908
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.0314
- - 0.0202
+ - 0.0319
+ - 0.0205
p30:
- - 0.1420
- - 0.1160
+ - 0.1373
+ - 0.1173
ndcg20:
- - 0.08515
- - 0.09591
+ - 0.08799
+ - 0.10240
err20:
- - 0.05106
- - 0.09114
+ - 0.05681
+ - 0.10356
- name: ql+ax
params:
- -ql
@@ -155,14 +155,14 @@ models:
- -axiom.beta 0.1
results:
map:
- - 0.0354
- - 0.0189
+ - 0.0359
+ - 0.0186
p30:
- 0.1513
- - 0.1180
+ - 0.1167
ndcg20:
- - 0.11169
- - 0.09989
+ - 0.11435
+ - 0.10013
err20:
- - 0.07054
- - 0.09945
+ - 0.07800
+ - 0.08965
diff --git a/src/main/resources/regression/disk12.yaml b/src/main/resources/regression/disk12.yaml
index 78f7ce87df..e1997ecd22 100644
--- a/src/main/resources/regression/disk12.yaml
+++ b/src/main/resources/regression/disk12.yaml
@@ -56,12 +56,12 @@ models:
- -bm25
results:
map:
- - 0.2254
- - 0.2003
- - 0.2571
+ - 0.2273
+ - 0.2010
+ - 0.2580
p30:
- - 0.4493
- - 0.4213
+ - 0.4533
+ - 0.4280
- 0.4740
- name: bm25+rm3
params:
@@ -69,13 +69,13 @@ models:
- -rm3
results:
map:
- - 0.2607
- - 0.2579
- - 0.3224
+ - 0.2617
+ - 0.2600
+ - 0.3227
p30:
- - 0.4813
+ - 0.4867
- 0.4580
- - 0.5100
+ - 0.5040
- name: bm25+ax
params:
- -bm25
@@ -84,38 +84,38 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2675
- - 0.2708
- - 0.3349
+ - 0.2640
+ - 0.2722
+ - 0.3318
p30:
- - 0.5167
- - 0.4787
- - 0.5160
+ - 0.5067
+ - 0.4753
+ - 0.5100
- name: ql
params:
- -ql
results:
map:
- - 0.2188
- - 0.2013
- - 0.2530
+ - 0.2189
+ - 0.2015
+ - 0.2518
p30:
- - 0.4453
- - 0.4153
- - 0.4647
+ - 0.4520
+ - 0.4207
+ - 0.4580
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.2500
- - 0.2475
- - 0.3019
+ - 0.2478
+ - 0.2485
+ - 0.2996
p30:
- - 0.4687
- - 0.4427
- - 0.5013
+ - 0.4653
+ - 0.4453
+ - 0.4933
- name: ql+ax
params:
- -ql
@@ -124,10 +124,10 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2519
- - 0.2606
- - 0.3113
+ - 0.2501
+ - 0.2593
+ - 0.3103
p30:
- - 0.4967
- - 0.4660
- - 0.5160
+ - 0.4953
+ - 0.4740
+ - 0.5167
diff --git a/src/main/resources/regression/gov2.yaml b/src/main/resources/regression/gov2.yaml
index 126c4a68eb..9173dd1bfe 100644
--- a/src/main/resources/regression/gov2.yaml
+++ b/src/main/resources/regression/gov2.yaml
@@ -56,26 +56,26 @@ models:
- -bm25
results:
map:
- - 0.2673
- - 0.3366
- - 0.3055
+ - 0.2689
+ - 0.3390
+ - 0.3080
p30:
- - 0.4837
- - 0.5520
- - 0.4900
+ - 0.4864
+ - 0.5540
+ - 0.4907
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.2974
- - 0.3846
- - 0.3438
+ - 0.2943
+ - 0.3800
+ - 0.3356
p30:
- - 0.5347
- - 0.5960
- - 0.5227
+ - 0.5313
+ - 0.5873
+ - 0.5160
- name: bm25+ax
params:
- -bm25
@@ -85,38 +85,38 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2735
- - 0.3669
- - 0.3061
+ - 0.2665
+ - 0.3664
+ - 0.3069
p30:
- - 0.5082
- - 0.5947
- - 0.5007
+ - 0.4986
+ - 0.5933
+ - 0.5033
- name: ql
params:
- -ql
results:
map:
- - 0.2636
- - 0.3264
- - 0.2957
+ - 0.2681
+ - 0.3303
+ - 0.2996
p30:
- - 0.4667
- - 0.5160
- - 0.4753
+ - 0.4755
+ - 0.5347
+ - 0.4720
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.2770
- - 0.3610
- - 0.3160
+ - 0.2806
+ - 0.3628
+ - 0.3173
p30:
- - 0.4878
- - 0.5673
- - 0.4853
+ - 0.4952
+ - 0.5720
+ - 0.4773
- name: ql+ax
params:
- -ql
@@ -126,10 +126,10 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2638
- - 0.3670
- - 0.3112
+ - 0.2666
+ - 0.3646
+ - 0.3084
p30:
- - 0.4837
- - 0.5880
- - 0.5007
+ - 0.4932
+ - 0.5840
+ - 0.4920
diff --git a/src/main/resources/regression/mb11.yaml b/src/main/resources/regression/mb11.yaml
index 225b46f02e..b9d5085184 100644
--- a/src/main/resources/regression/mb11.yaml
+++ b/src/main/resources/regression/mb11.yaml
@@ -58,11 +58,11 @@ models:
- -bm25
results:
map:
- - 0.3351
- - 0.1912
+ - 0.3384
+ - 0.1948
p30:
- - 0.3837
- - 0.3328
+ - 0.3959
+ - 0.3316
- name: bm25+rm3
params:
- -searchtweets
@@ -70,11 +70,11 @@ models:
- -rm3
results:
map:
- - 0.3477
- - 0.2055
+ - 0.3621
+ - 0.2124
p30:
- - 0.4027
- - 0.3424
+ - 0.4088
+ - 0.3463
- name: bm25+ax
params:
- -searchtweets
@@ -85,22 +85,22 @@ models:
- -axiom.deterministic
results:
map:
- - 0.4042
- - 0.2310
+ - 0.4008
+ - 0.2309
p30:
- - 0.4558
- - 0.3588
+ - 0.4612
+ - 0.3554
- name: ql
params:
- -searchtweets
- -ql
results:
map:
- - 0.3614
- - 0.2100
+ - 0.3584
+ - 0.2102
p30:
- - 0.4095
- - 0.3322
+ - 0.4061
+ - 0.3333
- name: ql+rm3
params:
- -searchtweets
@@ -108,11 +108,11 @@ models:
- -rm3
results:
map:
- - 0.4093
- - 0.2412
+ - 0.4097
+ - 0.2397
p30:
- 0.4483
- - 0.3542
+ - 0.3571
- name: ql+ax
params:
- -searchtweets
@@ -123,8 +123,8 @@ models:
- -axiom.deterministic
results:
map:
- - 0.4179
- - 0.2502
+ - 0.4201
+ - 0.2474
p30:
- - 0.4367
- - 0.3864
+ - 0.4408
+ - 0.3842
diff --git a/src/main/resources/regression/mb13.yaml b/src/main/resources/regression/mb13.yaml
index 926476e84e..df6175c273 100644
--- a/src/main/resources/regression/mb13.yaml
+++ b/src/main/resources/regression/mb13.yaml
@@ -59,11 +59,11 @@ models:
- -bm25
results:
map:
- - 0.2306
- - 0.3836
+ - 0.2371
+ - 0.3931
p30:
- - 0.4222
- - 0.6176
+ - 0.4339
+ - 0.6212
- name: bm25+rm3
params:
- -searchtweets
@@ -71,11 +71,11 @@ models:
- -rm3
results:
map:
- - 0.2356
- - 0.4036
+ - 0.2440
+ - 0.4158
p30:
- - 0.4044
- - 0.6061
+ - 0.4350
+ - 0.6236
- name: bm25+ax
params:
- -searchtweets
@@ -86,22 +86,22 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2770
- - 0.4673
+ - 0.2855
+ - 0.4796
p30:
- - 0.4611
- - 0.6479
+ - 0.4728
+ - 0.6648
- name: ql
params:
- -searchtweets
- -ql
results:
map:
- - 0.2599
- - 0.4184
+ - 0.2602
+ - 0.4181
p30:
- - 0.4517
- - 0.6424
+ - 0.4561
+ - 0.6430
- name: ql+rm3
params:
- -searchtweets
@@ -109,11 +109,11 @@ models:
- -rm3
results:
map:
- - 0.2796
- - 0.4763
+ - 0.2815
+ - 0.4746
p30:
- - 0.4600
- - 0.6606
+ - 0.4672
+ - 0.6594
- name: ql+ax
params:
- -searchtweets
@@ -124,8 +124,8 @@ models:
- -axiom.deterministic
results:
map:
- - 0.3167
- - 0.4943
+ - 0.3152
+ - 0.4965
p30:
- - 0.5117
- - 0.6770
+ - 0.5078
+ - 0.6727
diff --git a/src/main/resources/regression/robust04.yaml b/src/main/resources/regression/robust04.yaml
index 921789114b..55844ce23b 100644
--- a/src/main/resources/regression/robust04.yaml
+++ b/src/main/resources/regression/robust04.yaml
@@ -51,18 +51,18 @@ models:
- -bm25
results:
map:
- - 0.2501
+ - 0.2531
p30:
- - 0.3123
+ - 0.3102
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.2759
+ - 0.2778
p30:
- - 0.3252
+ - 0.3288
- name: bm25+ax
params:
- -bm25
@@ -71,26 +71,26 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2860
+ - 0.2895
p30:
- - 0.3339
+ - 0.3333
- name: ql
params:
- -ql
results:
map:
- - 0.2468
+ - 0.2467
p30:
- - 0.3083
+ - 0.3079
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.2643
+ - 0.2649
p30:
- - 0.3138
+ - 0.3171
- name: ql+ax
params:
- -ql
@@ -99,6 +99,6 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2775
+ - 0.2774
p30:
- - 0.3233
+ - 0.3229
diff --git a/src/main/resources/regression/robust05.yaml b/src/main/resources/regression/robust05.yaml
index 4e9f2c14ba..901d84fa48 100644
--- a/src/main/resources/regression/robust05.yaml
+++ b/src/main/resources/regression/robust05.yaml
@@ -51,18 +51,18 @@ models:
- -bm25
results:
map:
- - 0.2003
+ - 0.2031
p30:
- - 0.3660
+ - 0.3693
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.2517
+ - 0.2523
p30:
- - 0.3913
+ - 0.4007
- name: bm25+ax
params:
- -bm25
@@ -71,26 +71,26 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2528
+ - 0.2584
p30:
- - 0.4007
+ - 0.4120
- name: ql
params:
- -ql
results:
map:
- - 0.2026
+ - 0.2028
p30:
- - 0.3713
+ - 0.3653
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.2474
+ - 0.2466
p30:
- - 0.4020
+ - 0.4067
- name: ql+ax
params:
- -ql
@@ -99,6 +99,6 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2501
+ - 0.2476
p30:
- - 0.4080
+ - 0.4113
diff --git a/src/main/resources/regression/wt10g.yaml b/src/main/resources/regression/wt10g.yaml
index 862973cab2..7f2197cf90 100644
--- a/src/main/resources/regression/wt10g.yaml
+++ b/src/main/resources/regression/wt10g.yaml
@@ -51,18 +51,18 @@ models:
- -bm25
results:
map:
- - 0.1981
+ - 0.1992
p30:
- - 0.2201
+ - 0.2218
- name: bm25+rm3
params:
- -bm25
- -rm3
results:
map:
- - 0.2169
+ - 0.2163
p30:
- - 0.2456
+ - 0.2463
- name: bm25+ax
params:
- -bm25
@@ -72,26 +72,26 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2185
+ - 0.2200
p30:
- - 0.2442
+ - 0.2483
- name: ql
params:
- -ql
results:
map:
- - 0.2015
+ - 0.2021
p30:
- - 0.2184
+ - 0.2180
- name: ql+rm3
params:
- -ql
- -rm3
results:
map:
- - 0.2169
+ - 0.2151
p30:
- - 0.2354
+ - 0.2276
- name: ql+ax
params:
- -ql
@@ -101,6 +101,6 @@ models:
- -axiom.deterministic
results:
map:
- - 0.2250
+ - 0.2275
p30:
- - 0.2520
+ - 0.2517
diff --git a/src/test/java/io/anserini/integration/IndexerTest.java b/src/test/java/io/anserini/integration/IndexerTest.java
index 908d9dbbe6..c37c9c69f6 100644
--- a/src/test/java/io/anserini/integration/IndexerTest.java
+++ b/src/test/java/io/anserini/integration/IndexerTest.java
@@ -273,6 +273,16 @@ public FieldsProducer getPostingsReader() {
System.out.println("Getting custom postings reader...");
return new MyFieldsProducer(in.getPostingsReader());
}
+
+ @Override
+ public IndexReader.CacheHelper getCoreCacheHelper() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public IndexReader.CacheHelper getReaderCacheHelper() {
+ throw new UnsupportedOperationException();
+ }
}
// Custom class so we can intercept calls and potentially alter behavior.
diff --git a/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java b/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java
index 5620a87f41..076d2f4854 100644
--- a/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java
+++ b/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java
@@ -43,7 +43,7 @@ protected void init() {
protected void setSearchArgs() {
super.setSearchArgs();
searchArgs.bm25 = true;
- searchArgs.b = new String[] {"0.2", "0.4"};
+ searchArgs.b = new String[] {"0.2", "0.8"};
}
protected void testEval() throws Exception {
diff --git a/src/test/java/io/anserini/integration/TrecEndToEndTest.java b/src/test/java/io/anserini/integration/TrecEndToEndTest.java
index 508e8f00a6..c5c83c26a2 100644
--- a/src/test/java/io/anserini/integration/TrecEndToEndTest.java
+++ b/src/test/java/io/anserini/integration/TrecEndToEndTest.java
@@ -25,16 +25,22 @@ protected void init() {
generator = "Jsoup";
topicReader = "Trec";
- fieldNormStatusTotalFields = 1; // text
- termIndexStatusTermCount = 12; // Please note that standard analyzer ignores stopwords.
- // Also, this includes docids
- termIndexStatusTotFreq = 17; //
- termIndexStatusTotPos = 16; // only "text" fields are indexed with position so we have 16
+ fieldNormStatusTotalFields = 1; // text
+ termIndexStatusTermCount = 12; // Note that standard analyzer ignores stopwords; includes docids.
+ termIndexStatusTotFreq = 17;
+ termIndexStatusTotPos = 16; // Only "text" fields are indexed with position so we have 16.
storedFieldStatusTotalDocCounts = 3;
storedFieldStatusTotFields = 9; // 3 docs * (1 id + 1 text + 1 raw)
- evalMetricValue = (float)(0.0/1+1.0/2+2.0/3)/2.0f; // 3 retrieved docs in total:
- // 1st retrieved doc is non-rel, 2nd and 3rd are rel
- // and there are in total 3 rel docs in qrels
+ // The search output should be as follows (for Lucene 7.5):
+ // 1 Q0 DOC222 1 0.652100 Anserini
+ // 1 Q0 TREC_DOC_1 2 0.633500 Anserini
+ // 1 Q0 WSJ_1 3 0.130400 Anserini
+
+ // Qrels are at src/test/resources/sample_qrels/Trec
+ // 1 0 TREC_DOC_1 0
+ // 1 0 DOC222 1
+ // 1 0 WSJ_1 1
+ evalMetricValue = (float) (1.0/1.0 + 2.0/3)/2.0f;
}
}
diff --git a/src/test/java/io/anserini/integration/TweetEndToEndTest.java b/src/test/java/io/anserini/integration/TweetEndToEndTest.java
index a0ef17e845..247f127244 100644
--- a/src/test/java/io/anserini/integration/TweetEndToEndTest.java
+++ b/src/test/java/io/anserini/integration/TweetEndToEndTest.java
@@ -34,9 +34,21 @@ protected void init() {
storedFieldStatusTotalDocCounts = 4;
storedFieldStatusTotFields = 12; // 4 tweets * (1 id + 1 text + 1 raw)
- evalMetricValue = (float)(0.0/1+1.0/2)/3.0f; // 2 retrieved docs in total: (please note the querytweettime filters 1 rel tweet)
- // 1st retrieved doc is non-rel, 2nd retrieved is rel
- // and there are in total 3 rel docs in qrels
+ // The search output should be as follows (for Lucene 7.5):
+ // 1 Q0 5 1 1.167100 Anserini
+ // 1 Q0 3 2 0.693100 Anserini
+
+ // Qrels are at src/test/resources/sample_qrels/Microblog
+ // 1 0 1 0
+ // 1 0 3 1
+ // 1 0 5 0
+ // 1 0 6 0
+ // 1 0 8 1
+ // 1 0 10 1
+ evalMetricValue = (float) (0.0/1 + 1.0/2)/3.0f;
+ // 2 retrieved docs in total: note that querytweettime filters 1 rel tweet.
+ // 1st retrieved doc is not relevant, 2nd retrieved doc is relevant,
+ // and there are 3 relevant docs in qrels.
}
@Override