diff --git a/docs/experiments-car17.md b/docs/experiments-car17.md index 29c01a5e52..7a87528496 100644 --- a/docs/experiments-car17.md +++ b/docs/experiments-car17.md @@ -63,11 +63,11 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.1650 | 0.1343 | 0.1318 | 0.1515 | 0.1211 | 0.1083 | +All Topics | 0.1689 | 0.1386 | 0.1355 | 0.1516 | 0.1198 | 0.1082 | RECIP_RANK | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.2270 | 0.1852 | 0.1817 | 0.2085 | 0.1672 | 0.1503 | +All Topics | 0.2321 | 0.1907 | 0.1857 | 0.2085 | 0.1653 | 0.1501 | diff --git a/docs/experiments-core17.md b/docs/experiments-core17.md index 0b13fe5d57..9a2dec00da 100644 --- a/docs/experiments-core17.md +++ b/docs/experiments-core17.md @@ -63,11 +63,11 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.1996 | 0.2639 | 0.2719 | 0.1928 | 0.2427 | 0.2498 | +All Topics | 0.1977 | 0.2596 | 0.2700 | 0.1913 | 0.2405 | 0.2514 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.4207 | 0.4880 | 0.4900 | 0.4327 | 0.4640 | 0.4813 | +All Topics | 0.4160 | 0.4820 | 0.4927 | 0.4373 | 0.4580 | 0.4827 | diff --git a/docs/experiments-core18.md b/docs/experiments-core18.md index 9871e8c7ac..9aabfa81fe 100644 --- a/docs/experiments-core18.md +++ b/docs/experiments-core18.md @@ -62,11 +62,11 @@ With the above commands, you should be 
able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.2487 | 0.2911 | 0.2919 | 0.2504 | 0.2754 | 0.2976 | +All Topics | 0.2491 | 0.2952 | 0.2921 | 0.2522 | 0.2759 | 0.2975 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.3640 | 0.4087 | 0.4033 | 0.3620 | 0.3773 | 0.4067 | +All Topics | 0.3580 | 0.4200 | 0.4007 | 0.3627 | 0.3753 | 0.4073 | diff --git a/docs/experiments-cw09b.md b/docs/experiments-cw09b.md index a6a58223c1..152c8b5f77 100644 --- a/docs/experiments-cw09b.md +++ b/docs/experiments-cw09b.md @@ -110,29 +110,29 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -TREC 2010 Web Track: Topics 51-100 | 0.1094 | 0.1075 | 0.0966 | 0.1027 | 0.1060 | 0.1088 | -TREC 2011 Web Track: Topics 101-150 | 0.1095 | 0.1146 | 0.0996 | 0.0971 | 0.0961 | 0.0914 | -TREC 2012 Web Track: Topics 151-200 | 0.1072 | 0.1318 | 0.1242 | 0.1035 | 0.1132 | 0.1215 | +TREC 2010 Web Track: Topics 51-100 | 0.1126 | 0.1171 | 0.0928 | 0.1060 | 0.1117 | 0.1086 | +TREC 2011 Web Track: Topics 101-150 | 0.1094 | 0.1142 | 0.0974 | 0.0958 | 0.0964 | 0.0879 | +TREC 2012 Web Track: Topics 151-200 | 0.1106 | 0.1382 | 0.1315 | 0.1069 | 0.1167 | 0.1212 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -TREC 2010 Web Track: Topics 51-100 | 0.2653 | 0.2604 | 0.2521 | 0.2417 | 0.2507 | 0.2618 | -TREC 2011 Web Track: Topics 101-150 | 0.2540 | 0.2713 | 0.2420 | 0.2220 | 0.2207 | 
0.2267 | -TREC 2012 Web Track: Topics 151-200 | 0.2180 | 0.2387 | 0.2313 | 0.2013 | 0.2040 | 0.2100 | +TREC 2010 Web Track: Topics 51-100 | 0.2681 | 0.2819 | 0.2354 | 0.2431 | 0.2611 | 0.2618 | +TREC 2011 Web Track: Topics 101-150 | 0.2513 | 0.2700 | 0.2393 | 0.2147 | 0.2147 | 0.2167 | +TREC 2012 Web Track: Topics 151-200 | 0.2167 | 0.2473 | 0.2553 | 0.2080 | 0.2053 | 0.2140 | NDCG20 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -TREC 2010 Web Track: Topics 51-100 | 0.1328 | 0.1423 | 0.1715 | 0.1132 | 0.1314 | 0.1470 | -TREC 2011 Web Track: Topics 101-150 | 0.1914 | 0.1861 | 0.1877 | 0.1635 | 0.1608 | 0.1592 | -TREC 2012 Web Track: Topics 151-200 | 0.0976 | 0.1308 | 0.1187 | 0.0862 | 0.1075 | 0.1055 | +TREC 2010 Web Track: Topics 51-100 | 0.1354 | 0.1545 | 0.1637 | 0.1143 | 0.1362 | 0.1454 | +TREC 2011 Web Track: Topics 101-150 | 0.1890 | 0.1823 | 0.1833 | 0.1619 | 0.1520 | 0.1509 | +TREC 2012 Web Track: Topics 151-200 | 0.1014 | 0.1329 | 0.1441 | 0.0868 | 0.1059 | 0.1030 | ERR20 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -TREC 2010 Web Track: Topics 51-100 | 0.0717 | 0.0784 | 0.1007 | 0.0586 | 0.0649 | 0.0802 | -TREC 2011 Web Track: Topics 101-150 | 0.0947 | 0.1081 | 0.1064 | 0.0842 | 0.0921 | 0.0879 | -TREC 2012 Web Track: Topics 151-200 | 0.1382 | 0.2179 | 0.1921 | 0.1315 | 0.1574 | 0.1583 | +TREC 2010 Web Track: Topics 51-100 | 0.0733 | 0.0865 | 0.0981 | 0.0599 | 0.0649 | 0.0742 | +TREC 2011 Web Track: Topics 101-150 | 0.0959 | 0.1042 | 0.1091 | 0.0849 | 0.0865 | 0.0820 | +TREC 2012 Web Track: Topics 151-200 | 0.1304 | 0.2224 | 0.2355 | 0.1305 | 0.1475 | 0.1558 | diff --git a/docs/experiments-cw12.md b/docs/experiments-cw12.md index 3bd7f4b9de..c9269b09d6 100644 --- a/docs/experiments-cw12.md +++ b/docs/experiments-cw12.md 
@@ -73,25 +73,25 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | QL | QL+RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1673 | 0.1489 | 0.1438 | 0.1235 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2432 | 0.2468 | 0.2401 | 0.2331 | +[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1695 | 0.1498 | 0.1493 | 0.1280 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2469 | 0.2496 | 0.2467 | 0.2383 | P30 | BM25 | BM25+RM3 | QL | QL+RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2827 | 0.2347 | 0.2507 | 0.2047 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.4500 | 0.4200 | 0.4367 | 0.4013 | +[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2767 | 0.2407 | 0.2613 | 0.2207 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.4533 | 0.4180 | 0.4380 | 0.4107 | NDCG20 | BM25 | BM25+RM3 | QL | QL+RM3 | :---------------------------------------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2066 | 0.1757 | 0.1905 | 0.1557 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2646 | 0.2435 | 0.2327 | 0.2168 | +[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2086 | 0.1836 | 0.1993 | 0.1611 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.2578 | 0.2430 | 0.2228 | 0.2258 | ERR20 | BM25 | BM25+RM3 | QL | QL+RM3 | 
:---------------------------------------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1213 | 0.0915 | 0.1169 | 0.0859 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1737 | 0.1741 | 0.1451 | 0.1344 | +[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1284 | 0.0974 | 0.1232 | 0.0913 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1630 | 0.1713 | 0.1321 | 0.1407 | diff --git a/docs/experiments-cw12b13.md b/docs/experiments-cw12b13.md index 9374fb2ed3..ca0d362000 100644 --- a/docs/experiments-cw12b13.md +++ b/docs/experiments-cw12b13.md @@ -88,25 +88,25 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0457 | 0.0440 | 0.0411 | 0.0389 | 0.0314 | 0.0354 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.0219 | 0.0192 | 0.0177 | 0.0228 | 0.0202 | 0.0189 | +[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0468 | 0.0450 | 0.0435 | 0.0397 | 0.0319 | 0.0359 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.0224 | 0.0189 | 0.0180 | 0.0235 | 0.0205 | 0.0186 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2000 | 0.1767 | 0.1800 | 0.1720 | 0.1420 | 0.1513 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1293 | 0.1113 | 0.1173 | 0.1313 | 0.1160 | 0.1180 | +[TREC 2013 Web Track: 
Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.2113 | 0.1787 | 0.1840 | 0.1767 | 0.1373 | 0.1513 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1273 | 0.1133 | 0.1107 | 0.1373 | 0.1173 | 0.1167 | NDCG20 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1242 | 0.1197 | 0.1245 | 0.1158 | 0.0852 | 0.1117 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1190 | 0.1002 | 0.0969 | 0.1133 | 0.0959 | 0.0999 | +[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.1286 | 0.1228 | 0.1287 | 0.1107 | 0.0880 | 0.1143 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1185 | 0.1012 | 0.0964 | 0.1177 | 0.1024 | 0.1001 | ERR20 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0821 | 0.0777 | 0.0915 | 0.0764 | 0.0511 | 0.0705 | -[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1237 | 0.1014 | 0.0959 | 0.1040 | 0.0911 | 0.0994 | +[TREC 2013 Web Track: Topics 201-250](http://trec.nist.gov/data/web2013.html)| 0.0838 | 0.0879 | 0.0943 | 0.0769 | 0.0568 | 0.0780 | +[TREC 2014 Web Track: Topics 251-300](http://trec.nist.gov/data/web2014.html)| 0.1201 | 0.1039 | 0.0929 | 0.1091 | 0.1036 | 0.0896 | diff --git a/docs/experiments-disk12.md b/docs/experiments-disk12.md index fbe6b02254..c3f2849592 100644 --- a/docs/experiments-disk12.md +++ b/docs/experiments-disk12.md @@ -90,15 +90,15 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | 
:---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -TREC-1 Ad Hoc Track: Topics 51-100 | 0.2254 | 0.2607 | 0.2675 | 0.2188 | 0.2500 | 0.2519 | -TREC-2 Ad Hoc Track: Topics 101-150 | 0.2003 | 0.2579 | 0.2708 | 0.2013 | 0.2475 | 0.2606 | -TREC-3 Ad Hoc Track: Topics 151-200 | 0.2571 | 0.3224 | 0.3349 | 0.2530 | 0.3019 | 0.3113 | +TREC-1 Ad Hoc Track: Topics 51-100 | 0.2273 | 0.2617 | 0.2640 | 0.2189 | 0.2478 | 0.2501 | +TREC-2 Ad Hoc Track: Topics 101-150 | 0.2010 | 0.2600 | 0.2722 | 0.2015 | 0.2485 | 0.2593 | +TREC-3 Ad Hoc Track: Topics 151-200 | 0.2580 | 0.3227 | 0.3318 | 0.2518 | 0.2996 | 0.3103 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -TREC-1 Ad Hoc Track: Topics 51-100 | 0.4493 | 0.4813 | 0.5167 | 0.4453 | 0.4687 | 0.4967 | -TREC-2 Ad Hoc Track: Topics 101-150 | 0.4213 | 0.4580 | 0.4787 | 0.4153 | 0.4427 | 0.4660 | -TREC-3 Ad Hoc Track: Topics 151-200 | 0.4740 | 0.5100 | 0.5160 | 0.4647 | 0.5013 | 0.5160 | +TREC-1 Ad Hoc Track: Topics 51-100 | 0.4533 | 0.4867 | 0.5067 | 0.4520 | 0.4653 | 0.4953 | +TREC-2 Ad Hoc Track: Topics 101-150 | 0.4280 | 0.4580 | 0.4753 | 0.4207 | 0.4453 | 0.4740 | +TREC-3 Ad Hoc Track: Topics 151-200 | 0.4740 | 0.5040 | 0.5100 | 0.4580 | 0.4933 | 0.5167 | diff --git a/docs/experiments-gov2.md b/docs/experiments-gov2.md index 2a24300fc5..084aba8c44 100644 --- a/docs/experiments-gov2.md +++ b/docs/experiments-gov2.md @@ -90,15 +90,15 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.2673 | 0.2974 | 0.2735 | 0.2636 | 0.2770 | 0.2638 | -[TREC 2005 Terabyte Track: 
Topics 751-800](http://trec.nist.gov/data/terabyte05.html)| 0.3366 | 0.3846 | 0.3669 | 0.3264 | 0.3610 | 0.3670 | -[TREC 2006 Terabyte Track: Topics 801-850](http://trec.nist.gov/data/terabyte06.html)| 0.3055 | 0.3438 | 0.3061 | 0.2957 | 0.3160 | 0.3112 | +[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.2689 | 0.2943 | 0.2665 | 0.2681 | 0.2806 | 0.2666 | +[TREC 2005 Terabyte Track: Topics 751-800](http://trec.nist.gov/data/terabyte05.html)| 0.3390 | 0.3800 | 0.3664 | 0.3303 | 0.3628 | 0.3646 | +[TREC 2006 Terabyte Track: Topics 801-850](http://trec.nist.gov/data/terabyte06.html)| 0.3080 | 0.3356 | 0.3069 | 0.2996 | 0.3173 | 0.3084 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.4837 | 0.5347 | 0.5082 | 0.4667 | 0.4878 | 0.4837 | -[TREC 2005 Terabyte Track: Topics 751-800](http://trec.nist.gov/data/terabyte05.html)| 0.5520 | 0.5960 | 0.5947 | 0.5160 | 0.5673 | 0.5880 | -[TREC 2006 Terabyte Track: Topics 801-850](http://trec.nist.gov/data/terabyte06.html)| 0.4900 | 0.5227 | 0.5007 | 0.4753 | 0.4853 | 0.5007 | +[TREC 2004 Terabyte Track: Topics 701-750](http://trec.nist.gov/data/terabyte04.html)| 0.4864 | 0.5313 | 0.4986 | 0.4755 | 0.4952 | 0.4932 | +[TREC 2005 Terabyte Track: Topics 751-800](http://trec.nist.gov/data/terabyte05.html)| 0.5540 | 0.5873 | 0.5933 | 0.5347 | 0.5720 | 0.5840 | +[TREC 2006 Terabyte Track: Topics 801-850](http://trec.nist.gov/data/terabyte06.html)| 0.4907 | 0.5160 | 0.5033 | 0.4720 | 0.4773 | 0.4920 | diff --git a/docs/experiments-jdiq2018.md b/docs/experiments-jdiq2018.md index 84729eecb6..2f98402ade 100644 --- a/docs/experiments-jdiq2018.md +++ b/docs/experiments-jdiq2018.md @@ -38,95 +38,95 @@ The script assumes hard-coded index directories; modify as appropriate. 
#### disk12 MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.151-200.txt | 0.2605 | 0.2474 | 0.2524 | 0.2544 | 0.2531 | 0.2448 | -topics.51-100.txt | 0.2262 | 0.2216 | 0.2213 | 0.2210 | 0.2230 | 0.2189 | -topics.101-150.txt | 0.2062 | 0.1997 | 0.1952 | 0.2017 | 0.1992 | 0.1819 | +topics.151-200.txt | 0.2614 | 0.2512 | 0.2544 | 0.2558 | 0.2571 | 0.2459 | +topics.51-100.txt | 0.2274 | 0.2245 | 0.2226 | 0.2226 | 0.2260 | 0.2201 | +topics.101-150.txt | 0.2071 | 0.2035 | 0.1967 | 0.2015 | 0.2031 | 0.1840 | #### robust04 MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.robust04.301-450.601-700.txt | 0.2532 | 0.2491 | 0.2521 | 0.2496 | 0.2500 | 0.2502 | +topics.robust04.301-450.601-700.txt | 0.2543 | 0.2516 | 0.2531 | 0.2514 | 0.2523 | 0.2509 | #### robust05 MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.robust05.txt | 0.2090 | 0.1960 | 0.2006 | 0.2026 | 0.1976 | 0.1969 | +topics.robust05.txt | 0.2097 | 0.1998 | 0.2021 | 0.2030 | 0.2023 | 0.1980 | #### core17 MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.core17.txt | 0.2047 | 0.1986 | 0.2005 | 0.1951 | 0.2041 | 0.1981 | +topics.core17.txt | 0.2052 | 0.2005 | 0.2019 | 0.1943 | 0.2050 | 0.1999 | #### wt10g MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.451-550.txt | 0.2012 | 0.1972 | 0.1889 | 0.2034 | 0.1923 | 0.1726 | +topics.451-550.txt | 0.2005 | 0.1996 | 0.1880 | 0.2021 | 0.1938 | 0.1704 | #### gov2 MAP | BM25 | F2EXP | PL2 | QL 
| F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.701-750.txt | 0.2684 | 0.2535 | 0.2696 | 0.2636 | 0.2627 | 0.2687 | -topics.751-800.txt | 0.3392 | 0.3156 | 0.3428 | 0.3267 | 0.3298 | 0.3386 | -topics.801-850.txt | 0.3080 | 0.2845 | 0.3084 | 0.2957 | 0.2970 | 0.3140 | +topics.701-750.txt | 0.2702 | 0.2592 | 0.2726 | 0.2700 | 0.2689 | 0.2734 | +topics.751-800.txt | 0.3394 | 0.3195 | 0.3439 | 0.3303 | 0.3342 | 0.3393 | +topics.801-850.txt | 0.3085 | 0.2900 | 0.3088 | 0.3013 | 0.3026 | 0.3139 | #### cw09b ERR20 | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.web.151-200.txt | 0.1472 | 0.1293 | 0.1431 | 0.1489 | 0.1431 | 0.1435 | -topics.web.101-150.txt | 0.1023 | 0.0926 | 0.0910 | 0.0861 | 0.0938 | 0.0908 | -topics.web.51-100.txt | 0.0764 | 0.0751 | 0.0635 | 0.0646 | 0.0723 | 0.0665 | +topics.web.151-200.txt | 0.1524 | 0.1387 | 0.1439 | 0.1484 | 0.1524 | 0.1445 | +topics.web.101-150.txt | 0.0981 | 0.0935 | 0.0892 | 0.0868 | 0.0944 | 0.0893 | +topics.web.51-100.txt | 0.0774 | 0.0776 | 0.0635 | 0.0643 | 0.0725 | 0.0659 | NDCG20 | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.web.151-200.txt | 0.1038 | 0.0893 | 0.0928 | 0.0997 | 0.0959 | 0.0931 | -topics.web.101-150.txt | 0.1937 | 0.1842 | 0.1774 | 0.1687 | 0.1911 | 0.1762 | -topics.web.51-100.txt | 0.1459 | 0.1390 | 0.1213 | 0.1170 | 0.1350 | 0.1232 | +topics.web.151-200.txt | 0.1090 | 0.0933 | 0.0927 | 0.0978 | 0.0986 | 0.0933 | +topics.web.101-150.txt | 0.1927 | 0.1878 | 0.1765 | 0.1701 | 0.1917 | 0.1758 | +topics.web.51-100.txt | 0.1487 | 0.1418 | 0.1217 | 0.1185 | 0.1376 | 0.1252 | MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | 
:---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.web.151-200.txt | 0.1202 | 0.1042 | 0.1135 | 0.1091 | 0.1046 | 0.1131 | -topics.web.101-150.txt | 0.1117 | 0.1067 | 0.1075 | 0.1002 | 0.1108 | 0.1066 | -topics.web.51-100.txt | 0.1147 | 0.1067 | 0.1085 | 0.1040 | 0.1070 | 0.1077 | +topics.web.151-200.txt | 0.1226 | 0.1089 | 0.1170 | 0.1113 | 0.1091 | 0.1163 | +topics.web.101-150.txt | 0.1104 | 0.1081 | 0.1067 | 0.1004 | 0.1104 | 0.1063 | +topics.web.51-100.txt | 0.1165 | 0.1111 | 0.1103 | 0.1060 | 0.1110 | 0.1099 | #### cw12b13 ERR20 | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.web.251-300.txt | 0.1271 | 0.1199 | 0.1075 | 0.1088 | 0.1234 | 0.1090 | -topics.web.201-250.txt | 0.0959 | 0.0811 | 0.0907 | 0.0883 | 0.0836 | 0.0905 | +topics.web.251-300.txt | 0.1224 | 0.1203 | 0.1109 | 0.1108 | 0.1209 | 0.1135 | +topics.web.201-250.txt | 0.0993 | 0.0797 | 0.0933 | 0.0898 | 0.0821 | 0.0940 | NDCG20 | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.web.251-300.txt | 0.1237 | 0.1135 | 0.1177 | 0.1188 | 0.1174 | 0.1179 | -topics.web.201-250.txt | 0.1386 | 0.1225 | 0.1239 | 0.1168 | 0.1244 | 0.1253 | +topics.web.251-300.txt | 0.1247 | 0.1159 | 0.1213 | 0.1209 | 0.1189 | 0.1213 | +topics.web.201-250.txt | 0.1384 | 0.1222 | 0.1247 | 0.1168 | 0.1247 | 0.1258 | MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.web.251-300.txt | 0.0238 | 0.0201 | 0.0239 | 0.0241 | 0.0212 | 0.0238 | -topics.web.201-250.txt | 0.0475 | 0.0434 | 0.0416 | 0.0392 | 0.0446 | 0.0412 | +topics.web.251-300.txt | 0.0237 | 0.0205 | 0.0242 | 0.0246 | 0.0213 | 0.0240 | +topics.web.201-250.txt | 
0.0481 | 0.0450 | 0.0419 | 0.0398 | 0.0454 | 0.0418 | #### mb11 MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.microblog2012.txt | 0.2083 | 0.2098 | 0.2032 | 0.2120 | 0.2018 | 0.2050 | -topics.microblog2011.txt | 0.3683 | 0.3770 | 0.3572 | 0.3635 | 0.3823 | 0.3601 | +topics.microblog2012.txt | 0.2083 | 0.2107 | 0.2046 | 0.2121 | 0.2033 | 0.2055 | +topics.microblog2011.txt | 0.3643 | 0.3769 | 0.3537 | 0.3607 | 0.3823 | 0.3567 | #### mb13 MAP | BM25 | F2EXP | PL2 | QL | F2LOG | SPL | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -topics.microblog2013.txt | 0.2599 | 0.2541 | 0.2519 | 0.2613 | 0.2622 | 0.2536 | -topics.microblog2014.txt | 0.4203 | 0.3844 | 0.4115 | 0.4201 | 0.4104 | 0.4132 | +topics.microblog2013.txt | 0.2600 | 0.2531 | 0.2524 | 0.2615 | 0.2622 | 0.2530 | +topics.microblog2014.txt | 0.4195 | 0.3854 | 0.4132 | 0.4200 | 0.4121 | 0.4147 | diff --git a/docs/experiments-mb11.md b/docs/experiments-mb11.md index cd673fe640..fff54f5a8c 100644 --- a/docs/experiments-mb11.md +++ b/docs/experiments-mb11.md @@ -88,13 +88,13 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2011 Microblog Track](http://trec.nist.gov/data/microblog2011.html)| 0.3351 | 0.3477 | 0.4042 | 0.3614 | 0.4093 | 0.4179 | -[TREC 2012 Microblog Track](http://trec.nist.gov/data/microblog2012.html)| 0.1912 | 0.2055 | 0.2310 | 0.2100 | 0.2412 | 0.2502 | +[TREC 2011 Microblog Track](http://trec.nist.gov/data/microblog2011.html)| 0.3384 | 0.3621 | 0.4008 | 0.3584 | 0.4097 | 0.4201 | +[TREC 2012 Microblog Track](http://trec.nist.gov/data/microblog2012.html)| 0.1948 | 0.2124 | 0.2309 | 0.2102 | 0.2397 | 
0.2474 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2011 Microblog Track](http://trec.nist.gov/data/microblog2011.html)| 0.3837 | 0.4027 | 0.4558 | 0.4095 | 0.4483 | 0.4367 | -[TREC 2012 Microblog Track](http://trec.nist.gov/data/microblog2012.html)| 0.3328 | 0.3424 | 0.3588 | 0.3322 | 0.3542 | 0.3864 | +[TREC 2011 Microblog Track](http://trec.nist.gov/data/microblog2011.html)| 0.3959 | 0.4088 | 0.4612 | 0.4061 | 0.4483 | 0.4408 | +[TREC 2012 Microblog Track](http://trec.nist.gov/data/microblog2012.html)| 0.3316 | 0.3463 | 0.3554 | 0.3333 | 0.3571 | 0.3842 | diff --git a/docs/experiments-mb13.md b/docs/experiments-mb13.md index 8ad85906d4..7ac869bb37 100644 --- a/docs/experiments-mb13.md +++ b/docs/experiments-mb13.md @@ -88,13 +88,13 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2013 Microblog Track](http://trec.nist.gov/data/microblog2013.html)| 0.2306 | 0.2356 | 0.2770 | 0.2599 | 0.2796 | 0.3167 | -[TREC 2014 Microblog Track](http://trec.nist.gov/data/microblog2014.html)| 0.3836 | 0.4036 | 0.4673 | 0.4184 | 0.4763 | 0.4943 | +[TREC 2013 Microblog Track](http://trec.nist.gov/data/microblog2013.html)| 0.2371 | 0.2440 | 0.2855 | 0.2602 | 0.2815 | 0.3152 | +[TREC 2014 Microblog Track](http://trec.nist.gov/data/microblog2014.html)| 0.3931 | 0.4158 | 0.4796 | 0.4181 | 0.4746 | 0.4965 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -[TREC 2013 Microblog Track](http://trec.nist.gov/data/microblog2013.html)| 0.4222 | 0.4044 | 0.4611 | 0.4517 | 0.4600 | 0.5117 | -[TREC 2014 Microblog 
Track](http://trec.nist.gov/data/microblog2014.html)| 0.6176 | 0.6061 | 0.6479 | 0.6424 | 0.6606 | 0.6770 | +[TREC 2013 Microblog Track](http://trec.nist.gov/data/microblog2013.html)| 0.4339 | 0.4350 | 0.4728 | 0.4561 | 0.4672 | 0.5078 | +[TREC 2014 Microblog Track](http://trec.nist.gov/data/microblog2014.html)| 0.6212 | 0.6236 | 0.6648 | 0.6430 | 0.6594 | 0.6727 | diff --git a/docs/experiments-robust04.md b/docs/experiments-robust04.md index 311504561d..d46850fae6 100644 --- a/docs/experiments-robust04.md +++ b/docs/experiments-robust04.md @@ -63,11 +63,11 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.2501 | 0.2759 | 0.2860 | 0.2468 | 0.2643 | 0.2775 | +All Topics | 0.2531 | 0.2778 | 0.2895 | 0.2467 | 0.2649 | 0.2774 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.3123 | 0.3252 | 0.3339 | 0.3083 | 0.3138 | 0.3233 | +All Topics | 0.3102 | 0.3288 | 0.3333 | 0.3079 | 0.3171 | 0.3229 | diff --git a/docs/experiments-robust05.md b/docs/experiments-robust05.md index f79a13abc2..57f3afd84a 100644 --- a/docs/experiments-robust05.md +++ b/docs/experiments-robust05.md @@ -62,11 +62,11 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.2003 | 0.2517 | 0.2528 | 0.2026 | 0.2474 | 0.2501 | +All Topics | 0.2031 | 0.2523 | 0.2584 | 0.2028 | 0.2466 | 0.2476 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | 
:---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -All Topics | 0.3660 | 0.3913 | 0.4007 | 0.3713 | 0.4020 | 0.4080 | +All Topics | 0.3693 | 0.4007 | 0.4120 | 0.3653 | 0.4067 | 0.4113 | diff --git a/docs/experiments-wt10g.md b/docs/experiments-wt10g.md index 9dd2c9181f..5e31e632a5 100644 --- a/docs/experiments-wt10g.md +++ b/docs/experiments-wt10g.md @@ -64,11 +64,11 @@ With the above commands, you should be able to replicate the following results: MAP | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -Wt10g: Topics 451-550 | 0.1981 | 0.2169 | 0.2185 | 0.2015 | 0.2169 | 0.2250 | +Wt10g: Topics 451-550 | 0.1992 | 0.2163 | 0.2200 | 0.2021 | 0.2151 | 0.2275 | P30 | BM25 | BM25+RM3 | BM25+AX | QL | QL+RM3 | QL+AX | :---------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------| -Wt10g: Topics 451-550 | 0.2201 | 0.2456 | 0.2442 | 0.2184 | 0.2354 | 0.2520 | +Wt10g: Topics 451-550 | 0.2218 | 0.2463 | 0.2483 | 0.2180 | 0.2276 | 0.2517 | diff --git a/pom.xml b/pom.xml index e0dc44f60f..e9bca47488 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ - 6.3.0 + 7.6.0 UTF-8 @@ -208,17 +208,17 @@ org.apache.lucene lucene-core - ${LUCENE_VERSION} + ${lucene.version} org.apache.lucene lucene-benchmark - ${LUCENE_VERSION} + ${lucene.version} org.apache.lucene lucene-test-framework - ${LUCENE_VERSION} + ${lucene.version} edu.umass.ciir diff --git a/src/main/java/io/anserini/index/IndexUtils.java b/src/main/java/io/anserini/index/IndexUtils.java index 8cd6ebe8e4..1544510059 100755 --- a/src/main/java/io/anserini/index/IndexUtils.java +++ b/src/main/java/io/anserini/index/IndexUtils.java @@ -296,14 +296,14 @@ public void dumpDocumentVectors(String reqDocidsPath, DocVectorWeight weight) th } public void getAllDocids(Compression compression) throws IOException 
{ - Query q = new FieldValueQuery(LuceneDocumentGenerator.FIELD_ID); + Query q = new DocValuesFieldExistsQuery(LuceneDocumentGenerator.FIELD_ID); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] scoreDocs; try { - scoreDocs = searcher.search(new FieldValueQuery(LuceneDocumentGenerator.FIELD_ID), reader.maxDoc(), + scoreDocs = searcher.search(new DocValuesFieldExistsQuery(LuceneDocumentGenerator.FIELD_ID), reader.maxDoc(), BREAK_SCORE_TIES_BY_DOCID).scoreDocs; } catch (IllegalStateException e) { // because this is tweets collection - scoreDocs = searcher.search(new FieldValueQuery(TweetGenerator.StatusField.ID_LONG.name), reader.maxDoc(), + scoreDocs = searcher.search(new DocValuesFieldExistsQuery(TweetGenerator.StatusField.ID_LONG.name), reader.maxDoc(), BREAK_SCORE_TIES_BY_TWEETID).scoreDocs; } diff --git a/src/main/java/io/anserini/ltr/feature/base/TFIDFFeatureExtractor.java b/src/main/java/io/anserini/ltr/feature/base/TFIDFFeatureExtractor.java index 6174ba8352..00797345a8 100644 --- a/src/main/java/io/anserini/ltr/feature/base/TFIDFFeatureExtractor.java +++ b/src/main/java/io/anserini/ltr/feature/base/TFIDFFeatureExtractor.java @@ -72,7 +72,8 @@ public float extract(Document doc, Terms terms, RerankerContext context) { // number of query tokens found // how many of our query tokens were found - float coord = similarity.coord(countMap.size(), context.getQueryTokens().size()); + //float coord = similarity.coord(countMap.size(), context.getQueryTokens().size()); + // coord removed in Lucene 7 for (Object token : context.getQueryTokens()) { long termFreq = countMap.getOrDefault(token.toString(), 0L); @@ -82,7 +83,7 @@ public float extract(Document doc, Terms terms, RerankerContext context) { score += tf * idf*idf; } - score *= coord; + //score *= coord; return score; } diff --git a/src/main/java/io/anserini/rerank/lib/AxiomReranker.java b/src/main/java/io/anserini/rerank/lib/AxiomReranker.java index de3cb33539..342afe7b7f 100644 --- 
a/src/main/java/io/anserini/rerank/lib/AxiomReranker.java +++ b/src/main/java/io/anserini/rerank/lib/AxiomReranker.java @@ -238,10 +238,10 @@ private ScoreDoc[] buildInternalDocidsCache(String indexPath, boolean searchTwee IndexReader reader = DirectoryReader.open(FSDirectory.open(index)); IndexSearcher searcher = new IndexSearcher(reader); if (searchTweets) { - return searcher.search(new FieldValueQuery(TweetGenerator.StatusField.ID_LONG.name), reader.maxDoc(), + return searcher.search(new DocValuesFieldExistsQuery(TweetGenerator.StatusField.ID_LONG.name), reader.maxDoc(), BREAK_SCORE_TIES_BY_TWEETID).scoreDocs; } - return searcher.search(new FieldValueQuery(LuceneDocumentGenerator.FIELD_ID), reader.maxDoc(), + return searcher.search(new DocValuesFieldExistsQuery(LuceneDocumentGenerator.FIELD_ID), reader.maxDoc(), BREAK_SCORE_TIES_BY_DOCID).scoreDocs; } diff --git a/src/main/java/io/anserini/search/SearchCollection.java b/src/main/java/io/anserini/search/SearchCollection.java index 5c2f70308f..bd66925f10 100644 --- a/src/main/java/io/anserini/search/SearchCollection.java +++ b/src/main/java/io/anserini/search/SearchCollection.java @@ -30,8 +30,6 @@ import io.anserini.search.query.BagOfWordsQueryGenerator; import io.anserini.search.query.SdmQueryGenerator; import io.anserini.search.similarity.TaggedSimilarity; -import io.anserini.search.similarity.F2ExpSimilarity; -import io.anserini.search.similarity.F2LogSimilarity; import io.anserini.search.topicreader.NewsBackgroundLinkingTopicReader; import io.anserini.search.topicreader.TopicReader; import io.anserini.util.AnalyzerUtils; @@ -45,13 +43,13 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.queries.TermsQuery; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; import org.apache.lucene.search.*; import 
org.apache.lucene.search.similarities.*; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.MMapDirectory; +import org.apache.lucene.util.BytesRef; import org.kohsuke.args4j.CmdLineException; import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.OptionHandlerFilter; @@ -239,11 +237,11 @@ public List constructSimiliries() { } } else if (args.f2exp) { for (String s : args.f2exp_s) { - similarities.add(new TaggedSimilarity(new F2ExpSimilarity(Float.valueOf(s)), "s:"+s)); + similarities.add(new TaggedSimilarity(new AxiomaticF2EXP(Float.valueOf(s)), "s:"+s)); } } else if (args.f2log) { for (String s : args.f2log_s) { - similarities.add(new TaggedSimilarity(new F2LogSimilarity(Float.valueOf(s)), "s:"+s)); + similarities.add(new TaggedSimilarity(new AxiomaticF2LOG(Float.valueOf(s)), "s:"+s)); } } else { throw new IllegalArgumentException("Error: Must specify scoring model!"); @@ -294,7 +292,7 @@ public Map constructRerankerCascades() throws IOExcepti } @SuppressWarnings("unchecked") - public void runTopics() throws IOException, QueryNodeException { + public void runTopics() throws IOException { Path topicsFile = Paths.get(args.topics); if (!Files.exists(topicsFile) || !Files.isRegularFile(topicsFile) || !Files.isReadable(topicsFile)) { @@ -340,7 +338,7 @@ public void runTopics() throws IOException, QueryNodeException { } public ScoredDocuments search(IndexSearcher searcher, K qid, String queryString, RerankerCascade cascade) - throws IOException, QueryNodeException { + throws IOException { Query query = null; if (qc == QueryConstructor.SequentialDependenceModel) { query = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(FIELD_BODY, analyzer, queryString); @@ -383,10 +381,10 @@ public ScoredDocuments searchBackgroundLinking(IndexSearcher searcher, K qid, // Because the actual query strings are extracted from tokenized document!!! 
q = new StandardQueryParser().parse(queryStr, FIELD_BODY); } - Query filter = new TermsQuery( - new Term(WapoGenerator.WapoField.KICKER.name, "Opinions"), - new Term(WapoGenerator.WapoField.KICKER.name, "Letters to the Editor"), - new Term(WapoGenerator.WapoField.KICKER.name, "The Post's View") + Query filter = new TermInSetQuery(WapoGenerator.WapoField.KICKER.name, new BytesRef("Opinions"), new BytesRef("Letters to the Editor"), new BytesRef("The Post's View") +// new Term(WapoGenerator.WapoField.KICKER.name, "Opinions"), +// new Term(WapoGenerator.WapoField.KICKER.name, "Letters to the Editor"), +// new Term(WapoGenerator.WapoField.KICKER.name, "The Post's View") ); BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(filter, BooleanClause.Occur.MUST_NOT); diff --git a/src/main/java/io/anserini/search/SimpleSearcher.java b/src/main/java/io/anserini/search/SimpleSearcher.java index cde440b200..dc8cc7871a 100644 --- a/src/main/java/io/anserini/search/SimpleSearcher.java +++ b/src/main/java/io/anserini/search/SimpleSearcher.java @@ -23,8 +23,6 @@ import io.anserini.rerank.lib.Rm3Reranker; import io.anserini.rerank.lib.ScoreTiesAdjusterReranker; import io.anserini.search.query.BagOfWordsQueryGenerator; -import io.anserini.search.similarity.F2ExpSimilarity; -import io.anserini.search.similarity.F2LogSimilarity; import io.anserini.util.AnalyzerUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -124,11 +122,11 @@ public void setIBSimilarity(float c) { } public void setF2ExpSimilarity(float s) { - this.similarity = new F2ExpSimilarity(s); + this.similarity = new AxiomaticF2EXP(s); } public void setF2LogSimilarity(float s) { - this.similarity = new F2LogSimilarity(s); + this.similarity = new AxiomaticF2LOG(s); } @Override diff --git a/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java b/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java deleted file mode 100644 index 
e72569d878..0000000000 --- a/src/main/java/io/anserini/search/similarity/AxiomaticSimilarity.java +++ /dev/null @@ -1,377 +0,0 @@ -/** - * Anserini: A toolkit for reproducible information retrieval research built on Lucene - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.anserini.search.similarity; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import org.apache.lucene.index.FieldInvertState; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.search.CollectionStatistics; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.TermStatistics; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.SmallFloat; - -/** - * Hui Fang and ChengXiang Zhai. 2005. An exploration of axiomatic approaches to information retrieval. - * In Proceedings of the 28th annual international ACM SIGIR conference on Research and development in - * information retrieval (SIGIR '05). ACM, New York, NY, USA, 480-487. 
- */ -public abstract class AxiomaticSimilarity extends Similarity { - protected final float s; - - /** - * @param s Generic parater s - * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is - * not within the range {@code [0..1]} - */ - AxiomaticSimilarity(float s) { - if (Float.isNaN(s) || s < 0 || s > 1) { - throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1"); - } - this.s = s; - } - - /** Default parameter: - *
    - *
  • {@code s = 0.5}
  • - *
- */ - AxiomaticSimilarity() { - this(0.5f); - } - - /** Implemented as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)). - * - * @param docFreq terms's document frequency - * @param docCount total document count in the index - * @return inverted document frequency - * */ - float idf(long docFreq, long docCount) { - throw new UnsupportedOperationException(); - } - - /** Implemented as 1 / (distance + 1). - * - * @param distance distance - * @return sloppy frequency - * */ - float sloppyFreq(int distance) { - return 1.0f / (distance + 1); - } - - /** The default implementation returns 1 - * - * @param doc doc - * @param start start - * @param end end - * @param payload payload - * @return 1 - * */ - float scorePayload(int doc, int start, int end, BytesRef payload) { - return 1; - } - - /** The default implementation computes the average as sumTotalTermFreq / docCount, - * or returns 1 if the index does not store sumTotalTermFreq: - * any field that omits frequency information). - * - * @param collectionStats collection-wide statistics - * @return average document length of FIELD_BODY - * */ - float avgFieldLength(CollectionStatistics collectionStats) { - final long sumTotalTermFreq = collectionStats.sumTotalTermFreq(); - if (sumTotalTermFreq <= 0) { - return 1f; // field does not exist, or stat is unsupported - } else { - final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); - return (float) (sumTotalTermFreq / (double) docCount); - } - } - - /** The default implementation encodes boost / sqrt(length) - * with {@link SmallFloat#floatToByte315(float)}. This is compatible with - * Lucene's default implementation. If you change this, then you should - * change {@link #decodeNormValue(byte)} to match. 
- * - * @param boost boost - * @param fieldLength fieldLength - * @return encoded document lengths - * */ - byte encodeNormValue(float boost, int fieldLength) { - return SmallFloat.floatToByte315(boost / (float) Math.sqrt(fieldLength)); - } - - /** The default implementation returns 1 / f2 - * where f is {@link SmallFloat#byte315ToFloat(byte)}. - * - * @param b encoded document length - * @return decoded document length - * */ - float decodeNormValue(byte b) { - return NORM_TABLE[b & 0xFF]; - } - - /** - * True if overlap tokens (tokens with a position of increment of zero) are - * discounted from the document's length. - */ - boolean discountOverlaps = true; - - /** Sets whether overlap tokens (Tokens with 0 position increment) are - * ignored when computing norm. By default this is true, meaning overlap - * tokens do not count when computing norms. - * - * @param v v - * */ - public void setDiscountOverlaps(boolean v) { - discountOverlaps = v; - } - - /** - * Returns true if overlap tokens are discounted from the document's length. - * @see #setDiscountOverlaps - * - * @return discountOverlaps - */ - public boolean getDiscountOverlaps() { - return discountOverlaps; - } - - /** Cache of decoded bytes. */ - private static final float[] NORM_TABLE = new float[256]; - - static { - for (int i = 1; i < 256; i++) { - float f = SmallFloat.byte315ToFloat((byte)i); - NORM_TABLE[i] = 1.0f / (f*f); - } - NORM_TABLE[0] = 1.0f / NORM_TABLE[255]; // otherwise inf - } - - - @Override - public long computeNorm(FieldInvertState state) { - final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength(); - return encodeNormValue(state.getBoost(), numTerms); - } - - /** - * Computes a score factor for a simple term and returns an explanation - * for that score factor. - * - *

- * The default implementation uses: - * - *

-   * idf(docFreq, docCount);
-   * 
- * - * Note that {@link CollectionStatistics#docCount()} is used instead of - * {@link org.apache.lucene.index.IndexReader#numDocs() IndexReader#numDocs()} because also - * {@link TermStatistics#docFreq()} is used, and when the latter - * is inaccurate, so is {@link CollectionStatistics#docCount()}, and in the same direction. - * In addition, {@link CollectionStatistics#docCount()} does not skew when fields are sparse. - * - * @param collectionStats collection-level statistics - * @param termStats term-level statistics for the term - * @return an Explain object that includes both an idf score factor - and an explanation for the term. - */ - public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { - final long df = termStats.docFreq(); - final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); - final float idf = idf(df, docCount); - return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"); - } - - /** - * Computes a score factor for a phrase. - * - *

- * The default implementation sums the idf factor for - * each term in the phrase. - * - * @param collectionStats collection-level statistics - * @param termStats term-level statistics for the terms in the phrase - * @return an Explain object that includes both an idf - * score factor for the phrase and an explanation - * for each term. - */ - public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) { - final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); - float idf = 0.0f; - List details = new ArrayList<>(); - for (final TermStatistics stat : termStats ) { - final long df = stat.docFreq(); - final float termIdf = idf(df, docCount); - details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")")); - idf += termIdf; - } - return Explanation.match(idf, "idf(), sum of:", details); - } - - @Override - public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) { - Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats); - - float avgdl = avgFieldLength(collectionStats); - - // compute freq-independent part of f2log equation across all norm values - float cache[] = new float[256]; - for (int i = 0; i < cache.length; i++) { - cache[i] = s + s * decodeNormValue((byte)i) / avgdl; - } - return new Stats(collectionStats.field(), idf, avgdl, cache); - } - - @Override - public SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException { - Stats f2logStats = (Stats) stats; - return new F2LogDocScorer(f2logStats, context.reader().getNormValues(f2logStats.field)); - } - - /** DocumentCollection statistics for the F2Log model. */ - static class Stats extends SimWeight { - /** F2Log's idf */ - public final Explanation idf; - /** The average document length. 
*/ - public final float avgdl; - /** query boost */ - public float boost; - /** weight (idf * boost) */ - public float weight; - /** field name, for pulling norms */ - public final String field; - /** precomputed norm[256] with k1 * ((1 - b) + b * dl / avgdl) */ - public final float cache[]; - - Stats(String field, Explanation idf, float avgdl, float cache[]) { - this.field = field; - this.idf = idf; - this.avgdl = avgdl; - this.cache = cache; - normalize(1f, 1f); - } - - @Override - public float getValueForNormalization() { - // we return a TF-IDF like normalization to be nice, but we don't actually normalize ourselves. - return weight * weight; - } - - @Override - public void normalize(float queryNorm, float boost) { - // we don't normalize with queryNorm at all, we just capture the top-level boost - this.boost = boost; - this.weight = idf.getValue() * boost; - } - } - - class F2LogDocScorer extends SimScorer { - private final Stats stats; - private final float weightValue; - private final NumericDocValues norms; - private final float[] cache; - - F2LogDocScorer(Stats stats, NumericDocValues norms) throws IOException { - this.stats = stats; - this.weightValue = stats.weight; - this.cache = stats.cache; - this.norms = norms; - } - - /* Score function is: - *

-                                                     occurrences
-      score = termWeight * IDF * ---------------------------------------------------------
-                                 occurrences + s + documentLength * ( s / avgDocLength )
-       
- */ - @Override - public float score(int doc, float freq) { - // if there are no norms, we act as if b=0 - float norm = norms == null ? 1.0f : cache[(byte)norms.get(doc) & 0xFF]; - return weightValue * freq / (freq + norm); - } - - @Override - public Explanation explain(int doc, Explanation freq) { - return explainScore(doc, freq, stats, norms); - } - - @Override - public float computeSlopFactor(int distance) { - return sloppyFreq(distance); - } - - @Override - public float computePayloadFactor(int doc, int start, int end, BytesRef payload) { - return scorePayload(doc, start, end, payload); - } - } - - Explanation explainTFNorm(int doc, Explanation freq, Stats stats, NumericDocValues norms) { - List subs = new ArrayList<>(); - subs.add(freq); - subs.add(Explanation.match(s, "parameter s")); - if (norms == null) { - subs.add(Explanation.match(0, "parameter s (norms omitted for field)")); - return Explanation.match( - freq.getValue() / freq.getValue(), - "tfNorm, computed from:", subs); - } else { - float doclen = decodeNormValue((byte)norms.get(doc)); - subs.add(Explanation.match(stats.avgdl, "avgFieldLength")); - subs.add(Explanation.match(doclen, "fieldLength")); - return Explanation.match( - freq.getValue() / (freq.getValue() + s + s * doclen/stats.avgdl), - "tfNorm, computed from:", subs); - } - } - - Explanation explainScore(int doc, Explanation freq, Stats stats, NumericDocValues norms) { - Explanation boostExpl = Explanation.match(stats.boost, "boost"); - List subs = new ArrayList<>(); - if (boostExpl.getValue() != 1.0f) - subs.add(boostExpl); - subs.add(stats.idf); - Explanation tfNormExpl = explainTFNorm(doc, freq, stats, norms); - subs.add(tfNormExpl); - return Explanation.match( - boostExpl.getValue() * stats.idf.getValue() * tfNormExpl.getValue(), - "score(doc="+doc+",freq="+freq+"), product of:", subs); - } - - @Override - public String toString() { - throw new UnsupportedOperationException(); - } - - /** - * Returns the b parameter - * @see 
#AxiomaticSimilarity(float) - * - * @return s - */ - public float getS() { - return s; - } -} diff --git a/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java b/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java deleted file mode 100644 index 25fd6e0cae..0000000000 --- a/src/main/java/io/anserini/search/similarity/F2ExpSimilarity.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Anserini: A toolkit for reproducible information retrieval research built on Lucene - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.anserini.search.similarity; - -public class F2ExpSimilarity extends AxiomaticSimilarity { - private final float k = 0.35f; - - /** - * F2Exp with the supplied parameter values. - * @param s Controls to what degree document length normalizes tf values. - * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is - * not within the range {@code [0..1]} - */ - public F2ExpSimilarity(float s) { - super(s); - } - - /** F2Exp with these default values: - *
    - *
  • {@code k = 0.35}
  • - *
- */ - public F2ExpSimilarity() { - this(0.5f); - } - - @Override - float idf(long docFreq, long docCount) { - return (float) Math.pow((docCount + 1.0) / docFreq, this.k); - } - - @Override - public String toString() { - return "F2Exp(s=" + s +")"; - } - - /** - * Returns the k parameter - * @see #F2ExpSimilarity(float) - * @return k - */ - public float getK() { - return k; - } -} diff --git a/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java b/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java deleted file mode 100644 index f95386b045..0000000000 --- a/src/main/java/io/anserini/search/similarity/F2LogSimilarity.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Anserini: A toolkit for reproducible information retrieval research built on Lucene - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.anserini.search.similarity; - -public class F2LogSimilarity extends AxiomaticSimilarity { - /** - * F2Log with the supplied parameter values. - * @param s Controls to what degree document length normalizes tf values. - * @throws IllegalArgumentException if {@code s} is infinite or if {@code s} is - * not within the range {@code [0..1]} - */ - public F2LogSimilarity(float s) { - super(s); - } - - /** F2Log with these default values: - *
    - *
  • {@code s = 0.5}
  • - *
- */ - public F2LogSimilarity() { - this(0.5f); - } - - @Override - float idf(long docFreq, long docCount) { - return (float) Math.log((1.0f + docCount) / docFreq); - } - - @Override - public String toString() { - return "F2Log(s=" + s +")"; - } -} diff --git a/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java b/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java index 5e2795b253..9adae2ed86 100644 --- a/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java +++ b/src/main/java/io/anserini/search/similarity/RankLibSimilarity.java @@ -34,7 +34,7 @@ public long computeNorm(FieldInvertState fieldInvertState) { } @Override - public SimWeight computeWeight(CollectionStatistics collectionStatistics, TermStatistics... termStatisticses) { + public SimWeight computeWeight(float boost, CollectionStatistics collectionStatistics, TermStatistics... termStatistics) { return null; } diff --git a/src/main/resources/fine_tuning/models.yaml b/src/main/resources/fine_tuning/models.yaml index 28eb306881..c8c61ca44d 100644 --- a/src/main/resources/fine_tuning/models.yaml +++ b/src/main/resources/fine_tuning/models.yaml @@ -12,20 +12,20 @@ models: expected: robust04: map: - best_avg: 0.2496 - oracles_per_topic: 0.2703 - 2-fold: 0.2496 - 5-fold: 0.2481 + best_avg: 0.2514 + oracles_per_topic: 0.2721 + 2-fold: 0.2509 + 5-fold: 0.2486 P_20: - best_avg: 0.3572 + best_avg: 0.3610 oracles_per_topic: 0.4008 - 2-fold: 0.3543 - 5-fold: 0.3517 + 2-fold: 0.3610 + 5-fold: 0.3573 ndcg20: - best_avg: 0.40703 - oracles_per_topic: 0.45610 - 2-fold: 0.4042 - 5-fold: 0.4005 + best_avg: 0.41022 + oracles_per_topic: 0.45820 + 2-fold: 0.4083 + 5-fold: 0.4059 bm25: name: bm25 fixed_params: "-inmem -skipexists" @@ -43,20 +43,20 @@ models: expected: robust04: map: - best_avg: 0.2532 - oracles_per_topic: 0.2921 - 2-fold: 0.2526 - 5-fold: 0.2528 + best_avg: 0.2543 + oracles_per_topic: 0.2935 + 2-fold: 0.2539 + 5-fold: 0.2530 P_20: - best_avg: 0.3614 - oracles_per_topic: 
0.4426 - 2-fold: 0.3604 - 5-fold: 0.3598 + best_avg: 0.3631 + oracles_per_topic: 0.4424 + 2-fold: 0.3594 + 5-fold: 0.3620 ndcg20: - best_avg: 0.41659 - oracles_per_topic: 0.5028 - 2-fold: 0.4108 - 5-fold: 0.4157 + best_avg: 0.41862 + oracles_per_topic: 0.5031 + 2-fold: 0.4144 + 5-fold: 0.4160 axiom: name: axiom fixed_params: "-inmem -skipexists -axiom.n 30 -axiom.deterministic -rerankCutoff 50" @@ -161,20 +161,20 @@ models: expected: robust04: map: - best_avg: 0.3009 - oracles_per_topic: 0.4158 - 2-fold: 0.2954 - 5-fold: 0.2991 + best_avg: 0.3017 + oracles_per_topic: 0.4153 + 2-fold: 0.2943 + 5-fold: 0.2982 P_20: - best_avg: 0.3994 - oracles_per_topic: 0.5731 - 2-fold: 0.3795 - 5-fold: 0.3901 + best_avg: 0.4016 + oracles_per_topic: 0.5709 + 2-fold: 0.3940 + 5-fold: 0.3947 ndcg20: - best_avg: 0.44631 - oracles_per_topic: 0.6335 - 2-fold: 0.4295 - 5-fold: 0.4348 + best_avg: 0.44877 + oracles_per_topic: 0.6332 + 2-fold: 0.4411 + 5-fold: 0.4412 bm25+rm3: name: rm3 fixed_params: "-inmem -skipexists -rerankCutoff 50" @@ -207,17 +207,17 @@ models: expected: robust04: map: - best_avg: 0.3020 - oracles_per_topic: 0.4402 - 2-fold: 0.2973 - 5-fold: 0.2956 + best_avg: 0.3058 + oracles_per_topic: 0.4375 + 2-fold: 0.2987 + 5-fold: 0.3033 P_20: - best_avg: 0.4012 - oracles_per_topic: 0.6054 - 2-fold: 0.3871 - 5-fold: 0.3931 + best_avg: 0.4024 + oracles_per_topic: 0.5994 + 2-fold: 0.3913 + 5-fold: 0.3937 ndcg20: - best_avg: 0.44958 - oracles_per_topic: 0.6702 - 2-fold: 0.4358 - 5-fold: 0.4402 + best_avg: 0.44979 + oracles_per_topic: 0.6653 + 2-fold: 0.4321 + 5-fold: 0.4426 diff --git a/src/main/resources/jdiq2018/models.yaml b/src/main/resources/jdiq2018/models.yaml index f30507edbc..a7d7a4a618 100644 --- a/src/main/resources/jdiq2018/models.yaml +++ b/src/main/resources/jdiq2018/models.yaml @@ -13,57 +13,57 @@ models: expected: disk12: map: - topics.51-100.txt: 0.2262 - topics.101-150.txt: 0.2062 - topics.151-200.txt: 0.2605 + topics.51-100.txt: 0.2274 + topics.101-150.txt: 
0.2071 + topics.151-200.txt: 0.2614 robust04: map: - topics.robust04.301-450.601-700.txt: 0.2532 + topics.robust04.301-450.601-700.txt: 0.2543 robust05: map: - topics.robust05.txt: 0.2090 + topics.robust05.txt: 0.2097 core17: map: - topics.core17.txt: 0.2047 + topics.core17.txt: 0.2052 wt10g: map: - topics.451-550.txt: 0.2012 + topics.451-550.txt: 0.2005 gov2: map: - topics.701-750.txt: 0.2684 - topics.751-800.txt: 0.3392 - topics.801-850.txt: 0.3080 + topics.701-750.txt: 0.2702 + topics.751-800.txt: 0.3394 + topics.801-850.txt: 0.3085 cw09b: map: - topics.web.51-100.txt: 0.1147 - topics.web.101-150.txt: 0.1117 - topics.web.151-200.txt: 0.1202 + topics.web.51-100.txt: 0.1165 + topics.web.101-150.txt: 0.1104 + topics.web.151-200.txt: 0.1226 ndcg20: - topics.web.51-100.txt: 0.14592 - topics.web.101-150.txt: 0.19374 - topics.web.151-200.txt: 0.10379 + topics.web.51-100.txt: 0.14868 + topics.web.101-150.txt: 0.19270 + topics.web.151-200.txt: 0.10895 err20: - topics.web.51-100.txt: 0.07644 - topics.web.101-150.txt: 0.10229 - topics.web.151-200.txt: 0.14715 + topics.web.51-100.txt: 0.07743 + topics.web.101-150.txt: 0.09808 + topics.web.151-200.txt: 0.1524 cw12b13: map: - topics.web.201-250.txt: 0.0475 - topics.web.251-300.txt: 0.0238 + topics.web.201-250.txt: 0.0481 + topics.web.251-300.txt: 0.0237 ndcg20: - topics.web.201-250.txt: 0.13862 - topics.web.251-300.txt: 0.12366 + topics.web.201-250.txt: 0.13843 + topics.web.251-300.txt: 0.12466 err20: - topics.web.201-250.txt: 0.0959 - topics.web.251-300.txt: 0.12708 + topics.web.201-250.txt: 0.09928 + topics.web.251-300.txt: 0.12236 mb11: map: - topics.microblog2011.txt: 0.3683 + topics.microblog2011.txt: 0.3643 topics.microblog2012.txt: 0.2083 mb13: map: - topics.microblog2013.txt: 0.2599 - topics.microblog2014.txt: 0.4203 + topics.microblog2013.txt: 0.2600 + topics.microblog2014.txt: 0.4195 ql: params: mu: @@ -73,57 +73,57 @@ models: expected: disk12: map: - topics.51-100.txt: 0.2210 - topics.101-150.txt: 0.2017 - 
topics.151-200.txt: 0.2544 + topics.51-100.txt: 0.2226 + topics.101-150.txt: 0.2015 + topics.151-200.txt: 0.2558 robust04: map: - topics.robust04.301-450.601-700.txt: 0.2496 + topics.robust04.301-450.601-700.txt: 0.2514 robust05: map: - topics.robust05.txt: 0.2026 + topics.robust05.txt: 0.2030 core17: map: - topics.core17.txt: 0.1951 + topics.core17.txt: 0.1943 wt10g: map: - topics.451-550.txt: 0.2034 + topics.451-550.txt: 0.2021 gov2: map: - topics.701-750.txt: 0.2636 - topics.751-800.txt: 0.3267 - topics.801-850.txt: 0.2957 + topics.701-750.txt: 0.2700 + topics.751-800.txt: 0.3303 + topics.801-850.txt: 0.3013 cw09b: map: - topics.web.51-100.txt: 0.104 - topics.web.101-150.txt: 0.1002 - topics.web.151-200.txt: 0.1091 + topics.web.51-100.txt: 0.1060 + topics.web.101-150.txt: 0.1004 + topics.web.151-200.txt: 0.1113 ndcg20: - topics.web.51-100.txt: 0.11701 - topics.web.101-150.txt: 0.16868 - topics.web.151-200.txt: 0.09965 + topics.web.51-100.txt: 0.11845 + topics.web.101-150.txt: 0.17012 + topics.web.151-200.txt: 0.09778 err20: - topics.web.51-100.txt: 0.06455 - topics.web.101-150.txt: 0.08608 - topics.web.151-200.txt: 0.14886 + topics.web.51-100.txt: 0.06431 + topics.web.101-150.txt: 0.08684 + topics.web.151-200.txt: 0.14839 cw12b13: map: - topics.web.201-250.txt: 0.0392 - topics.web.251-300.txt: 0.0241 + topics.web.201-250.txt: 0.0398 + topics.web.251-300.txt: 0.0246 ndcg20: - topics.web.201-250.txt: 0.1168 - topics.web.251-300.txt: 0.11883 + topics.web.201-250.txt: 0.11675 + topics.web.251-300.txt: 0.12088 err20: - topics.web.201-250.txt: 0.0883 - topics.web.251-300.txt: 0.1088 + topics.web.201-250.txt: 0.08977 + topics.web.251-300.txt: 0.1108 mb11: map: - topics.microblog2011.txt: 0.3635 - topics.microblog2012.txt: 0.2120 + topics.microblog2011.txt: 0.3607 + topics.microblog2012.txt: 0.2121 mb13: map: - topics.microblog2013.txt: 0.2613 - topics.microblog2014.txt: 0.4201 + topics.microblog2013.txt: 0.2615 + topics.microblog2014.txt: 0.4200 pl2: params: pl2.c: @@ 
-133,57 +133,57 @@ models: expected: disk12: map: - topics.51-100.txt: 0.2213 - topics.101-150.txt: 0.1952 - topics.151-200.txt: 0.2524 + topics.51-100.txt: 0.2226 + topics.101-150.txt: 0.1967 + topics.151-200.txt: 0.2544 robust04: map: - topics.robust04.301-450.601-700.txt: 0.2521 + topics.robust04.301-450.601-700.txt: 0.2531 robust05: map: - topics.robust05.txt: 0.2006 + topics.robust05.txt: 0.2021 core17: map: - topics.core17.txt: 0.2005 + topics.core17.txt: 0.2019 wt10g: map: - topics.451-550.txt: 0.1889 + topics.451-550.txt: 0.1880 gov2: map: - topics.701-750.txt: 0.2696 - topics.751-800.txt: 0.3428 - topics.801-850.txt: 0.3084 + topics.701-750.txt: 0.2726 + topics.751-800.txt: 0.3439 + topics.801-850.txt: 0.3088 cw09b: map: - topics.web.51-100.txt: 0.1085 - topics.web.101-150.txt: 0.1075 - topics.web.151-200.txt: 0.1135 + topics.web.51-100.txt: 0.1103 + topics.web.101-150.txt: 0.1067 + topics.web.151-200.txt: 0.1170 ndcg20: - topics.web.51-100.txt: 0.12131 - topics.web.101-150.txt: 0.17742 - topics.web.151-200.txt: 0.09281 + topics.web.51-100.txt: 0.12168 + topics.web.101-150.txt: 0.17652 + topics.web.151-200.txt: 0.09274 err20: - topics.web.51-100.txt: 0.06348 - topics.web.101-150.txt: 0.09095 - topics.web.151-200.txt: 0.14314 + topics.web.51-100.txt: 0.06346 + topics.web.101-150.txt: 0.08923 + topics.web.151-200.txt: 0.14389 cw12b13: map: - topics.web.201-250.txt: 0.0416 - topics.web.251-300.txt: 0.0239 + topics.web.201-250.txt: 0.0419 + topics.web.251-300.txt: 0.0242 ndcg20: - topics.web.201-250.txt: 0.12392 - topics.web.251-300.txt: 0.11768 + topics.web.201-250.txt: 0.12465 + topics.web.251-300.txt: 0.12127 err20: - topics.web.201-250.txt: 0.09066 - topics.web.251-300.txt: 0.10751 + topics.web.201-250.txt: 0.09331 + topics.web.251-300.txt: 0.11086 mb11: map: - topics.microblog2011.txt: 0.3572 - topics.microblog2012.txt: 0.2032 + topics.microblog2011.txt: 0.3537 + topics.microblog2012.txt: 0.2046 mb13: map: - topics.microblog2013.txt: 0.2519 - 
topics.microblog2014.txt: 0.4115 + topics.microblog2013.txt: 0.2524 + topics.microblog2014.txt: 0.4132 spl: params: spl.c: @@ -193,57 +193,57 @@ models: expected: disk12: map: - topics.51-100.txt: 0.2189 - topics.101-150.txt: 0.1819 - topics.151-200.txt: 0.2448 + topics.51-100.txt: 0.2201 + topics.101-150.txt: 0.1840 + topics.151-200.txt: 0.2459 robust04: map: - topics.robust04.301-450.601-700.txt: 0.2502 + topics.robust04.301-450.601-700.txt: 0.2509 robust05: map: - topics.robust05.txt: 0.1969 + topics.robust05.txt: 0.1980 core17: map: - topics.core17.txt: 0.1981 + topics.core17.txt: 0.1999 wt10g: map: - topics.451-550.txt: 0.1726 + topics.451-550.txt: 0.1704 gov2: map: - topics.701-750.txt: 0.2687 - topics.751-800.txt: 0.3386 - topics.801-850.txt: 0.3140 + topics.701-750.txt: 0.2734 + topics.751-800.txt: 0.3393 + topics.801-850.txt: 0.3139 cw09b: map: - topics.web.51-100.txt: 0.1077 - topics.web.101-150.txt: 0.1066 - topics.web.151-200.txt: 0.1131 + topics.web.51-100.txt: 0.1099 + topics.web.101-150.txt: 0.1063 + topics.web.151-200.txt: 0.1163 ndcg20: - topics.web.51-100.txt: 0.12324 - topics.web.101-150.txt: 0.17621 - topics.web.151-200.txt: 0.09311 + topics.web.51-100.txt: 0.12515 + topics.web.101-150.txt: 0.17576 + topics.web.151-200.txt: 0.09332 err20: - topics.web.51-100.txt: 0.06653 - topics.web.101-150.txt: 0.09082 - topics.web.151-200.txt: 0.14348 + topics.web.51-100.txt: 0.06589 + topics.web.101-150.txt: 0.08926 + topics.web.151-200.txt: 0.14448 cw12b13: map: - topics.web.201-250.txt: 0.0412 - topics.web.251-300.txt: 0.0238 + topics.web.201-250.txt: 0.0418 + topics.web.251-300.txt: 0.0240 ndcg20: - topics.web.201-250.txt: 0.12534 - topics.web.251-300.txt: 0.11788 + topics.web.201-250.txt: 0.12579 + topics.web.251-300.txt: 0.12128 err20: - topics.web.201-250.txt: 0.09046 - topics.web.251-300.txt: 0.109 + topics.web.201-250.txt: 0.09396 + topics.web.251-300.txt: 0.11347 mb11: map: - topics.microblog2011.txt: 0.3601 - topics.microblog2012.txt: 0.2050 + 
topics.microblog2011.txt: 0.3567 + topics.microblog2012.txt: 0.2055 mb13: map: - topics.microblog2013.txt: 0.2536 - topics.microblog2014.txt: 0.4132 + topics.microblog2013.txt: 0.2530 + topics.microblog2014.txt: 0.4147 f2exp: params: f2exp.s: @@ -253,57 +253,57 @@ models: expected: disk12: map: - topics.51-100.txt: 0.2216 - topics.101-150.txt: 0.1997 - topics.151-200.txt: 0.2474 + topics.51-100.txt: 0.2245 + topics.101-150.txt: 0.2035 + topics.151-200.txt: 0.2512 robust04: map: - topics.robust04.301-450.601-700.txt: 0.2491 + topics.robust04.301-450.601-700.txt: 0.2516 robust05: map: - topics.robust05.txt: 0.1960 + topics.robust05.txt: 0.1998 core17: map: - topics.core17.txt: 0.1986 + topics.core17.txt: 0.2005 wt10g: map: - topics.451-550.txt: 0.1972 + topics.451-550.txt: 0.1996 gov2: map: - topics.701-750.txt: 0.2535 - topics.751-800.txt: 0.3156 - topics.801-850.txt: 0.2845 + topics.701-750.txt: 0.2592 + topics.751-800.txt: 0.3195 + topics.801-850.txt: 0.2900 cw09b: map: - topics.web.51-100.txt: 0.1067 - topics.web.101-150.txt: 0.1067 - topics.web.151-200.txt: 0.1042 + topics.web.51-100.txt: 0.1111 + topics.web.101-150.txt: 0.1081 + topics.web.151-200.txt: 0.1089 ndcg20: - topics.web.51-100.txt: 0.13895 - topics.web.101-150.txt: 0.18424 - topics.web.151-200.txt: 0.08933 + topics.web.51-100.txt: 0.14176 + topics.web.101-150.txt: 0.18778 + topics.web.151-200.txt: 0.09333 err20: - topics.web.51-100.txt: 0.07512 - topics.web.101-150.txt: 0.09258 - topics.web.151-200.txt: 0.12932 + topics.web.51-100.txt: 0.07756 + topics.web.101-150.txt: 0.09354 + topics.web.151-200.txt: 0.13872 cw12b13: map: - topics.web.201-250.txt: 0.0434 - topics.web.251-300.txt: 0.0201 + topics.web.201-250.txt: 0.0450 + topics.web.251-300.txt: 0.0205 ndcg20: - topics.web.201-250.txt: 0.12254 - topics.web.251-300.txt: 0.11349 + topics.web.201-250.txt: 0.12218 + topics.web.251-300.txt: 0.11593 err20: - topics.web.201-250.txt: 0.08114 - topics.web.251-300.txt: 0.11991 + topics.web.201-250.txt: 0.07970 
+ topics.web.251-300.txt: 0.12031 mb11: map: - topics.microblog2011.txt: 0.3770 - topics.microblog2012.txt: 0.2098 + topics.microblog2011.txt: 0.3769 + topics.microblog2012.txt: 0.2107 mb13: map: - topics.microblog2013.txt: 0.2541 - topics.microblog2014.txt: 0.3844 + topics.microblog2013.txt: 0.2531 + topics.microblog2014.txt: 0.3854 f2log: params: f2log.s: @@ -313,55 +313,55 @@ models: expected: disk12: map: - topics.51-100.txt: 0.2230 - topics.101-150.txt: 0.1992 - topics.151-200.txt: 0.2531 + topics.51-100.txt: 0.2260 + topics.101-150.txt: 0.2031 + topics.151-200.txt: 0.2571 robust04: map: - topics.robust04.301-450.601-700.txt: 0.2500 + topics.robust04.301-450.601-700.txt: 0.2523 robust05: map: - topics.robust05.txt: 0.1976 + topics.robust05.txt: 0.2023 core17: map: - topics.core17.txt: 0.2041 + topics.core17.txt: 0.2050 wt10g: map: - topics.451-550.txt: 0.1923 + topics.451-550.txt: 0.1938 gov2: map: - topics.701-750.txt: 0.2627 - topics.751-800.txt: 0.3298 - topics.801-850.txt: 0.2970 + topics.701-750.txt: 0.2689 + topics.751-800.txt: 0.3342 + topics.801-850.txt: 0.3026 cw09b: map: - topics.web.51-100.txt: 0.107 - topics.web.101-150.txt: 0.1108 - topics.web.151-200.txt: 0.1046 + topics.web.51-100.txt: 0.1110 + topics.web.101-150.txt: 0.1104 + topics.web.151-200.txt: 0.1091 ndcg20: - topics.web.51-100.txt: 0.13495 - topics.web.101-150.txt: 0.19114 - topics.web.151-200.txt: 0.09591 + topics.web.51-100.txt: 0.13763 + topics.web.101-150.txt: 0.19169 + topics.web.151-200.txt: 0.09859 err20: - topics.web.51-100.txt: 0.07234 - topics.web.101-150.txt: 0.09381 - topics.web.151-200.txt: 0.14312 + topics.web.51-100.txt: 0.07245 + topics.web.101-150.txt: 0.09435 + topics.web.151-200.txt: 0.15240 cw12b13: map: - topics.web.201-250.txt: 0.0446 - topics.web.251-300.txt: 0.0212 + topics.web.201-250.txt: 0.0454 + topics.web.251-300.txt: 0.0213 ndcg20: - topics.web.201-250.txt: 0.12442 - topics.web.251-300.txt: 0.11743 + topics.web.201-250.txt: 0.12473 + topics.web.251-300.txt: 
0.11891 err20: - topics.web.201-250.txt: 0.08356 - topics.web.251-300.txt: 0.12344 + topics.web.201-250.txt: 0.08210 + topics.web.251-300.txt: 0.12094 mb11: map: topics.microblog2011.txt: 0.3823 - topics.microblog2012.txt: 0.2018 + topics.microblog2012.txt: 0.2033 mb13: map: topics.microblog2013.txt: 0.2622 - topics.microblog2014.txt: 0.4104 + topics.microblog2014.txt: 0.4121 diff --git a/src/main/resources/regression/cacm.yaml b/src/main/resources/regression/cacm.yaml index 1aba4802a9..eda72978cb 100644 --- a/src/main/resources/regression/cacm.yaml +++ b/src/main/resources/regression/cacm.yaml @@ -52,18 +52,18 @@ models: - -bm25 results: map: - - 0.3102 + - 0.3123 p30: - - 0.1936 + - 0.1942 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.3698 + - 0.3688 p30: - - 0.2301 + - 0.2295 - name: bm25+ax params: - -bm25 @@ -72,9 +72,9 @@ models: - -axiom.deterministic results: map: - - 0.3002 + - 0.3077 p30: - - 0.1974 + - 0.1955 - name: ql params: - -ql @@ -89,9 +89,9 @@ models: - -rm3 results: map: - - 0.3768 + - 0.3818 p30: - - 0.2250 + - 0.2237 - name: ql+ax params: - -ql @@ -100,6 +100,6 @@ models: - -axiom.deterministic results: map: - - 0.2894 + - 0.2907 p30: - - 0.1795 + - 0.1840 diff --git a/src/main/resources/regression/car17.yaml b/src/main/resources/regression/car17.yaml index 737bbfa0f1..23cbcd6e7b 100644 --- a/src/main/resources/regression/car17.yaml +++ b/src/main/resources/regression/car17.yaml @@ -50,18 +50,18 @@ models: - -bm25 results: map: - - 0.1650 + - 0.1689 recip_rank: - - 0.2270 + - 0.2321 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.1343 + - 0.1386 recip_rank: - - 0.1852 + - 0.1907 - name: bm25+ax params: - -bm25 @@ -70,15 +70,15 @@ models: - -axiom.deterministic results: map: - - 0.1318 + - 0.1355 recip_rank: - - 0.1817 + - 0.1857 - name: ql params: - -ql results: map: - - 0.1515 + - 0.1516 recip_rank: - 0.2085 - name: ql+rm3 @@ -87,9 +87,9 @@ models: - -rm3 results: map: - - 0.1211 + - 0.1198 recip_rank: - - 0.1672 + - 
0.1653 - name: ql+ax params: - -ql @@ -98,6 +98,6 @@ models: - -axiom.deterministic results: map: - - 0.1083 + - 0.1082 recip_rank: - - 0.1503 + - 0.1501 diff --git a/src/main/resources/regression/core17.yaml b/src/main/resources/regression/core17.yaml index dd1707deb8..42ca9c6f26 100644 --- a/src/main/resources/regression/core17.yaml +++ b/src/main/resources/regression/core17.yaml @@ -50,18 +50,18 @@ models: - -bm25 results: map: - - 0.1996 + - 0.1977 p30: - - 0.4207 + - 0.4160 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.2639 + - 0.2596 p30: - - 0.4880 + - 0.4820 - name: bm25+ax params: - -bm25 @@ -70,26 +70,26 @@ models: - -axiom.deterministic results: map: - - 0.2719 + - 0.2700 p30: - - 0.4900 + - 0.4927 - name: ql params: - -ql results: map: - - 0.1928 + - 0.1913 p30: - - 0.4327 + - 0.4373 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.2427 + - 0.2405 p30: - - 0.4640 + - 0.4580 - name: ql+ax params: - -ql @@ -98,6 +98,6 @@ models: - -axiom.deterministic results: map: - - 0.2498 + - 0.2514 p30: - - 0.4813 + - 0.4827 diff --git a/src/main/resources/regression/core18.yaml b/src/main/resources/regression/core18.yaml index 4f363b1a79..7ad9953fae 100644 --- a/src/main/resources/regression/core18.yaml +++ b/src/main/resources/regression/core18.yaml @@ -50,18 +50,18 @@ models: - -bm25 results: map: - - 0.2487 + - 0.2491 p30: - - 0.3640 + - 0.3580 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.2911 + - 0.2952 p30: - - 0.4087 + - 0.4200 - name: bm25+ax params: - -bm25 @@ -70,26 +70,26 @@ models: - -axiom.deterministic results: map: - - 0.2919 + - 0.2921 p30: - - 0.4033 + - 0.4007 - name: ql params: - -ql results: map: - - 0.2504 + - 0.2522 p30: - - 0.3620 + - 0.3627 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.2754 + - 0.2759 p30: - - 0.3773 + - 0.3753 - name: ql+ax params: - -ql @@ -98,6 +98,6 @@ models: - -axiom.deterministic results: map: - - 0.2976 + - 0.2975 p30: - - 0.4067 + - 0.4073 diff --git 
a/src/main/resources/regression/cw09b.yaml b/src/main/resources/regression/cw09b.yaml index bb018d4af7..e121457ac8 100644 --- a/src/main/resources/regression/cw09b.yaml +++ b/src/main/resources/regression/cw09b.yaml @@ -68,42 +68,42 @@ models: - -bm25 results: map: + - 0.1126 - 0.1094 - - 0.1095 - - 0.1072 + - 0.1106 p30: - - 0.2653 - - 0.2540 - - 0.2180 + - 0.2681 + - 0.2513 + - 0.2167 ndcg20: - - 0.13280 - - 0.19143 - - 0.09764 + - 0.13539 + - 0.18901 + - 0.10141 err20: - - 0.07167 - - 0.09470 - - 0.13823 + - 0.07335 + - 0.09592 + - 0.13036 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.1075 - - 0.1146 - - 0.1318 + - 0.1171 + - 0.1142 + - 0.1382 p30: - - 0.2604 - - 0.2713 - - 0.2387 + - 0.2819 + - 0.2700 + - 0.2473 ndcg20: - - 0.14228 - - 0.18613 - - 0.13083 + - 0.15446 + - 0.18227 + - 0.13294 err20: - - 0.07842 - - 0.10805 - - 0.21787 + - 0.08653 + - 0.10422 + - 0.22241 - name: bm25+ax params: - -bm25 @@ -113,62 +113,62 @@ models: - -axiom.beta 0.1 results: map: - - 0.0966 - - 0.0996 - - 0.1242 + - 0.0928 + - 0.0974 + - 0.1315 p30: - - 0.2521 - - 0.2420 - - 0.2313 + - 0.2354 + - 0.2393 + - 0.2553 ndcg20: - - 0.17151 - - 0.18775 - - 0.11873 + - 0.16375 + - 0.18330 + - 0.14413 err20: - - 0.10073 - - 0.10645 - - 0.19208 + - 0.09815 + - 0.10909 + - 0.23554 - name: ql params: - -ql results: map: - - 0.1027 - - 0.0971 - - 0.1035 + - 0.1060 + - 0.0958 + - 0.1069 p30: - - 0.2417 - - 0.2220 - - 0.2013 + - 0.2431 + - 0.2147 + - 0.2080 ndcg20: - - 0.11319 - - 0.16347 - - 0.08620 + - 0.11431 + - 0.16192 + - 0.08682 err20: - - 0.05863 - - 0.08419 - - 0.13155 + - 0.05994 + - 0.08487 + - 0.13052 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.1060 - - 0.0961 - - 0.1132 + - 0.1117 + - 0.0964 + - 0.1167 p30: - - 0.2507 - - 0.2207 - - 0.2040 + - 0.2611 + - 0.2147 + - 0.2053 ndcg20: - - 0.13136 - - 0.16075 - - 0.10750 + - 0.13618 + - 0.15199 + - 0.10590 err20: - - 0.06493 - - 0.09210 - - 0.15740 + - 0.06486 + - 0.08655 + - 0.14750 - name: ql+ax params: - -ql 
@@ -178,18 +178,18 @@ models: - -axiom.beta 0.1 results: map: - - 0.1088 - - 0.0914 - - 0.1215 + - 0.1086 + - 0.0879 + - 0.1212 p30: - 0.2618 - - 0.2267 - - 0.2100 + - 0.2167 + - 0.2140 ndcg20: - - 0.14695 - - 0.15916 - - 0.10551 + - 0.14541 + - 0.15091 + - 0.10296 err20: - - 0.08023 - - 0.08791 - - 0.15829 + - 0.07424 + - 0.08203 + - 0.15575 diff --git a/src/main/resources/regression/cw12.yaml b/src/main/resources/regression/cw12.yaml index 532f1894e5..219e7b7c38 100644 --- a/src/main/resources/regression/cw12.yaml +++ b/src/main/resources/regression/cw12.yaml @@ -65,64 +65,65 @@ models: - -bm25 results: map: - - 0.1673 - - 0.2432 + - 0.1695 + - 0.2469 p30: - - 0.2827 - - 0.4500 + - 0.2767 + - 0.4533 ndcg20: - - 0.20662 - - 0.26458 + - 0.20858 + - 0.25776 err20: - - 0.12126 - - 0.17373 + - 0.12835 + - 0.16305 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.1489 - - 0.2468 + - 0.1498 + - 0.2496 p30: - - 0.2347 - - 0.4200 + - 0.2407 + - 0.4180 ndcg20: - - 0.17566 - - 0.24349 + - 0.18362 + - 0.24303 err20: - - 0.09148 - - 0.17411 + - 0.09742 + - 0.17134 - name: ql params: - -ql results: map: - - 0.1438 - - 0.2401 + - 0.1493 + - 0.2467 p30: - - 0.2507 - - 0.4367 + - 0.2613 + - 0.4380 ndcg20: - - 0.19046 - - 0.23273 + - 0.19935 + - 0.22282 err20: - - 0.11694 - - 0.14512 + - 0.12319 + - 0.13211 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.1235 - - 0.2331 + - 0.1280 + - 0.2383 p30: - - 0.2047 - - 0.4013 + - 0.2207 + - 0.4107 ndcg20: - - 0.15570 - - 0.21679 + - 0.16115 + - 0.22580 err20: - - 0.08588 - - 0.13441 + - 0.09129 + - 0.14066 + diff --git a/src/main/resources/regression/cw12b13.yaml b/src/main/resources/regression/cw12b13.yaml index 0e6b636389..de5a60229b 100644 --- a/src/main/resources/regression/cw12b13.yaml +++ b/src/main/resources/regression/cw12b13.yaml @@ -65,34 +65,34 @@ models: - -bm25 results: map: - - 0.0457 - - 0.0219 + - 0.0468 + - 0.0224 p30: - - 0.2000 - - 0.1293 + - 0.2113 + - 0.1273 ndcg20: - - 0.12419 - - 0.11900 + - 
0.12862 + - 0.11849 err20: - - 0.08205 - - 0.12373 + - 0.08379 + - 0.12013 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.0440 - - 0.0192 + - 0.0450 + - 0.0189 p30: - - 0.1767 - - 0.1113 + - 0.1787 + - 0.1133 ndcg20: - - 0.11974 - - 0.10017 + - 0.12284 + - 0.10124 err20: - - 0.07771 - - 0.10139 + - 0.08793 + - 0.10390 - name: bm25+ax params: - -bm25 @@ -102,50 +102,50 @@ models: - -axiom.beta 0.1 results: map: - - 0.0411 - - 0.0177 + - 0.0435 + - 0.0180 p30: - - 0.1800 - - 0.1173 + - 0.1840 + - 0.1107 ndcg20: - - 0.12449 - - 0.09690 + - 0.12875 + - 0.09637 err20: - - 0.09151 - - 0.09588 + - 0.09430 + - 0.09289 - name: ql params: - -ql results: map: - - 0.0389 - - 0.0228 + - 0.0397 + - 0.0235 p30: - - 0.1720 - - 0.1313 + - 0.1767 + - 0.1373 ndcg20: - - 0.11584 - - 0.11327 + - 0.11067 + - 0.11765 err20: - - 0.07636 - - 0.10398 + - 0.07689 + - 0.10908 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.0314 - - 0.0202 + - 0.0319 + - 0.0205 p30: - - 0.1420 - - 0.1160 + - 0.1373 + - 0.1173 ndcg20: - - 0.08515 - - 0.09591 + - 0.08799 + - 0.10240 err20: - - 0.05106 - - 0.09114 + - 0.05681 + - 0.10356 - name: ql+ax params: - -ql @@ -155,14 +155,14 @@ models: - -axiom.beta 0.1 results: map: - - 0.0354 - - 0.0189 + - 0.0359 + - 0.0186 p30: - 0.1513 - - 0.1180 + - 0.1167 ndcg20: - - 0.11169 - - 0.09989 + - 0.11435 + - 0.10013 err20: - - 0.07054 - - 0.09945 + - 0.07800 + - 0.08965 diff --git a/src/main/resources/regression/disk12.yaml b/src/main/resources/regression/disk12.yaml index 78f7ce87df..e1997ecd22 100644 --- a/src/main/resources/regression/disk12.yaml +++ b/src/main/resources/regression/disk12.yaml @@ -56,12 +56,12 @@ models: - -bm25 results: map: - - 0.2254 - - 0.2003 - - 0.2571 + - 0.2273 + - 0.2010 + - 0.2580 p30: - - 0.4493 - - 0.4213 + - 0.4533 + - 0.4280 - 0.4740 - name: bm25+rm3 params: @@ -69,13 +69,13 @@ models: - -rm3 results: map: - - 0.2607 - - 0.2579 - - 0.3224 + - 0.2617 + - 0.2600 + - 0.3227 p30: - - 0.4813 + - 0.4867 - 0.4580 - - 
0.5100 + - 0.5040 - name: bm25+ax params: - -bm25 @@ -84,38 +84,38 @@ models: - -axiom.deterministic results: map: - - 0.2675 - - 0.2708 - - 0.3349 + - 0.2640 + - 0.2722 + - 0.3318 p30: - - 0.5167 - - 0.4787 - - 0.5160 + - 0.5067 + - 0.4753 + - 0.5100 - name: ql params: - -ql results: map: - - 0.2188 - - 0.2013 - - 0.2530 + - 0.2189 + - 0.2015 + - 0.2518 p30: - - 0.4453 - - 0.4153 - - 0.4647 + - 0.4520 + - 0.4207 + - 0.4580 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.2500 - - 0.2475 - - 0.3019 + - 0.2478 + - 0.2485 + - 0.2996 p30: - - 0.4687 - - 0.4427 - - 0.5013 + - 0.4653 + - 0.4453 + - 0.4933 - name: ql+ax params: - -ql @@ -124,10 +124,10 @@ models: - -axiom.deterministic results: map: - - 0.2519 - - 0.2606 - - 0.3113 + - 0.2501 + - 0.2593 + - 0.3103 p30: - - 0.4967 - - 0.4660 - - 0.5160 + - 0.4953 + - 0.4740 + - 0.5167 diff --git a/src/main/resources/regression/gov2.yaml b/src/main/resources/regression/gov2.yaml index 126c4a68eb..9173dd1bfe 100644 --- a/src/main/resources/regression/gov2.yaml +++ b/src/main/resources/regression/gov2.yaml @@ -56,26 +56,26 @@ models: - -bm25 results: map: - - 0.2673 - - 0.3366 - - 0.3055 + - 0.2689 + - 0.3390 + - 0.3080 p30: - - 0.4837 - - 0.5520 - - 0.4900 + - 0.4864 + - 0.5540 + - 0.4907 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.2974 - - 0.3846 - - 0.3438 + - 0.2943 + - 0.3800 + - 0.3356 p30: - - 0.5347 - - 0.5960 - - 0.5227 + - 0.5313 + - 0.5873 + - 0.5160 - name: bm25+ax params: - -bm25 @@ -85,38 +85,38 @@ models: - -axiom.deterministic results: map: - - 0.2735 - - 0.3669 - - 0.3061 + - 0.2665 + - 0.3664 + - 0.3069 p30: - - 0.5082 - - 0.5947 - - 0.5007 + - 0.4986 + - 0.5933 + - 0.5033 - name: ql params: - -ql results: map: - - 0.2636 - - 0.3264 - - 0.2957 + - 0.2681 + - 0.3303 + - 0.2996 p30: - - 0.4667 - - 0.5160 - - 0.4753 + - 0.4755 + - 0.5347 + - 0.4720 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.2770 - - 0.3610 - - 0.3160 + - 0.2806 + - 0.3628 + - 0.3173 p30: - - 0.4878 - - 
0.5673 - - 0.4853 + - 0.4952 + - 0.5720 + - 0.4773 - name: ql+ax params: - -ql @@ -126,10 +126,10 @@ models: - -axiom.deterministic results: map: - - 0.2638 - - 0.3670 - - 0.3112 + - 0.2666 + - 0.3646 + - 0.3084 p30: - - 0.4837 - - 0.5880 - - 0.5007 + - 0.4932 + - 0.5840 + - 0.4920 diff --git a/src/main/resources/regression/mb11.yaml b/src/main/resources/regression/mb11.yaml index 225b46f02e..b9d5085184 100644 --- a/src/main/resources/regression/mb11.yaml +++ b/src/main/resources/regression/mb11.yaml @@ -58,11 +58,11 @@ models: - -bm25 results: map: - - 0.3351 - - 0.1912 + - 0.3384 + - 0.1948 p30: - - 0.3837 - - 0.3328 + - 0.3959 + - 0.3316 - name: bm25+rm3 params: - -searchtweets @@ -70,11 +70,11 @@ models: - -rm3 results: map: - - 0.3477 - - 0.2055 + - 0.3621 + - 0.2124 p30: - - 0.4027 - - 0.3424 + - 0.4088 + - 0.3463 - name: bm25+ax params: - -searchtweets @@ -85,22 +85,22 @@ models: - -axiom.deterministic results: map: - - 0.4042 - - 0.2310 + - 0.4008 + - 0.2309 p30: - - 0.4558 - - 0.3588 + - 0.4612 + - 0.3554 - name: ql params: - -searchtweets - -ql results: map: - - 0.3614 - - 0.2100 + - 0.3584 + - 0.2102 p30: - - 0.4095 - - 0.3322 + - 0.4061 + - 0.3333 - name: ql+rm3 params: - -searchtweets @@ -108,11 +108,11 @@ models: - -rm3 results: map: - - 0.4093 - - 0.2412 + - 0.4097 + - 0.2397 p30: - 0.4483 - - 0.3542 + - 0.3571 - name: ql+ax params: - -searchtweets @@ -123,8 +123,8 @@ models: - -axiom.deterministic results: map: - - 0.4179 - - 0.2502 + - 0.4201 + - 0.2474 p30: - - 0.4367 - - 0.3864 + - 0.4408 + - 0.3842 diff --git a/src/main/resources/regression/mb13.yaml b/src/main/resources/regression/mb13.yaml index 926476e84e..df6175c273 100644 --- a/src/main/resources/regression/mb13.yaml +++ b/src/main/resources/regression/mb13.yaml @@ -59,11 +59,11 @@ models: - -bm25 results: map: - - 0.2306 - - 0.3836 + - 0.2371 + - 0.3931 p30: - - 0.4222 - - 0.6176 + - 0.4339 + - 0.6212 - name: bm25+rm3 params: - -searchtweets @@ -71,11 +71,11 @@ models: - -rm3 results: map: 
- - 0.2356 - - 0.4036 + - 0.2440 + - 0.4158 p30: - - 0.4044 - - 0.6061 + - 0.4350 + - 0.6236 - name: bm25+ax params: - -searchtweets @@ -86,22 +86,22 @@ models: - -axiom.deterministic results: map: - - 0.2770 - - 0.4673 + - 0.2855 + - 0.4796 p30: - - 0.4611 - - 0.6479 + - 0.4728 + - 0.6648 - name: ql params: - -searchtweets - -ql results: map: - - 0.2599 - - 0.4184 + - 0.2602 + - 0.4181 p30: - - 0.4517 - - 0.6424 + - 0.4561 + - 0.6430 - name: ql+rm3 params: - -searchtweets @@ -109,11 +109,11 @@ models: - -rm3 results: map: - - 0.2796 - - 0.4763 + - 0.2815 + - 0.4746 p30: - - 0.4600 - - 0.6606 + - 0.4672 + - 0.6594 - name: ql+ax params: - -searchtweets @@ -124,8 +124,8 @@ models: - -axiom.deterministic results: map: - - 0.3167 - - 0.4943 + - 0.3152 + - 0.4965 p30: - - 0.5117 - - 0.6770 + - 0.5078 + - 0.6727 diff --git a/src/main/resources/regression/robust04.yaml b/src/main/resources/regression/robust04.yaml index 921789114b..55844ce23b 100644 --- a/src/main/resources/regression/robust04.yaml +++ b/src/main/resources/regression/robust04.yaml @@ -51,18 +51,18 @@ models: - -bm25 results: map: - - 0.2501 + - 0.2531 p30: - - 0.3123 + - 0.3102 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.2759 + - 0.2778 p30: - - 0.3252 + - 0.3288 - name: bm25+ax params: - -bm25 @@ -71,26 +71,26 @@ models: - -axiom.deterministic results: map: - - 0.2860 + - 0.2895 p30: - - 0.3339 + - 0.3333 - name: ql params: - -ql results: map: - - 0.2468 + - 0.2467 p30: - - 0.3083 + - 0.3079 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.2643 + - 0.2649 p30: - - 0.3138 + - 0.3171 - name: ql+ax params: - -ql @@ -99,6 +99,6 @@ models: - -axiom.deterministic results: map: - - 0.2775 + - 0.2774 p30: - - 0.3233 + - 0.3229 diff --git a/src/main/resources/regression/robust05.yaml b/src/main/resources/regression/robust05.yaml index 4e9f2c14ba..901d84fa48 100644 --- a/src/main/resources/regression/robust05.yaml +++ b/src/main/resources/regression/robust05.yaml @@ -51,18 +51,18 @@ models: 
- -bm25 results: map: - - 0.2003 + - 0.2031 p30: - - 0.3660 + - 0.3693 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.2517 + - 0.2523 p30: - - 0.3913 + - 0.4007 - name: bm25+ax params: - -bm25 @@ -71,26 +71,26 @@ models: - -axiom.deterministic results: map: - - 0.2528 + - 0.2584 p30: - - 0.4007 + - 0.4120 - name: ql params: - -ql results: map: - - 0.2026 + - 0.2028 p30: - - 0.3713 + - 0.3653 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.2474 + - 0.2466 p30: - - 0.4020 + - 0.4067 - name: ql+ax params: - -ql @@ -99,6 +99,6 @@ models: - -axiom.deterministic results: map: - - 0.2501 + - 0.2476 p30: - - 0.4080 + - 0.4113 diff --git a/src/main/resources/regression/wt10g.yaml b/src/main/resources/regression/wt10g.yaml index 862973cab2..7f2197cf90 100644 --- a/src/main/resources/regression/wt10g.yaml +++ b/src/main/resources/regression/wt10g.yaml @@ -51,18 +51,18 @@ models: - -bm25 results: map: - - 0.1981 + - 0.1992 p30: - - 0.2201 + - 0.2218 - name: bm25+rm3 params: - -bm25 - -rm3 results: map: - - 0.2169 + - 0.2163 p30: - - 0.2456 + - 0.2463 - name: bm25+ax params: - -bm25 @@ -72,26 +72,26 @@ models: - -axiom.deterministic results: map: - - 0.2185 + - 0.2200 p30: - - 0.2442 + - 0.2483 - name: ql params: - -ql results: map: - - 0.2015 + - 0.2021 p30: - - 0.2184 + - 0.2180 - name: ql+rm3 params: - -ql - -rm3 results: map: - - 0.2169 + - 0.2151 p30: - - 0.2354 + - 0.2276 - name: ql+ax params: - -ql @@ -101,6 +101,6 @@ models: - -axiom.deterministic results: map: - - 0.2250 + - 0.2275 p30: - - 0.2520 + - 0.2517 diff --git a/src/test/java/io/anserini/integration/IndexerTest.java b/src/test/java/io/anserini/integration/IndexerTest.java index 908d9dbbe6..c37c9c69f6 100644 --- a/src/test/java/io/anserini/integration/IndexerTest.java +++ b/src/test/java/io/anserini/integration/IndexerTest.java @@ -273,6 +273,16 @@ public FieldsProducer getPostingsReader() { System.out.println("Getting custom postings reader..."); return new 
MyFieldsProducer(in.getPostingsReader()); } + + @Override + public IndexReader.CacheHelper getCoreCacheHelper() { + throw new UnsupportedOperationException(); + } + + @Override + public IndexReader.CacheHelper getReaderCacheHelper() { + throw new UnsupportedOperationException(); + } } // Custom class so we can intercept calls and potentially alter behavior. diff --git a/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java b/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java index 5620a87f41..076d2f4854 100644 --- a/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java +++ b/src/test/java/io/anserini/integration/MultiThreadingSearchTest.java @@ -43,7 +43,7 @@ protected void init() { protected void setSearchArgs() { super.setSearchArgs(); searchArgs.bm25 = true; - searchArgs.b = new String[] {"0.2", "0.4"}; + searchArgs.b = new String[] {"0.2", "0.8"}; } protected void testEval() throws Exception { diff --git a/src/test/java/io/anserini/integration/TrecEndToEndTest.java b/src/test/java/io/anserini/integration/TrecEndToEndTest.java index 508e8f00a6..c5c83c26a2 100644 --- a/src/test/java/io/anserini/integration/TrecEndToEndTest.java +++ b/src/test/java/io/anserini/integration/TrecEndToEndTest.java @@ -25,16 +25,22 @@ protected void init() { generator = "Jsoup"; topicReader = "Trec"; - fieldNormStatusTotalFields = 1; // text - termIndexStatusTermCount = 12; // Please note that standard analyzer ignores stopwords. - // Also, this includes docids - termIndexStatusTotFreq = 17; // - termIndexStatusTotPos = 16; // only "text" fields are indexed with position so we have 16 + fieldNormStatusTotalFields = 1; // text + termIndexStatusTermCount = 12; // Note that standard analyzer ignores stopwords; includes docids. + termIndexStatusTotFreq = 17; + termIndexStatusTotPos = 16; // Only "text" fields are indexed with position so we have 16. 
storedFieldStatusTotalDocCounts = 3; storedFieldStatusTotFields = 9; // 3 docs * (1 id + 1 text + 1 raw) - evalMetricValue = (float)(0.0/1+1.0/2+2.0/3)/2.0f; // 3 retrieved docs in total: - // 1st retrieved doc is non-rel, 2nd and 3rd are rel - // and there are in total 3 rel docs in qrels + // The search output should be as follows (for Lucene 7.5): + // 1 Q0 DOC222 1 0.652100 Anserini + // 1 Q0 TREC_DOC_1 2 0.633500 Anserini + // 1 Q0 WSJ_1 3 0.130400 Anserini + + // Qrels are at src/test/resources/sample_qrels/Trec + // 1 0 TREC_DOC_1 0 + // 1 0 DOC222 1 + // 1 0 WSJ_1 1 + evalMetricValue = (float) (1.0/1.0 + 2.0/3)/2.0f; } } diff --git a/src/test/java/io/anserini/integration/TweetEndToEndTest.java b/src/test/java/io/anserini/integration/TweetEndToEndTest.java index a0ef17e845..247f127244 100644 --- a/src/test/java/io/anserini/integration/TweetEndToEndTest.java +++ b/src/test/java/io/anserini/integration/TweetEndToEndTest.java @@ -34,9 +34,21 @@ protected void init() { storedFieldStatusTotalDocCounts = 4; storedFieldStatusTotFields = 12; // 4 tweets * (1 id + 1 text + 1 raw) - evalMetricValue = (float)(0.0/1+1.0/2)/3.0f; // 2 retrieved docs in total: (please note the querytweettime filters 1 rel tweet) - // 1st retrieved doc is non-rel, 2nd retrieved is rel - // and there are in total 3 rel docs in qrels + // The search output should be as follows (for Lucene 7.5): + // 1 Q0 5 1 1.167100 Anserini + // 1 Q0 3 2 0.693100 Anserini + + // Qrels are at src/test/resources/sample_qrels/Microblog + // 1 0 1 0 + // 1 0 3 1 + // 1 0 5 0 + // 1 0 6 0 + // 1 0 8 1 + // 1 0 10 1 + evalMetricValue = (float) (0.0/1 + 1.0/2)/3.0f; + // 2 retrieved docs in total: note that querytweettime filters 1 rel tweet. + // 1st retrieved doc is not relevant, 2nd retrieved doc is relevant, + // and there are 3 relevant docs in qrels. } @Override