-
Notifications
You must be signed in to change notification settings - Fork 0
/
bibliography.bib
675 lines (627 loc) · 30.5 KB
/
bibliography.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
@article{Shannon1948,
  author   = {Shannon, Claude Elwood},
  title    = {A mathematical theory of communication},
  journal  = {The Bell System Technical Journal},
  volume   = {27},
  number   = {3},
  pages    = {379--423},
  month    = jul,
  year     = {1948},
  doi      = {10.1002/j.1538-7305.1948.tb01338.x},
  issn     = {0005-8580},
  url      = {http://cm.bell-labs.com/cm/ms/what/shannonday/shannon1948.pdf},
  abstract = {The recent development of various methods of modulation such as PCM and PPM which exchange bandwidth for signal-to-noise ratio has intensified the interest in a general theory of communication. A basis for such a theory is contained in the important papers of Nyquist and Hartley on this subject. In the present paper we will extend the theory to include a number of new factors, in particular the effect of noise in the channel, and the savings possible due to the statistical structure of the original message and due to the nature of the final destination of the information.},
}
@article{Shannon1951,
  author   = {Shannon, Claude Elwood},
  title    = {Prediction and entropy of printed {English}},
  journal  = {The Bell System Technical Journal},
  volume   = {30},
  number   = {1},
  pages    = {50--64},
  month    = jan,
  year     = {1951},
  doi      = {10.1002/j.1538-7305.1951.tb01366.x},
  issn     = {0005-8580},
  url      = {https://archive.org/details/bstj30-1-50},
  abstract = {A new method of estimating the entropy and redundancy of a language is described. This method exploits the knowledge of the language statistics possessed by those who speak the language, and depends on experimental results in prediction of the next letter when the preceding text is known. Results of experiments in prediction are given, and some properties of an ideal predictor are developed.},
}
@book{Tukey1977,
  author               = {Tukey, John W.},
  title                = {Exploratory Data Analysis},
  publisher            = {Addison-Wesley},
  year                 = {1977},
  keywords             = {eda, statistics},
  citeulike-article-id = {107137},
  posted-at            = {2005-03-01 04:45:58},
  priority             = {0},
}
@inproceedings{JelinekMercer1980,
  author    = {Jelinek, Frederick and Mercer, Robert L.},
  title     = {Interpolated estimation of {Markov} source parameters from sparse data},
  booktitle = {Proceedings of the Workshop on Pattern Recognition in Practice},
  pages     = {381--397},
  publisher = {North-Holland},
  address   = {Amsterdam, The Netherlands},
  month     = may,
  year      = {1980},
  keywords  = {imported},
  added-at  = {2010-11-08T15:06:07.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2399ceb97d18dd2e0f31f6f32cd80b51f/yfpeng},
  interhash = {20df321e3bc1fc00b748c65be9998cc5},
  intrahash = {399ceb97d18dd2e0f31f6f32cd80b51f},
  owner     = {Yifan Peng},
  timestamp = {2010-11-08T15:06:10.000+0100},
}
@article{Swiffin1987,
  author  = {Swiffin, Andrew and Arnott, John and Pickering, J. Adrian and Newell, Alan},
  title   = {Adaptive and predictive techniques in a communication prosthesis},
  journal = {Augmentative and Alternative Communication},
  volume  = {3},
  number  = {4},
  pages   = {181--191},
  year    = {1987},
  doi     = {10.1080/07434618712331274499},
  url     = {http://dx.doi.org/10.1080/07434618712331274499},
}
@inproceedings{Church1988,
  author    = {Church, Kenneth Ward},
  title     = {A Stochastic Parts Program and Noun Phrase Parser for Unrestricted Text},
  booktitle = {Proceedings of the Second Conference on Applied Natural Language Processing},
  series    = {ANLC '88},
  pages     = {136--143},
  numpages  = {8},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Austin, Texas},
  year      = {1988},
  doi       = {10.3115/974235.974260},
  url       = {http://dx.doi.org/10.3115/974235.974260},
  acmid     = {974260},
}
@article{Brown1990,
  author     = {Brown, Peter F. and Cocke, John and Della Pietra, Stephen A. and Della Pietra, Vincent J. and Jelinek, Frederick and Lafferty, John D. and Mercer, Robert L. and Roossin, Paul S.},
  title      = {A Statistical Approach to Machine Translation},
  journal    = {Computational Linguistics},
  volume     = {16},
  number     = {2},
  pages      = {79--85},
  numpages   = {7},
  month      = jun,
  year       = {1990},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
  issn       = {0891-2017},
  url        = {http://dl.acm.org/citation.cfm?id=92858.92860},
  acmid      = {92860},
  issue_date = {June 1990},
  masid      = {170313},
}
@inproceedings{Kernighan1990,
  author    = {Kernighan, Mark D. and Church, Kenneth Ward and Gale, William A.},
  title     = {A Spelling Correction Program Based on a Noisy Channel Model},
  booktitle = {Proceedings of the 13th Conference on Computational Linguistics - Volume 2},
  series    = {COLING '90},
  pages     = {205--210},
  numpages  = {6},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Helsinki, Finland},
  year      = {1990},
  doi       = {10.3115/997939.997975},
  url       = {http://dx.doi.org/10.3115/997939.997975},
  acmid     = {997975},
}
@article{Mays1991,
  author     = {Mays, Eric and Damerau, Fred J. and Mercer, Robert L.},
  title      = {Context Based Spelling Correction},
  journal    = {Information Processing \& Management},
  volume     = {27},
  number     = {5},
  pages      = {517--522},
  numpages   = {6},
  month      = sep,
  year       = {1991},
  publisher  = {Pergamon Press, Inc.},
  address    = {Tarrytown, NY, USA},
  doi        = {10.1016/0306-4573(91)90066-U},
  issn       = {0306-4573},
  url        = {http://dx.doi.org/10.1016/0306-4573(91)90066-U},
  acmid      = {117651},
  issue_date = {1991},
}
@inproceedings{NeyEssen1991,
  author    = {Ney, Hermann and Essen, Ute},
  title     = {On smoothing techniques for bigram-based natural language modelling},
  booktitle = {1991 International Conference on Acoustics, Speech, and Signal Processing ({ICASSP}-91)},
  volume    = {2},
  pages     = {825--828},
  month     = apr,
  year      = {1991},
  doi       = {10.1109/ICASSP.1991.150464},
  issn      = {1520-6149},
  keywords  = {natural languages;probability;speech analysis and processing;speech recognition;English database;German database;automatic clustering procedure;bigram probabilities;interpolation parameters;leaving-one-out method;linear interpolation;maximum likelihood criterion;natural language modelling;nonlinear interpolation;optimal estimation;parameter estimation;smoothing techniques;speech recognition;text databases;word equivalence classes;Databases;Equations;Error analysis;Interpolation;Maximum likelihood estimation;Natural languages;Parameter estimation;Smoothing methods;Speech recognition;Testing},
  abstract  = {The authors study various problems related to smoothing bigram probabilities for natural language modeling: the type of interpolation, i.e. linear vs. nonlinear, the optimal estimation of interpolation parameters, and the use of word equivalence classes (parts of speech). A nonlinear interpolation method that results in significant improvements over linear interpolation in the experimental tests is proposed. It is shown that the leaving-one-out method in combination with the maximum likelihood criterion can be efficiently used for the optimal estimation of interpolation parameters. In addition, an automatic clustering procedure is developed for finding word equivalence classes using a maximum likelihood criterion. Experimental results are presented for two text databases: a German database with 100000 words and an English database with 1.1 million words},
}
@article{Newell1992,
  author   = {Newell, Alan and Arnott, John and Booth, Lynda and Beattie, William and Brophy, Bernadette and Ricketts, Ian},
  title    = {Effect of the ``{PAL}'' word prediction system on the quality and quantity of text generation},
  journal  = {Augmentative and Alternative Communication},
  volume   = {8},
  number   = {4},
  pages    = {304--311},
  year     = {1992},
  doi      = {10.1080/07434619212331276343},
  url      = {http://www.tandfonline.com/doi/abs/10.1080/07434619212331276343},
  eprint   = {http://www.tandfonline.com/doi/pdf/10.1080/07434619212331276343},
  abstract = {In the main, the value of rate-enhancement techniques in prostheses for people with severe speech disorders has been judged on the basis of simple measures such as rate of composition and keysaving. Comparative studies have not been conducted into long-term effects of the use of such systems. In addition the quality of the users' output can be as important as the quantity and the ease with which it can be produced. In this paper, long-term case studies of adults and children are discussed. Results show that a predictive word processing system can make valuable improvements to the quantity and quality of the written work of users with both physical impairments and spelling problems. In addition, some evidence has been found of the potential of orthographic predictive systems for assisting those with language disorders. It is argued that, in many clinical and educational situations, these effects can be more important than those indicated by simple measurements of rate enhancement and keysaving for such systems.},
}
@article{NeyKneser1994,
  author               = {Ney, Hermann and Essen, Ute and Kneser, Reinhard},
  title                = {On Structuring Probabilistic Dependencies in Stochastic Language Modelling},
  journal              = {Computer Speech \& Language},
  volume               = {8},
  number               = {1},
  pages                = {1--38},
  year                 = {1994},
  keywords             = {lus},
  citeulike-article-id = {1740288},
  posted-at            = {2007-10-08 09:39:20},
  priority             = {0},
}
@inproceedings{KneserNey1995,
  author    = {Kneser, Reinhard and Ney, Hermann},
  title     = {Improved backing-off for {M-gram} language modeling},
  booktitle = {1995 International Conference on Acoustics, Speech, and Signal Processing ({ICASSP}-95)},
  volume    = {1},
  pages     = {181--184},
  month     = may,
  year      = {1995},
  doi       = {10.1109/ICASSP.1995.479394},
  issn      = {1520-6149},
  keywords  = {grammars, natural languages, probability, speech processing, speech recognition, statistical analysis, stochastic processes, backing-off, distributions, experiments, perplexity, sparse data problem, stochastic language modeling, word error rate, Error analysis, History, Interpolation, Laboratories, Natural languages, Probability distribution, Smoothing methods, Stochastic processes, Training data},
}
@inproceedings{ChenGoodman1996,
  author    = {Chen, Stanley F. and Goodman, Joshua T.},
  title     = {An Empirical Study of Smoothing Techniques for Language Modeling},
  booktitle = {Proceedings of the 34th Annual Meeting on Association for Computational Linguistics},
  series    = {ACL '96},
  pages     = {310--318},
  numpages  = {9},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Santa Cruz, California},
  year      = {1996},
  doi       = {10.3115/981863.981904},
  url       = {http://dx.doi.org/10.3115/981863.981904},
  acmid     = {981904},
}
@techreport{ChenGoodman1998,
  author      = {Chen, Stanley F. and Goodman, Joshua T.},
  title       = {An Empirical Study of Smoothing Techniques for Language Modeling},
  institution = {Computer Science Group, Harvard University},
  number      = {TR-10-98},
  address     = {Cambridge, MA, USA},
  year        = {1998},
}
@article{ChenGoodman1999,
  author    = {Chen, Stanley F. and Goodman, Joshua T.},
  title     = {An empirical study of smoothing techniques for language modeling},
  journal   = {Computer Speech {\&} Language},
  volume    = {13},
  number    = {4},
  pages     = {359--393},
  year      = {1999},
  doi       = {10.1006/csla.1999.0128},
  url       = {http://dx.doi.org/10.1006/csla.1999.0128},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/csl/ChenG99},
  timestamp = {Fri, 17 Jul 2009 14:38:09 +0200},
}
@inproceedings{Brill2000,
  author    = {Brill, Eric and Moore, Robert C.},
  title     = {An Improved Error Model for Noisy Channel Spelling Correction},
  booktitle = {Proceedings of the 38th Annual Meeting on Association for Computational Linguistics},
  series    = {ACL '00},
  pages     = {286--293},
  numpages  = {8},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Hong Kong},
  month     = oct,
  year      = {2000},
  doi       = {10.3115/1075218.1075255},
  url       = {http://dx.doi.org/10.3115/1075218.1075255},
  acmid     = {1075255},
  abstract  = {The noisy channel model has been applied to a wide range of problems, including spelling correction. These models consist of two components: a source model and a channel model. Very little research has gone into improving the channel model for spelling correction. This paper describes a new channel model for spelling correction, based on generic string to string edits. Using this model gives significant performance improvements compared to previously proposed models.},
}
@inproceedings{Chang2000,
  author    = {Chang, Yuan-Chi and Bergman, Lawrence and Castelli, Vittorio and Li, Chung-Sheng and Lo, Ming-Ling and Smith, John R.},
  title     = {The Onion Technique: Indexing for Linear Optimization Queries},
  booktitle = {Proceedings of the 2000 ACM SIGMOD International Conference on Management of Data},
  series    = {SIGMOD '00},
  pages     = {391--402},
  numpages  = {12},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Dallas, Texas, USA},
  year      = {2000},
  doi       = {10.1145/342009.335433},
  isbn      = {1-58113-217-4},
  url       = {http://doi.acm.org/10.1145/342009.335433},
  keywords  = {database indexing, linear optimization},
  acmid     = {335433},
}
@inproceedings{Guentzer2000,
  author    = {G{\"u}ntzer, Ulrich and Balke, Wolf-Tilo and Kie{\ss}ling, Werner},
  title     = {Optimizing Multi-Feature Queries for Image Databases},
  booktitle = {Proceedings of the 26th International Conference on Very Large Data Bases},
  series    = {VLDB '00},
  pages     = {419--428},
  numpages  = {10},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = {2000},
  isbn      = {1-55860-715-3},
  url       = {http://dl.acm.org/citation.cfm?id=645926.671875},
  acmid     = {671875},
}
@article{Stolcke2000,
  author        = {Stolcke, Andreas},
  title         = {Entropy-based Pruning of Backoff Language Models},
  journal       = {CoRR},
  volume        = {cs.CL/0006025},
  year          = {2000},
  eprint        = {cs/0006025},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {http://arxiv.org/abs/cs.CL/0006025},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
  biburl        = {http://dblp.uni-trier.de/rec/bib/journals/corr/cs-CL-0006025},
  timestamp     = {Mon, 05 Dec 2011 18:04:26 +0100},
}
@inproceedings{Fagin2001,
  author    = {Fagin, Ronald and Lotem, Amnon and Naor, Moni},
  title     = {Optimal Aggregation Algorithms for Middleware},
  booktitle = {Proceedings of the Twentieth ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems},
  series    = {PODS '01},
  pages     = {102--113},
  numpages  = {12},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Santa Barbara, California, USA},
  year      = {2001},
  doi       = {10.1145/375551.375567},
  isbn      = {1-58113-361-8},
  url       = {http://doi.acm.org/10.1145/375551.375567},
  acmid     = {375567},
}
@article{Goodman2001,
  author    = {Goodman, Joshua T.},
  title     = {A Bit of Progress in Language Modeling},
  journal   = {Computer Speech \& Language},
  volume    = {15},
  number    = {4},
  pages     = {403--434},
  year      = {2001},
  doi       = {10.1006/csla.2001.0174},
  url       = {http://arxiv.org/abs/cs.CL/0108005},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/cs-CL-0108005},
  masid     = {2544976},
  timestamp = {Mon, 05 Dec 2011 18:05:05 +0100},
}
@inproceedings{Guentzer2001,
  author    = {G{\"u}ntzer, Ulrich and Balke, Wolf-Tilo and Kie{\ss}ling, Werner},
  title     = {Towards Efficient Multi-Feature Queries in Heterogeneous Environments},
  booktitle = {Proceedings of the International Conference on Information Technology: Coding and Computing},
  series    = {ITCC '01},
  pages     = {622--628},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  month     = apr,
  year      = {2001},
  doi       = {10.1109/ITCC.2001.918866},
  isbn      = {0-7695-1062-0},
  url       = {http://dl.acm.org/citation.cfm?id=876870.878237},
  acmid     = {878237},
}
@inproceedings{Natsev2001,
  author    = {Natsev, Apostol and Chang, Yuan-Chi and Smith, John R. and Li, Chung-Sheng and Vitter, Jeffrey Scott},
  title     = {Supporting Incremental Join Queries on Ranked Inputs},
  booktitle = {Proceedings of the 27th International Conference on Very Large Data Bases},
  series    = {VLDB '01},
  pages     = {281--290},
  numpages  = {10},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = {2001},
  isbn      = {1-55860-804-4},
  url       = {http://dl.acm.org/citation.cfm?id=645927.672365},
  acmid     = {672365},
}
@article{Bengio2003,
  author     = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal and Janvin, Christian},
  title      = {A Neural Probabilistic Language Model},
  journal    = {Journal of Machine Learning Research},
  volume     = {3},
  pages      = {1137--1155},
  numpages   = {19},
  month      = mar,
  year       = {2003},
  publisher  = {JMLR.org},
  issn       = {1532-4435},
  url        = {http://dl.acm.org/citation.cfm?id=944919.944966},
  acmid      = {944966},
  issue_date = {3/1/2003},
}
@inproceedings{BilmesKirchhoff2003,
  author    = {Bilmes, Jeff A. and Kirchhoff, Katrin},
  title     = {Factored Language Models and Generalized Parallel Backoff},
  booktitle = {Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology: Companion Volume of the Proceedings of HLT-NAACL 2003--short Papers - Volume 2},
  series    = {NAACL-Short '03},
  pages     = {4--6},
  numpages  = {3},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Edmonton, Canada},
  year      = {2003},
  doi       = {10.3115/1073483.1073485},
  url       = {http://dx.doi.org/10.3115/1073483.1073485},
  acmid     = {1073485},
}
@inproceedings{HonalSchults2003,
  author    = {Honal, Matthias and Schultz, Tanja},
  title     = {Correction of Disfluencies in Spontaneous Speech using a Noisy-Channel Approach},
  booktitle = {Proceedings of the 8th Eurospeech Conference},
  year      = {2003},
}
@inproceedings{Tsaparas2003,
  author       = {Tsaparas, P. and Palpanas, T. and Kotidis, Y. and Koudas, N. and Srivastava, D.},
  title        = {Ranked join indices},
  booktitle    = {Proceedings of the 19th International Conference on Data Engineering},
  pages        = {277--288},
  organization = {IEEE},
  month        = mar,
  year         = {2003},
  doi          = {10.1109/ICDE.2003.1260799},
  keywords     = {computational complexity;database indexing;query processing;relational algebra;attribute domain;data entity;join operation;query processing;ranked join index;real data set;scoring function;synthetic data set;target query;top-k join query;user preference;Airports;Availability;Data engineering;Databases;Delay effects;Performance analysis;Proposals;Quality of service;Query processing},
}
@article{Ilyas2004,
  author     = {Ilyas, Ihab F. and Aref, Walid G. and Elmagarmid, Ahmed K.},
  title      = {Supporting Top-k Join Queries in Relational Databases},
  journal    = {The VLDB Journal},
  volume     = {13},
  number     = {3},
  pages      = {207--221},
  numpages   = {15},
  month      = sep,
  year       = {2004},
  publisher  = {Springer-Verlag New York, Inc.},
  address    = {Secaucus, NJ, USA},
  doi        = {10.1007/s00778-004-0128-2},
  issn       = {1066-8888},
  url        = {http://dx.doi.org/10.1007/s00778-004-0128-2},
  keywords   = {Query operators, Rank aggregation, Ranking, Top-k queries},
  acmid      = {1023915},
  issue_date = {September 2004},
}
@inproceedings{Bickel2005,
  author    = {Bickel, Steffen and Haider, Peter and Scheffer, Tobias},
  title     = {Predicting Sentences Using N-gram Language Models},
  booktitle = {Proceedings of the Conference on Human Language Technology and Empirical Methods in Natural Language Processing},
  series    = {HLT '05},
  pages     = {193--200},
  numpages  = {8},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Vancouver, British Columbia, Canada},
  year      = {2005},
  doi       = {10.3115/1220575.1220600},
  url       = {http://dx.doi.org/10.3115/1220575.1220600},
  acmid     = {1220600},
}
@inproceedings{Bast2006,
  author    = {Bast, Holger and Majumdar, Debapriyo and Schenkel, Ralf and Theobald, Martin and Weikum, Gerhard},
  title     = {{IO-Top-k}: Index-access Optimized Top-k Query Processing},
  booktitle = {Proceedings of the 32nd International Conference on Very Large Data Bases},
  series    = {VLDB '06},
  pages     = {475--486},
  numpages  = {12},
  publisher = {VLDB Endowment},
  location  = {Seoul, Korea},
  year      = {2006},
  url       = {http://dl.acm.org/citation.cfm?id=1182635.1164169},
  acmid     = {1164169},
}
@article{Siivola2007,
  author   = {Siivola, V. and Hirsim{\"a}ki, T. and Virpioja, S.},
  title    = {On Growing and Pruning {Kneser--Ney} Smoothed {$N$}-Gram Models},
  journal  = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume   = {15},
  number   = {5},
  pages    = {1617--1624},
  month    = jul,
  year     = {2007},
  doi      = {10.1109/TASL.2007.896666},
  issn     = {1558-7916},
  keywords = {computational linguistics;natural language processing;smoothing methods;speech recognition;statistical distributions;English text corpora;Finnish text corpora;Kneser-Ney smoothed n-gram models pruning;baseline entropy;good-turing smoothed models;gram probability distributions;language models;vocabulary continuous speech recognition;Context modeling;Degradation;Entropy;Informatics;Natural languages;Probability distribution;Smoothing methods;Speech recognition;Training data;Vocabulary;Modeling;natural languages;smoothing methods;speech recognition},
  abstract = {N-gram models are the most widely used language models in large vocabulary continuous speech recognition. Since the size of the model grows rapidly with respect to the model order and available training data, many methods have been proposed for pruning the least relevant N-grams from the model. However, correct smoothing of the N-gram probability distributions is important and performance may degrade significantly if pruning conflicts with smoothing. In this paper, we show that some of the commonly used pruning methods do not take into account how removing an N-gram should modify the backoff distributions in the state-of-the-art Kneser-Ney smoothing. To solve this problem, we present two new algorithms: one for pruning Kneser-Ney smoothed models, and one for growing them incrementally. Experiments on Finnish and English text corpora show that the proposed pruning algorithm provides considerable improvements over previous pruning algorithms on Kneser-Ney smoothed models and is also better than the baseline entropy pruned Good-Turing smoothed models. The models created by the growing algorithm provide a good starting point for our pruning algorithm, leading to further improvements. The improvements in the Finnish speech recognition over the other Kneser-Ney smoothed models are statistically significant, as well.},
}
@inproceedings{Trnka2007,
  author    = {Trnka, Keith and Yarrington, Debra and McCaw, John and McCoy, Kathleen F. and Pennington, Christopher},
  title     = {The Effects of Word Prediction on Communication Rate for AAC},
  booktitle = {Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Companion Volume, Short Papers},
  series    = {NAACL-Short '07},
  pages     = {173--176},
  numpages  = {4},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Rochester, New York},
  year      = {2007},
  url       = {http://dl.acm.org/citation.cfm?id=1614108.1614152},
  acmid     = {1614152},
}
@article{Ilyas2008,
  author     = {Ilyas, Ihab F. and Beskales, George and Soliman, Mohamed A.},
  title      = {A Survey of Top-k Query Processing Techniques in Relational Database Systems},
  journal    = {ACM Computing Surveys},
  volume     = {40},
  number     = {4},
  pages      = {11:1--11:58},
  articleno  = {11},
  numpages   = {58},
  month      = oct,
  year       = {2008},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  doi        = {10.1145/1391729.1391730},
  issn       = {0360-0300},
  url        = {http://doi.acm.org/10.1145/1391729.1391730},
  keywords   = {Top-k, rank aggregation, rank-aware processing, voting},
  acmid      = {1391730},
  issue_date = {October 2008},
}
@book{Manning2008,
  author    = {Manning, Christopher D. and Raghavan, Prabhakar and Sch{\"u}tze, Hinrich},
  title     = {Introduction to Information Retrieval},
  publisher = {Cambridge University Press},
  address   = {New York, NY, USA},
  year      = {2008},
  isbn      = {9780521865715},
}
@inproceedings{TrnkaMcCoy2008,
  author    = {Trnka, Keith and McCoy, Kathleen F.},
  title     = {Evaluating Word Prediction: Framing Keystroke Savings},
  booktitle = {Proceedings of the 46th Annual Meeting of the Association for Computational Linguistics on Human Language Technologies: Short Papers},
  series    = {HLT-Short '08},
  pages     = {261--264},
  numpages  = {4},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Columbus, Ohio},
  year      = {2008},
  url       = {http://dl.acm.org/citation.cfm?id=1557690.1557766},
  acmid     = {1557766},
}
@book{JurafskyMartin2009,
  author    = {Jurafsky, Daniel and Martin, James H.},
  title     = {Speech and Language Processing},
  edition   = {Second},
  publisher = {Prentice-Hall, Inc.},
  address   = {Upper Saddle River, NJ, USA},
  year      = {2009},
  isbn      = {0131873210},
  asin      = {0131873210},
  dewey     = {410.285},
  timestamp = {2009-05-16T06:06:41.000+0200},
}
@inproceedings{Chelba2010,
  author    = {Chelba, Ciprian and Brants, Thorsten and Neveitt, Will and Xu, Peng},
  title     = {Study on Interaction between Entropy Pruning and Kneser-Ney Smoothing},
  booktitle = {Proceedings of Interspeech},
  pages     = {2242--2245},
  year      = {2010},
}
@phdthesis{Trnka2011,
  author  = {Trnka, Keith},
  title   = {Word Prediction Techniques for User Adaptation and Sparse Data Mitigation},
  school  = {University of Delaware},
  address = {Newark, DE, USA},
  year    = {2011},
  isbn    = {978-1-124-48009-1},
  note    = {AAI3443248},
  advisor = {McCoy, Kathleen F.},
}
@phdthesis{Mikolov2012,
  author = {Mikolov, Tom{\'a}{\v{s}}},
  title  = {Statistical Language Models Based on Neural Networks},
  school = {Brno University of Technology},
  year   = {2012},
}
@inproceedings{BlanasPatel2013,
  author    = {Blanas, Spyros and Patel, Jignesh M.},
  title     = {Memory Footprint Matters: Efficient Equi-join Algorithms for Main Memory Data Processing},
  booktitle = {Proceedings of the 4th Annual Symposium on Cloud Computing},
  series    = {SOCC '13},
  pages     = {19:1--19:16},
  articleno = {19},
  numpages  = {16},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Santa Clara, California},
  year      = {2013},
  doi       = {10.1145/2523616.2523626},
  isbn      = {978-1-4503-2428-1},
  url       = {http://doi.acm.org/10.1145/2523616.2523626},
  acmid     = {2523626},
}
@techreport{Chelba2013,
  author        = {Chelba, Ciprian and Mikolov, Tom{\'a}{\v{s}} and Schuster, Mike and Ge, Qi and Brants, Thorsten and Koehn, Philipp and Robinson, Tony},
  title         = {One Billion Word Benchmark for Measuring Progress in Statistical Language Modeling},
  institution   = {Google},
  year          = {2013},
  eprint        = {1312.3005},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1312.3005},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
  biburl        = {http://dblp.uni-trier.de/rec/bib/journals/corr/ChelbaMSGBK13},
}
@inproceedings{HsuOttaviano2013,
  author    = {Hsu, Bo-June (Paul) and Ottaviano, Giuseppe},
  title     = {Space-efficient Data Structures for Top-k Completion},
  booktitle = {Proceedings of the 22nd International Conference on World Wide Web},
  series    = {WWW '13},
  pages     = {583--594},
  numpages  = {12},
  publisher = {International World Wide Web Conferences Steering Committee},
  address   = {Republic and Canton of Geneva, Switzerland},
  location  = {Rio de Janeiro, Brazil},
  year      = {2013},
  isbn      = {978-1-4503-2035-1},
  url       = {http://dl.acm.org/citation.cfm?id=2488388.2488440},
  keywords  = {compression, scored string sets, top-k completion, tries},
  acmid     = {2488440},
}
@inproceedings{Pickhardt2014,
  author        = {Pickhardt, Ren{\'e} and Gottron, Thomas and K{\"o}rner, Martin and Wagner, Paul Georg and Speicher, Till and Staab, Steffen},
  title         = {A Generalized Language Model as the Combination of Skipped n-grams and Modified {Kneser} {Ney} Smoothing},
  booktitle     = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages         = {1145--1154},
  publisher     = {Association for Computational Linguistics},
  location      = {Baltimore, Maryland},
  year          = {2014},
  isbn          = {978-1-937284-72-5},
  eprint        = {1404.3377},
  archiveprefix = {arXiv},
  url           = {http://aclweb.org/anthology/P14-1108},
  ee            = {http://aclweb.org/anthology/P/P14/P14-1108.pdf},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
  biburl        = {http://dblp.uni-trier.de/rec/bib/journals/corr/PickhardtGKWSS14},
  timestamp     = {Sun, 20 Jul 2014 17:20:53 +0200},
}
@article{DeMulder2015,
  author   = {De Mulder, Wim and Bethard, Steven and Moens, Marie-Francine},
  title    = {A survey on the application of recurrent neural networks to statistical language modeling},
  journal  = {Computer Speech \& Language},
  volume   = {30},
  number   = {1},
  pages    = {61--98},
  year     = {2015},
  doi      = {10.1016/j.csl.2014.09.005},
  issn     = {0885-2308},
  url      = {http://www.sciencedirect.com/science/article/pii/S088523081400093X},
  keywords = {Recurrent neural networks, Natural language processing, Language modeling, Speech recognition, Machine translation},
  abstract = {In this paper, we present a survey on the application of recurrent neural networks to the task of statistical language modeling. Although it has been shown that these models obtain good performance on this task, often superior to other state-of-the-art techniques, they suffer from some important drawbacks, including a very long training time and limitations on the number of context words that can be taken into account in practice. Recent extensions to recurrent neural network models have been developed in an attempt to address these drawbacks. This paper gives an overview of the most important extensions. Each technique is described and its performance on statistical language modeling, as described in the existing literature, is discussed. Our structured overview makes it possible to detect the most promising techniques in the field of recurrent neural networks, applied to language modeling, but it also highlights the techniques for which further research is required.},
}
@online{OANC,
  author  = {Ide, Nancy and Suderman, Keith},
  title   = {The Open American National Corpus (OANC)},
  year    = {2007},
  url     = {http://www.anc.org/data/oanc/},
  urldate = {2015-07-05},
}
@incollection{ENRON,
  author    = {Klimt, Bryan and Yang, Yiming},
  title     = {The Enron Corpus: A New Dataset for Email Classification Research},
  booktitle = {Machine Learning: ECML 2004},
  editor    = {Boulicaut, Jean-Fran{\c{c}}ois and Esposito, Floriana and Giannotti, Fosca and Pedreschi, Dino},
  series    = {Lecture Notes in Computer Science},
  volume    = {3201},
  pages     = {217--226},
  publisher = {Springer Berlin Heidelberg},
  year      = {2004},
  doi       = {10.1007/978-3-540-30115-8_22},
  isbn      = {978-3-540-23105-9},
  url       = {http://dx.doi.org/10.1007/978-3-540-30115-8_22},
  language  = {English},
}