Skip to content

Commit

Permalink
fix: Do not generate duplicate entries by default (#3793)
Browse files Browse the repository at this point in the history
Default `stripCaseAndAccentsKeepDuplicate` to `false` because generating the duplicate entries uses too much memory.
  • Loading branch information
Jason3S committed Oct 31, 2022
1 parent fdfe823 commit f27d3c6
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -120,22 +120,14 @@ Code+
Error
Error+
msg
~!codecode
~!codemsg
~!err
~!errorerror
~+code
~+code+
~+error
~+error+
~+msg
~cafe
~café
~code
~code+
~error
~error+
~msg
"
`;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ describe('Validate the wordListCompiler', () => {
const destName = path.join(temp, 'example0.txt');
await compileWordList(source, destName, compileOpt(false));
const output = await fsp.readFile(destName, 'utf8');
expect(output).toBe(__testing__.wordListHeader + '\n' + 'hello\n~hello\ntry\n~try\nwork\n~work\n');
expect(output).toBe(__testing__.wordListHeader + '\n' + 'hello\ntry\nwork\n');
expect(consoleOutput()).toMatchSnapshot();
});

Expand Down
6 changes: 3 additions & 3 deletions packages/cspell-tools/src/compiler/wordListParser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ describe('Validate the wordListCompiler', () => {

test.each`
lines | sort | expectedResult
${'banana|Apple|Apple|apple'} | ${true} | ${'Apple|apple|banana|~apple|~banana'}
${'banana|Apple|Apple|apple|banana'} | ${false} | ${'banana|~banana|Apple|~apple|apple'}
${'hello'} | ${true} | ${'hello|~hello'}
${'banana|Apple|Apple|apple'} | ${true} | ${'Apple|apple|banana|~apple'}
${'banana|Apple|Apple|apple|banana'} | ${false} | ${'banana|Apple|~apple|apple'}
${'hello'} | ${true} | ${'hello'}
${'!Hello'} | ${true} | ${'!Hello|~!hello'}
`('createSortAndFilterOperation $lines $sort', ({ lines, expectedResult, sort }) => {
const normalizer = normalizeTargetWords({ sort, generateNonStrict: true });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ describe('Validate SimpleDictionaryParser', () => {
${s('Word')} | ${s('Word|~word')}
${s('*error*')} | ${s('error|~error|error+|~error+|+error|~+error|+error+|~+error+')}
`('parseDictionaryLines simple $lines', ({ lines, expected }) => {
const r = [...parseDictionaryLines(lines)];
const r = [...parseDictionaryLines(lines, { stripCaseAndAccentsKeepDuplicate: true })];
expect(r).toEqual(expected);
});

Expand All @@ -165,7 +165,7 @@ describe('Validate SimpleDictionaryParser', () => {
${s('# cspell-dictionary: generate-alternatives|Apple|Arizona|New York')} | ${{ stripCaseAndAccents: false }} | ${s('Apple|~apple|Arizona|~arizona|New York|~new york')}
${s('Apple| # cspell-dictionary: no-generate-alternatives|Arizona|New York')} | ${{}} | ${s('Apple|~apple|Arizona|New York')}
${dictionary3()} | ${{ stripCaseAndAccentsKeepDuplicate: false }} | ${s('Error|~error|Error+|~error+|+error|+error+|Code|~code|Code+|~code+|+code|+code+|msg|+msg|!err|!Errorerror|!Codemsg|Café|~café|~cafe|!codecode')}
${s('# cspell-dictionary: split|"New York"|Tower of London')} | ${{}} | ${s('New York|Tower|~tower|of|~of|London|~london')}
${s('# cspell-dictionary: split|"New York"|Tower of London')} | ${{ stripCaseAndAccentsKeepDuplicate: true }} | ${s('New York|Tower|~tower|of|~of|London|~london')}
${s('Hello|!Goodbye')} | ${{}} | ${s('Hello|~hello|!Goodbye')}
${s('Hello|!Goodbye')} | ${{ stripCaseAndAccentsOnForbidden: true }} | ${s('Hello|~hello|!Goodbye|~!goodbye')}
`('parseDictionaryLines complex $lines', ({ lines, options, expected }) => {
Expand Down
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export interface ParseDictionaryOptions {

/**
* Tell the parser to keep non-case/accent version in both forms.
* @default true
* @default false
*/
stripCaseAndAccentsKeepDuplicate: boolean;

Expand Down Expand Up @@ -74,7 +74,7 @@ const _defaultOptions: ParseDictionaryOptions = {
caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX,
keepExactPrefix: IDENTITY_PREFIX,
stripCaseAndAccents: true,
stripCaseAndAccentsKeepDuplicate: true,
stripCaseAndAccentsKeepDuplicate: false,
stripCaseAndAccentsOnForbidden: false,
split: false,
splitKeepBoth: false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ describe('Validate Util Functions', () => {

test.each`
dict | ignoreCase | sep | depth | compoundMethod | expected
${['A*', '+a*', '*b*', '+c']} | ${true} | ${''} | ${2} | ${undefined} | ${['A', 'Aa', 'Ab', 'Ac', 'Aa', 'Ab', 'Ac', 'b', 'ba', 'bb', 'bc', 'ba', 'bb', 'bc', 'a', 'aa', 'ab', 'ac', 'aa', 'ab', 'ac', 'b', 'ba', 'bb', 'bc', 'ba', 'bb', 'bc']}
${['A*', '+a*', '*b*', '+c']} | ${true} | ${''} | ${2} | ${undefined} | ${['A', 'Aa', 'Ab', 'Ac', 'b', 'ba', 'bb', 'bc', 'a', 'aa', 'ab', 'ac']}
${['A*', '+a*', '*b*', '+c']} | ${false} | ${''} | ${2} | ${undefined} | ${['A', 'Aa', 'Ab', 'Ac', 'b', 'ba', 'bb', 'bc']}
${['A*', '+b+', '+C']} | ${false} | ${'•'} | ${3} | ${CompoundWordsMethod.NONE} | ${['A', 'A•C', 'A•b•C']}
${['A*', '+b+', '+C']} | ${false} | ${'•'} | ${3} | ${CompoundWordsMethod.JOIN_WORDS} | ${['A', 'A•C', 'A•b•C', 'A+A', 'A+C']}
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/trie.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ describe('Validate Trie Class', () => {
`);

expect(trieModern.isSizeKnown()).toBe(false);
expect(trieModern.size()).toBe(11); // begin, begin+, end, +end, café ~cafe
expect(trieModern.size()).toBe(6); // begin, begin+, end, +end, café ~cafe
expect(trieModern.isSizeKnown()).toBe(true);
});

Expand Down

0 comments on commit f27d3c6

Please sign in to comment.