diff --git a/packages/title-case/README.md b/packages/title-case/README.md index e8e9a6c5..1a0c973a 100644 --- a/packages/title-case/README.md +++ b/packages/title-case/README.md @@ -19,12 +19,12 @@ titleCase("follow step-by-step instructions"); //=> "Follow Step-by-Step Instruc ### Options -- `locale?: string | string[]` +- `locale?: string | string[]` Locale used for `toLocaleUpperCase` during case transformation (default: `undefined`) - `sentenceCase?: boolean` Only capitalize the first word of each sentence (default: `false`) - `sentenceTerminators?: Set` Set of characters to consider a new sentence under sentence case behavior (e.g. `.`, default: `SENTENCE_TERMINATORS`) - `smallWords?: Set` Set of words to keep lower-case when `sentenceCase === false` (default: `SMALL_WORDS`) -- `titleTerminators?: Set` Set of characters to consider a new sentence under title case behavior (e.g. `:`, default: `TITLE_TERMINATORS`). -- `wordSeparators?: Set` Set of characters to consider a new word for capitalization, such as hyphenation (default: `WORD_SEPARATORS`). +- `titleTerminators?: Set` Set of characters to consider a new sentence under title case behavior (e.g. `:`, default: `TITLE_TERMINATORS`) +- `wordSeparators?: Set` Set of characters to consider a new word for capitalization, such as hyphenation (default: `WORD_SEPARATORS`) ## TypeScript and ESM diff --git a/packages/title-case/src/index.spec.ts b/packages/title-case/src/index.spec.ts index 463231a0..d575391f 100644 --- a/packages/title-case/src/index.spec.ts +++ b/packages/title-case/src/index.spec.ts @@ -3,20 +3,31 @@ import { inspect } from "util"; import { titleCase, Options } from "./index.js"; /** - * Based on https://github.com/gouch/to-title-case/blob/master/test/tests.json. + * Original tests from https://github.com/gouch/to-title-case/blob/master/test/tests.json. */ const TEST_CASES: [string, string, Options?][] = [ - ["", ""], - ["2019", "2019"], - ["test", "Test"], - ["two words", "Two Words"], - ["one. 
two.", "One. Two."], + ["one two", "One Two"], + ["one two three", "One Two Three"], + [ + "Start a an and as at but by en for if in nor of on or per the to v vs via end", + "Start a an and as at but by en for if in nor of on or per the to v vs via End", + ], ["a small word starts", "A Small Word Starts"], ["small word ends on", "Small Word Ends On"], + ["questions?", "Questions?"], + ["Two questions?", "Two Questions?"], + ["one sentence. two sentences.", "One Sentence. Two Sentences."], ["we keep NASA capitalized", "We Keep NASA Capitalized"], ["pass camelCase through", "Pass camelCase Through"], + ["this sub-phrase is nice", "This Sub-Phrase Is Nice"], ["follow step-by-step instructions", "Follow Step-by-Step Instructions"], + ["easy as one-two-three end", "Easy as One-Two-Three End"], + ["start on-demand end", "Start On-Demand End"], + ["start in-or-out end", "Start In-or-Out End"], + ["start e-commerce end", "Start E-Commerce End"], + ["start e-mail end", "Start E-Mail End"], ["your hair[cut] looks (nice)", "Your Hair[cut] Looks (Nice)"], + ["keep that colo(u)r", "Keep that Colo(u)r"], ["leave Q&A unscathed", "Leave Q&A Unscathed"], [ "piña colada while you listen to ænima", @@ -31,10 +42,16 @@ const TEST_CASES: [string, string, Options?][] = [ ['"double quotes"', '"Double Quotes"'], ['double quotes "inner" word', 'Double Quotes "Inner" Word'], ["fancy double quotes “inner” word", "Fancy Double Quotes “Inner” Word"], + ["'single quotes'", "'Single Quotes'"], + ["single quotes 'inner' word", "Single Quotes 'Inner' Word"], + ["fancy single quotes ‘inner’ word", "Fancy Single Quotes ‘Inner’ Word"], + ["“‘a twice quoted subtitle’”", "“‘A Twice Quoted Subtitle’”"], ["have you read “The Lottery”?", "Have You Read “The Lottery”?"], ["one: two", "One: Two"], ["one two: three four", "One Two: Three Four"], ['one two: "Three Four"', 'One Two: "Three Four"'], + ["one on: an end", "One On: An End"], + ['one on: "an end"', 'One On: "An End"'], ["email email@example.com 
address", "Email email@example.com Address"], [ "you have an https://example.com/ title", @@ -42,10 +59,14 @@ const TEST_CASES: [string, string, Options?][] = [ ], ["_underscores around words_", "_Underscores Around Words_"], ["*asterisks around words*", "*Asterisks Around Words*"], - ["this vs. that", "This vs. That"], ["this vs that", "This vs That"], - ["this v. that", "This v. That"], + ["this *vs* that", "This *vs* That"], ["this v that", "This v That"], + // Contractions with a period are not supported due to sentence support. + // It's difficult to tell if a period is part of a contraction or not. + ["this vs. that", "This Vs. That"], + ["this v. that", "This V. That"], + ["", ""], [ "Scott Moritz and TheStreet.com’s million iPhone la-la land", "Scott Moritz and TheStreet.com’s Million iPhone La-La Land", @@ -54,6 +75,7 @@ const TEST_CASES: [string, string, Options?][] = [ "Notes and observations regarding Apple’s announcements from ‘The Beat Goes On’ special event", "Notes and Observations Regarding Apple’s Announcements From ‘The Beat Goes On’ Special Event", ], + ["2018", "2018"], [ "the quick brown fox jumps over the lazy dog", "The Quick Brown Fox Jumps over the Lazy Dog", @@ -76,10 +98,16 @@ const TEST_CASES: [string, string, Options?][] = [ ["the iPhone: a quote", "The iPhone: A Quote"], ["the iPhone: a quote", "The iPhone: a quote", { sentenceCase: true }], ["the U.N. and me", "The U.N. and Me"], + ["the *U.N.* and me", "The *U.N.* and Me"], ["the U.N. and me", "The U.N. and me", { sentenceCase: true }], ["the U.N. and me", "The U.N. 
And Me", { smallWords: new Set() }], ["start-and-end", "Start-and-End"], ["go-to-iPhone", "Go-to-iPhone"], + ["the go-to", "The Go-To"], + ["the go-to", "The go-to", { sentenceCase: true }], + ["this to-go", "This To-Go"], + ["test(ing)", "Test(ing)"], + ["test(s)", "Test(s)"], ["Keep #tag", "Keep #tag"], ['"Hello world", says John.', '"Hello World", Says John.'], [ @@ -87,6 +115,27 @@ const TEST_CASES: [string, string, Options?][] = [ '"Hello world", says John.', { sentenceCase: true }, ], + ["foo/bar", "Foo/Bar"], + ["this is the *end.*", "This Is the *End.*"], + ["*something about me?* and you.", "*Something About Me?* And You."], + [ + "*something about me?* and you.", + "*Something about me?* And you.", + { sentenceCase: true }, + ], + ["something about _me-too?_ and you.", "Something About _Me-Too?_ And You."], + ["something about _me_? and you.", "Something About _Me_? And You."], + [ + "something about _me_? and you.", + "Something about _me_? And you.", + { sentenceCase: true }, + ], + [ + "something about _me-too_? and you too.", + "Something About _Me-Too_? And You Too.", + ], + ["an example. i.e. test.", "An Example. I.e. Test."], + ['an example. "i.e. test."', 'An Example. "I.e. Test."'], ]; describe("swap case", () => { diff --git a/packages/title-case/src/index.ts b/packages/title-case/src/index.ts index 69431999..0ac0f080 100644 --- a/packages/title-case/src/index.ts +++ b/packages/title-case/src/index.ts @@ -1,8 +1,9 @@ const TOKENS = /(\S+)|(.)/g; -const IS_SPECIAL_CASE = /[\.#]\p{L}/u; // #tag, example.com, etc. +const IS_SPECIAL_CASE = /[\.#]\p{Alphabetic}/u; // #tag, example.com, etc. const IS_MANUAL_CASE = /\p{Ll}(?=[\p{Lu}])/u; // iPhone, iOS, etc. 
-const ALPHANUMERIC_PATTERN = /[\p{L}\d]+/gu;
-const IS_ACRONYM = /(?:\p{Lu}\.){2,}$/u;
+const ALPHANUMERIC_PATTERN = /[\p{Alphabetic}\p{N}]+/gu; // 1st, 3d, etc.
+const IS_ACRONYM =
+  /^(\P{Alphabetic}*)(?:\p{Alphabetic}\.){2,}(\P{Alphabetic}*)$/u;
 
 export const WORD_SEPARATORS = new Set(["—", "–", "-", "―", "/"]);
 
@@ -94,61 +95,92 @@ export function titleCase(
 
     // Ignore URLs, email addresses, acronyms, etc.
     if (IS_SPECIAL_CASE.test(token)) {
-      result += token;
-
-      // The period at the end of an acronym is not a new sentence.
-      if (IS_ACRONYM.test(token)) {
-        isNewSentence = false;
+      const acronym = token.match(IS_ACRONYM);
+
+      // The period at the end of an acronym is not a new sentence,
+      // but we should uppercase first for i.e., e.g., etc.
+      if (acronym) {
+        // Groups hold the whole non-alphabetic prefix and suffix of the token.
+        const [, prefix = "", suffix = ""] = acronym;
+        result += upperAt(token, prefix.length, locale);
+        // As with regular words, a terminator right after the acronym or at
+        // the very end of the token starts a new sentence.
+        isNewSentence =
+          terminators.has(suffix.charAt(0)) ||
+          terminators.has(suffix.charAt(suffix.length - 1));
         continue;
       }
+
+      result += token;
+      isNewSentence = terminators.has(token.charAt(token.length - 1));
     } else {
       const matches = Array.from(token.matchAll(ALPHANUMERIC_PATTERN));
       let value = token;
+      let isSentenceEnd = false;
 
       for (let i = 0; i < matches.length; i++) {
         const { 0: word, index: wordIndex = 0 } = matches[i];
+        const nextChar = token.charAt(wordIndex + word.length);
+
+        isSentenceEnd = terminators.has(nextChar);
 
-        // Reset "new sentence" when we find a word.
+        // Always capitalize the first word and reset "new sentence".
         if (isNewSentence) {
           isNewSentence = false;
-        } else {
-          // Skip capitalizing all words if sentence case is enabled.
-          if (sentenceCase) {
+        }
+        // Skip words under sentence case or with manual casing (e.g. iPhone).
+        else if (sentenceCase || IS_MANUAL_CASE.test(word)) {
+          continue;
+        }
+        // Handle simple words.
+        else if (matches.length === 1) {
+          // Avoid capitalizing small words, except at the end of a sentence.
+          if (smallWords.has(word)) {
+            const isFinalToken = index + token.length === input.length;
+
+            if (!isFinalToken && !isSentenceEnd) {
+              continue;
+            }
+          }
+        }
+        // Multi-word tokens need to be parsed differently.
+        else if (i > 0) {
+          // Avoid capitalizing words without a valid word separator,
+          // e.g. "apple's" or "test(ing)".
+          if (!wordSeparators.has(token.charAt(wordIndex - 1))) {
             continue;
           }
 
-          // Ignore small words except at beginning or end,
-          // or previous token is a new sentence.
-          if (
-            smallWords.has(word) &&
-            // Not the final token and word.
-            !(index + token.length === input.length && i === matches.length - 1)
-          ) {
+          // Ignore small words in the middle of hyphenated words.
+          if (smallWords.has(word) && wordSeparators.has(nextChar)) {
             continue;
           }
         }
 
-        if (IS_MANUAL_CASE.test(word)) {
-          continue;
-        }
-
-        // Only capitalize words after a valid word separator.
-        if (i > 0 && !wordSeparators.has(token.charAt(wordIndex - 1))) {
-          continue;
-        }
-
-        value =
-          value.slice(0, wordIndex) +
-          value.charAt(wordIndex).toLocaleUpperCase(locale) +
-          value.slice(wordIndex + 1);
+        value = upperAt(value, wordIndex, locale);
       }
 
       result += value;
+      isNewSentence =
+        isSentenceEnd || terminators.has(token.charAt(token.length - 1));
     }
-
-    const lastChar = token.charAt(token.length - 1);
-    isNewSentence = terminators.has(lastChar);
   }
 
   return result;
 }
+
+/**
+ * Return `input` with the character at `index` upper-cased for `locale`.
+ * All other characters are copied through unchanged.
+ */
+function upperAt(
+  input: string,
+  index: number,
+  locale: string | string[] | undefined,
+) {
+  return (
+    input.slice(0, index) +
+    input.charAt(index).toLocaleUpperCase(locale) +
+    input.slice(index + 1)
+  );
+}