Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report RegExp errors in grammar check, use Annex B grammar #58295

Merged
merged 7 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion src/compiler/checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ import {
countWhere,
createBinaryExpressionTrampoline,
createCompilerDiagnostic,
createDetachedDiagnostic,
createDiagnosticCollection,
createDiagnosticForFileFromMessageChain,
createDiagnosticForNode,
Expand All @@ -123,6 +124,7 @@ import {
createPrinterWithRemoveCommentsNeverAsciiEscape,
createPrinterWithRemoveCommentsOmitTrailingSemicolon,
createPropertyNameNodeForIdentifierOrLiteral,
createScanner,
createSymbolTable,
createSyntacticTypeNodeBuilder,
createTextWriter,
Expand Down Expand Up @@ -937,6 +939,7 @@ import {
rangeOfTypeParameters,
ReadonlyKeyword,
reduceLeft,
RegularExpressionLiteral,
RelationComparisonResult,
relativeComplement,
removeExtension,
Expand Down Expand Up @@ -31353,6 +31356,42 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
}
}

function checkGrammarRegularExpressionLiteral(node: RegularExpressionLiteral) {
const sourceFile = getSourceFileOfNode(node);
if (!hasParseDiagnostics(sourceFile)) {
let lastError: DiagnosticWithLocation | undefined;
const scanner = createScanner(
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
sourceFile.languageVersion,
/*skipTrivia*/ true,
sourceFile.languageVariant,
sourceFile.text,
(message, length, arg0) => {
// emulate `parseErrorAtPosition` from parser.ts
const start = scanner.getTokenEnd();
if (message.category === DiagnosticCategory.Message && lastError && start === lastError.start && length === lastError.length) {
const error = createDetachedDiagnostic(sourceFile.fileName, sourceFile.text, start, length, message, arg0);
addRelatedInfo(lastError, error);
}
else if (!lastError || start !== lastError.start) {
lastError = createFileDiagnostic(sourceFile, start, length, message, arg0);
diagnostics.add(lastError);
}
},
node.pos,
node.end - node.pos,
);
Debug.assert(scanner.scan() === SyntaxKind.SlashToken);
Debug.assert(scanner.reScanSlashToken(/*reportErrors*/ true) === SyntaxKind.RegularExpressionLiteral);
return !!lastError;
}
return false;
}

function checkRegularExpressionLiteral(node: RegularExpressionLiteral) {
checkGrammarRegularExpressionLiteral(node);
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
return globalRegExpType;
}

function checkSpreadExpression(node: SpreadElement, checkMode?: CheckMode): Type {
if (languageVersion < LanguageFeatureMinimumTarget.SpreadElements) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can actually reuse NodeCheckFlags.TypeChecked for this, if you'd like. It's how we do similar marking for getter/setter pairs, and nodes with a checkNodeDeferred component.

checkExternalEmitHelpers(node, compilerOptions.downlevelIteration ? ExternalEmitHelpers.SpreadIncludes : ExternalEmitHelpers.SpreadArray);
Expand Down Expand Up @@ -39662,7 +39701,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
case SyntaxKind.TemplateExpression:
return checkTemplateExpression(node as TemplateExpression);
case SyntaxKind.RegularExpressionLiteral:
return globalRegExpType;
return checkRegularExpressionLiteral(node as RegularExpressionLiteral);
case SyntaxKind.ArrayLiteralExpression:
return checkArrayLiteral(node as ArrayLiteralExpression, checkMode, forceTuple);
case SyntaxKind.ObjectLiteralExpression:
Expand Down
107 changes: 68 additions & 39 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ export interface Scanner {
getTokenFlags(): TokenFlags;
reScanGreaterToken(): SyntaxKind;
reScanSlashToken(): SyntaxKind;
/** @internal */
reScanSlashToken(reportErrors?: boolean): SyntaxKind; // eslint-disable-line @typescript-eslint/unified-signatures
reScanAsteriskEqualsToken(): SyntaxKind;
reScanTemplateToken(isTaggedTemplate: boolean): SyntaxKind;
/** @deprecated use {@link reScanTemplateToken}(false) */
Expand Down Expand Up @@ -1484,7 +1486,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// | [0-3] [0-7] [0-7]?
// | [4-7] [0-7]
// NonOctalDecimalEscapeSequence ::= [89]
function scanEscapeSequence(shouldEmitInvalidEscapeError: boolean, isRegularExpression: boolean): string {
function scanEscapeSequence(shouldEmitInvalidEscapeError: boolean, isRegularExpression: boolean | "annex-b"): string {
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
const start = pos;
pos++;
if (pos >= end) {
Expand Down Expand Up @@ -1523,7 +1525,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
tokenFlags |= TokenFlags.ContainsInvalidEscape;
if (isRegularExpression || shouldEmitInvalidEscapeError) {
const code = parseInt(text.substring(start + 1, pos), 8);
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
if (isRegularExpression !== "annex-b") {
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
}
return String.fromCharCode(code);
}
return text.substring(start, pos);
Expand Down Expand Up @@ -1559,7 +1563,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
) {
// '\u{DDDDDD}'
pos -= 2;
return scanExtendedUnicodeEscape(isRegularExpression || shouldEmitInvalidEscapeError);
return scanExtendedUnicodeEscape(!!isRegularExpression || shouldEmitInvalidEscapeError);
}
// '\uDDDD'
for (; pos < start + 6; pos++) {
Expand Down Expand Up @@ -1623,7 +1627,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
case CharacterCodes.paragraphSeparator:
return "";
default:
if (isRegularExpression && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
if (isRegularExpression === true && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
error(Diagnostics.This_character_cannot_be_escaped_in_a_regular_expression, pos - 2, 2);
}
return String.fromCharCode(ch);
Expand Down Expand Up @@ -2386,7 +2390,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return token = SyntaxKind.EqualsToken;
}

function reScanSlashToken(): SyntaxKind {
function reScanSlashToken(reportErrors?: boolean): SyntaxKind {
if (token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) {
// Quickly get to the end of regex such that we know the flags
let p = tokenStart + 1;
Expand Down Expand Up @@ -2444,44 +2448,57 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (!isIdentifierPart(ch, languageVersion)) {
break;
}
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
if (flag === undefined) {
error(Diagnostics.Unknown_regular_expression_flag, p, 1);
}
else if (regExpFlags & flag) {
error(Diagnostics.Duplicate_regular_expression_flag, p, 1);
}
else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1);
}
else {
regExpFlags |= flag;
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
if (languageVersion < availableFrom) {
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
if (reportErrors) {
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
if (flag === undefined) {
error(Diagnostics.Unknown_regular_expression_flag, p, 1);
}
else if (regExpFlags & flag) {
error(Diagnostics.Duplicate_regular_expression_flag, p, 1);
}
else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1);
}
else {
regExpFlags |= flag;
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
if (languageVersion < availableFrom) {
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
}
}
}
p++;
}
pos = tokenStart + 1;
const saveTokenPos = tokenStart;
const saveTokenFlags = tokenFlags;
scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated);
if (!isUnterminated) {
if (reportErrors) {
pos = tokenStart + 1;
const saveTokenPos = tokenStart;
const saveTokenFlags = tokenFlags;
scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated, /*annexB*/ true);
if (!isUnterminated) {
pos = p;
}
tokenStart = saveTokenPos;
tokenFlags = saveTokenFlags;
}
else {
pos = p;
}
tokenStart = saveTokenPos;
tokenFlags = saveTokenFlags;
tokenValue = text.substring(tokenStart, pos);
token = SyntaxKind.RegularExpressionLiteral;
}
return token;

function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean) {
/** Grammar parameter */
const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) {
/** Grammar parameter */
const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets);
/** Grammar parameter */
const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);

if (unicodeMode) {
// Annex B treats any unicode mode as the strict syntax.
annexB = false;
}

/** @see {scanClassSetExpression} */
let mayContainStrings = false;

Expand Down Expand Up @@ -2571,7 +2588,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
case CharacterCodes.equals:
case CharacterCodes.exclamation:
pos++;
isPreviousTermQuantifiable = false;
// In Annex B, `(?=Disjunction)` and `(?!Disjunciton)` are quantifiable
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
isPreviousTermQuantifiable = annexB;
break;
case CharacterCodes.lessThan:
const groupNameStart = pos;
Expand Down Expand Up @@ -2763,7 +2781,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
default:
// The scanEscapeSequence call in scanCharacterEscape must return non-empty strings
// since there must not be line breaks in a regex literal
Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape());
Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true));
break;
}
}
Expand All @@ -2788,7 +2806,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// IdentityEscape ::=
// | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
// | [~UnicodeMode] (any other non-identifier characters)
function scanCharacterEscape(): string {
function scanCharacterEscape(atomEscape: boolean): string {
Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash);
let ch = text.charCodeAt(pos);
switch (ch) {
Expand All @@ -2802,6 +2820,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (unicodeMode) {
error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
}
else if (atomEscape && annexB) {
// Annex B treats
//
// ExtendedAtom : `\` [lookahead = `c`]
//
// as the single character `\` when `c` isn't followed by a valid control character
pos--;
return "\\";
}
return String.fromCharCode(ch);
case CharacterCodes.caret:
case CharacterCodes.$:
Expand All @@ -2826,7 +2853,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return "\\";
}
pos--;
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ true);
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true);
}
}

Expand Down Expand Up @@ -2873,12 +2900,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (isClassContentExit(ch)) {
return;
}
if (!minCharacter) {
if (!minCharacter && !annexB) {
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart);
}
const maxStart = pos;
const maxCharacter = scanClassAtom();
if (!maxCharacter) {
if (!maxCharacter && !annexB) {
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart);
continue;
}
Expand Down Expand Up @@ -3208,7 +3235,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
pos++;
return String.fromCharCode(ch);
default:
return scanCharacterEscape();
return scanCharacterEscape(/*atomEscape*/ false);
}
}
else if (ch === text.charCodeAt(pos + 1)) {
Expand Down Expand Up @@ -3275,7 +3302,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (scanCharacterClassEscape()) {
return "";
}
return scanCharacterEscape();
return scanCharacterEscape(/*atomEscape*/ false);
}
}
else {
Expand Down Expand Up @@ -3407,7 +3434,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
});
forEach(decimalEscapes, escape => {
if (escape.value > numberOfCapturingGroups) {
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
// either a LegacyOctalEscapeSequence or IdentityEscape
if (!annexB && escape.value > numberOfCapturingGroups) {
if (numberOfCapturingGroups) {
error(Diagnostics.A_decimal_escape_must_refer_to_an_existent_capturing_group_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
}
Expand Down
2 changes: 1 addition & 1 deletion src/testRunner/unittests/incrementalParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ describe("unittests:: Incremental Parser", () => {
const oldText = ts.ScriptSnapshot.fromString(source);
const newTextAndChange = withInsert(oldText, semicolonIndex, "/");

compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 4);
compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 0);
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
});

it("Regular expression 2", () => {
Expand Down
7 changes: 0 additions & 7 deletions tests/baselines/reference/parserRegularExpression1.errors.txt

This file was deleted.

4 changes: 2 additions & 2 deletions tests/baselines/reference/parserRegularExpression1.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//// [tests/cases/conformance/parser/ecmascript5/RegularExpressions/parserRegularExpression1.ts] ////

//// [parserRegularExpression1.ts]
return /(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;

//// [parserRegularExpression1.js]
return /(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
2 changes: 1 addition & 1 deletion tests/baselines/reference/parserRegularExpression1.symbols
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

=== parserRegularExpression1.ts ===

return /(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
2 changes: 1 addition & 1 deletion tests/baselines/reference/parserRegularExpression1.types
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//// [tests/cases/conformance/parser/ecmascript5/RegularExpressions/parserRegularExpression1.ts] ////

=== parserRegularExpression1.ts ===
return /(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
>/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g : RegExp
> : ^^^^^^

Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
parserRegularExpressionDivideAmbiguity4.ts(1,1): error TS2304: Cannot find name 'foo'.
parserRegularExpressionDivideAmbiguity4.ts(1,6): error TS1161: Unterminated regular expression literal.
parserRegularExpressionDivideAmbiguity4.ts(1,17): error TS1005: ')' expected.


==== parserRegularExpressionDivideAmbiguity4.ts (2 errors) ====
==== parserRegularExpressionDivideAmbiguity4.ts (3 errors) ====
foo(/notregexp);
~~~
!!! error TS2304: Cannot find name 'foo'.

!!! error TS1161: Unterminated regular expression literal.
!!! error TS1161: Unterminated regular expression literal.

!!! error TS1005: ')' expected.
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
foo(/notregexp);

//// [parserRegularExpressionDivideAmbiguity4.js]
foo(/notregexp);
foo(/notregexp););
Loading