Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report RegExp errors in grammar check, use Annex B grammar #58295

Merged
merged 7 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion src/compiler/checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ import {
countWhere,
createBinaryExpressionTrampoline,
createCompilerDiagnostic,
createDetachedDiagnostic,
createDiagnosticCollection,
createDiagnosticForFileFromMessageChain,
createDiagnosticForNode,
Expand All @@ -123,6 +124,7 @@ import {
createPrinterWithRemoveCommentsNeverAsciiEscape,
createPrinterWithRemoveCommentsOmitTrailingSemicolon,
createPropertyNameNodeForIdentifierOrLiteral,
createScanner,
createSymbolTable,
createSyntacticTypeNodeBuilder,
createTextWriter,
Expand Down Expand Up @@ -937,6 +939,7 @@ import {
rangeOfTypeParameters,
ReadonlyKeyword,
reduceLeft,
RegularExpressionLiteral,
RelationComparisonResult,
relativeComplement,
removeExtension,
Expand All @@ -953,6 +956,7 @@ import {
ReverseMappedType,
sameMap,
SatisfiesExpression,
Scanner,
scanTokenAtPosition,
ScriptKind,
ScriptTarget,
Expand Down Expand Up @@ -1446,6 +1450,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
var requestedExternalEmitHelperNames = new Set<string>();
var requestedExternalEmitHelpers: ExternalEmitHelpers;
var externalHelpersModule: Symbol;
var scanner: Scanner | undefined;

var Symbol = objectAllocator.getSymbolConstructor();
var Type = objectAllocator.getTypeConstructor();
Expand Down Expand Up @@ -31353,6 +31358,48 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
}
}

function checkGrammarRegularExpressionLiteral(node: RegularExpressionLiteral) {
const sourceFile = getSourceFileOfNode(node);
if (!hasParseDiagnostics(sourceFile)) {
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
let lastError: DiagnosticWithLocation | undefined;
scanner ??= createScanner(ScriptTarget.ESNext, /*skipTrivia*/ true);
scanner.setScriptTarget(sourceFile.languageVersion);
scanner.setLanguageVariant(sourceFile.languageVariant);
scanner.setOnError((message, length, arg0) => {
// emulate `parseErrorAtPosition` from parser.ts
const start = scanner!.getTokenEnd();
if (message.category === DiagnosticCategory.Message && lastError && start === lastError.start && length === lastError.length) {
const error = createDetachedDiagnostic(sourceFile.fileName, sourceFile.text, start, length, message, arg0);
addRelatedInfo(lastError, error);
}
else if (!lastError || start !== lastError.start) {
lastError = createFileDiagnostic(sourceFile, start, length, message, arg0);
diagnostics.add(lastError);
}
});
scanner.setText(sourceFile.text, node.pos, node.end - node.pos);
try {
scanner.scan();
Debug.assert(scanner.reScanSlashToken(/*reportErrors*/ true) === SyntaxKind.RegularExpressionLiteral, "Expected scanner to rescan RegularExpressionLiteral");
return !!lastError;
}
finally {
scanner.setText("");
scanner.setOnError(/*onError*/ undefined);
}
}
return false;
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
}

function checkRegularExpressionLiteral(node: RegularExpressionLiteral) {
const nodeLinks = getNodeLinks(node);
if (!nodeLinks.grammarChecked) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can actually reuse NodeCheckFlags.TypeChecked for this, if you'd like. It's how we do similar marking for getter/setter pairs, and nodes with a checkNodeDeferred component.

addLazyDiagnostic(() => checkGrammarRegularExpressionLiteral(node));
nodeLinks.grammarChecked = true;
}
return globalRegExpType;
}

function checkSpreadExpression(node: SpreadElement, checkMode?: CheckMode): Type {
if (languageVersion < LanguageFeatureMinimumTarget.SpreadElements) {
checkExternalEmitHelpers(node, compilerOptions.downlevelIteration ? ExternalEmitHelpers.SpreadIncludes : ExternalEmitHelpers.SpreadArray);
Expand Down Expand Up @@ -39662,7 +39709,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
case SyntaxKind.TemplateExpression:
return checkTemplateExpression(node as TemplateExpression);
case SyntaxKind.RegularExpressionLiteral:
return globalRegExpType;
return checkRegularExpressionLiteral(node as RegularExpressionLiteral);
case SyntaxKind.ArrayLiteralExpression:
return checkArrayLiteral(node as ArrayLiteralExpression, checkMode, forceTuple);
case SyntaxKind.ObjectLiteralExpression:
Expand Down
107 changes: 68 additions & 39 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ export interface Scanner {
getTokenFlags(): TokenFlags;
reScanGreaterToken(): SyntaxKind;
reScanSlashToken(): SyntaxKind;
/** @internal */
reScanSlashToken(reportErrors?: boolean): SyntaxKind; // eslint-disable-line @typescript-eslint/unified-signatures
reScanAsteriskEqualsToken(): SyntaxKind;
reScanTemplateToken(isTaggedTemplate: boolean): SyntaxKind;
/** @deprecated use {@link reScanTemplateToken}(false) */
Expand Down Expand Up @@ -1484,7 +1486,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// | [0-3] [0-7] [0-7]?
// | [4-7] [0-7]
// NonOctalDecimalEscapeSequence ::= [89]
function scanEscapeSequence(shouldEmitInvalidEscapeError: boolean, isRegularExpression: boolean): string {
function scanEscapeSequence(shouldEmitInvalidEscapeError: boolean, isRegularExpression: boolean | "annex-b"): string {
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
const start = pos;
pos++;
if (pos >= end) {
Expand Down Expand Up @@ -1523,7 +1525,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
tokenFlags |= TokenFlags.ContainsInvalidEscape;
if (isRegularExpression || shouldEmitInvalidEscapeError) {
const code = parseInt(text.substring(start + 1, pos), 8);
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
if (isRegularExpression !== "annex-b") {
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
}
return String.fromCharCode(code);
}
return text.substring(start, pos);
Expand Down Expand Up @@ -1559,7 +1563,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
) {
// '\u{DDDDDD}'
pos -= 2;
return scanExtendedUnicodeEscape(isRegularExpression || shouldEmitInvalidEscapeError);
return scanExtendedUnicodeEscape(!!isRegularExpression || shouldEmitInvalidEscapeError);
}
// '\uDDDD'
for (; pos < start + 6; pos++) {
Expand Down Expand Up @@ -1623,7 +1627,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
case CharacterCodes.paragraphSeparator:
return "";
default:
if (isRegularExpression && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
if (isRegularExpression === true && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
error(Diagnostics.This_character_cannot_be_escaped_in_a_regular_expression, pos - 2, 2);
}
return String.fromCharCode(ch);
Expand Down Expand Up @@ -2386,7 +2390,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return token = SyntaxKind.EqualsToken;
}

function reScanSlashToken(): SyntaxKind {
function reScanSlashToken(reportErrors?: boolean): SyntaxKind {
if (token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) {
// Quickly get to the end of regex such that we know the flags
let p = tokenStart + 1;
Expand Down Expand Up @@ -2444,44 +2448,57 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (!isIdentifierPart(ch, languageVersion)) {
break;
}
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
if (flag === undefined) {
error(Diagnostics.Unknown_regular_expression_flag, p, 1);
}
else if (regExpFlags & flag) {
error(Diagnostics.Duplicate_regular_expression_flag, p, 1);
}
else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1);
}
else {
regExpFlags |= flag;
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
if (languageVersion < availableFrom) {
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
if (reportErrors) {
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
if (flag === undefined) {
error(Diagnostics.Unknown_regular_expression_flag, p, 1);
}
else if (regExpFlags & flag) {
error(Diagnostics.Duplicate_regular_expression_flag, p, 1);
}
else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1);
}
else {
regExpFlags |= flag;
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
if (languageVersion < availableFrom) {
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
}
}
}
p++;
}
pos = tokenStart + 1;
const saveTokenPos = tokenStart;
const saveTokenFlags = tokenFlags;
scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated);
if (!isUnterminated) {
if (reportErrors) {
pos = tokenStart + 1;
const saveTokenPos = tokenStart;
const saveTokenFlags = tokenFlags;
scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated, /*annexB*/ true);
if (!isUnterminated) {
pos = p;
}
tokenStart = saveTokenPos;
tokenFlags = saveTokenFlags;
}
else {
pos = p;
}
tokenStart = saveTokenPos;
tokenFlags = saveTokenFlags;
tokenValue = text.substring(tokenStart, pos);
token = SyntaxKind.RegularExpressionLiteral;
}
return token;

function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean) {
/** Grammar parameter */
const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) {
/** Grammar parameter */
const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets);
/** Grammar parameter */
const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);

if (unicodeMode) {
// Annex B treats any unicode mode as the strict syntax.
annexB = false;
}

/** @see {scanClassSetExpression} */
let mayContainStrings = false;

Expand Down Expand Up @@ -2571,7 +2588,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
case CharacterCodes.equals:
case CharacterCodes.exclamation:
pos++;
isPreviousTermQuantifiable = false;
// In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
isPreviousTermQuantifiable = annexB;
break;
case CharacterCodes.lessThan:
const groupNameStart = pos;
Expand Down Expand Up @@ -2763,7 +2781,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
default:
// The scanEscapeSequence call in scanCharacterEscape must return non-empty strings
// since there must not be line breaks in a regex literal
Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape());
Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true));
break;
}
}
Expand All @@ -2788,7 +2806,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// IdentityEscape ::=
// | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
// | [~UnicodeMode] (any other non-identifier characters)
function scanCharacterEscape(): string {
function scanCharacterEscape(atomEscape: boolean): string {
Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash);
let ch = text.charCodeAt(pos);
switch (ch) {
Expand All @@ -2802,6 +2820,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (unicodeMode) {
error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
}
else if (atomEscape && annexB) {
// Annex B treats
//
// ExtendedAtom : `\` [lookahead = `c`]
//
// as the single character `\` when `c` isn't followed by a valid control character
pos--;
return "\\";
}
return String.fromCharCode(ch);
case CharacterCodes.caret:
case CharacterCodes.$:
Expand All @@ -2826,7 +2853,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return "\\";
}
pos--;
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ true);
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true);
}
}

Expand Down Expand Up @@ -2873,12 +2900,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (isClassContentExit(ch)) {
return;
}
if (!minCharacter) {
if (!minCharacter && !annexB) {
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart);
}
const maxStart = pos;
const maxCharacter = scanClassAtom();
if (!maxCharacter) {
if (!maxCharacter && !annexB) {
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart);
continue;
}
Expand Down Expand Up @@ -3208,7 +3235,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
pos++;
return String.fromCharCode(ch);
default:
return scanCharacterEscape();
return scanCharacterEscape(/*atomEscape*/ false);
}
}
else if (ch === text.charCodeAt(pos + 1)) {
Expand Down Expand Up @@ -3275,7 +3302,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
if (scanCharacterClassEscape()) {
return "";
}
return scanCharacterEscape();
return scanCharacterEscape(/*atomEscape*/ false);
}
}
else {
Expand Down Expand Up @@ -3407,7 +3434,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
});
forEach(decimalEscapes, escape => {
if (escape.value > numberOfCapturingGroups) {
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
// either a LegacyOctalEscapeSequence or IdentityEscape
if (!annexB && escape.value > numberOfCapturingGroups) {
if (numberOfCapturingGroups) {
error(Diagnostics.A_decimal_escape_must_refer_to_an_existent_capturing_group_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
}
Expand Down
1 change: 1 addition & 0 deletions src/compiler/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6140,6 +6140,7 @@ export interface NodeLinks {
parameterInitializerContainsUndefined?: boolean; // True if this is a parameter declaration whose type annotation contains "undefined".
fakeScopeForSignatureDeclaration?: "params" | "typeParams"; // If present, this is a fake scope injected into an enclosing declaration chain.
assertionExpressionType?: Type; // Cached type of the expression of a type assertion
grammarChecked?: boolean; // Whether this expression has already been grammar-checked.
}

/** @internal */
Expand Down
2 changes: 1 addition & 1 deletion src/testRunner/unittests/incrementalParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ describe("unittests:: Incremental Parser", () => {
const oldText = ts.ScriptSnapshot.fromString(source);
const newTextAndChange = withInsert(oldText, semicolonIndex, "/");

compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 4);
compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 0);
rbuckton marked this conversation as resolved.
Show resolved Hide resolved
});

it("Regular expression 2", () => {
Expand Down
7 changes: 0 additions & 7 deletions tests/baselines/reference/parserRegularExpression1.errors.txt

This file was deleted.

Loading