Skip to content

Scan bigger/fewer jsdoc tokens #53081

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 69 additions & 56 deletions src/compiler/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ import {
tracing,
TransformFlags,
trimString,
trimStringEnd,
TryStatement,
TupleTypeNode,
TypeAliasDeclaration,
Expand Down Expand Up @@ -2165,6 +2166,10 @@ namespace Parser {
return currentToken = scanner.scanJsDocToken();
}

function nextJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
return currentToken = scanner.scanJSDocCommentTextToken(inBackticks);
}

function reScanGreaterToken(): SyntaxKind {
return currentToken = scanner.reScanGreaterToken();
}
Expand Down Expand Up @@ -8602,19 +8607,14 @@ namespace Parser {
loop: while (true) {
switch (token()) {
case SyntaxKind.AtToken:
if (state === JSDocState.BeginningOfLine || state === JSDocState.SawAsterisk) {
removeTrailingWhitespace(comments);
if (!commentsPos) commentsPos = getNodePos();
addTag(parseTag(indent));
// NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
// Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
// for malformed examples like `/** @param {string} x @returns {number} the length */`
state = JSDocState.BeginningOfLine;
margin = undefined;
}
else {
pushComment(scanner.getTokenText());
}
removeTrailingWhitespace(comments);
if (!commentsPos) commentsPos = getNodePos();
addTag(parseTag(indent));
// NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag.
// Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning
// for malformed examples like `/** @param {string} x @returns {number} the length */`
state = JSDocState.BeginningOfLine;
margin = undefined;
break;
case SyntaxKind.NewLineTrivia:
comments.push(scanner.getTokenText());
Expand All @@ -8623,30 +8623,33 @@ namespace Parser {
break;
case SyntaxKind.AsteriskToken:
const asterisk = scanner.getTokenText();
if (state === JSDocState.SawAsterisk || state === JSDocState.SavingComments) {
if (state === JSDocState.SawAsterisk) {
// If we've already seen an asterisk, then we can no longer parse a tag on this line
state = JSDocState.SavingComments;
pushComment(asterisk);
}
else {
Debug.assert(state === JSDocState.BeginningOfLine);
// Ignore the first asterisk on a line
state = JSDocState.SawAsterisk;
indent += asterisk.length;
}
break;
case SyntaxKind.WhitespaceTrivia:
Debug.assert(state !== JSDocState.SavingComments, "whitespace shouldn't come from the scanner while saving top-level comment text");
// only collect whitespace if we're already saving comments or have just crossed the comment indent margin
const whitespace = scanner.getTokenText();
if (state === JSDocState.SavingComments) {
comments.push(whitespace);
}
else if (margin !== undefined && indent + whitespace.length > margin) {
if (margin !== undefined && indent + whitespace.length > margin) {
comments.push(whitespace.slice(margin - indent));
}
indent += whitespace.length;
break;
case SyntaxKind.EndOfFileToken:
break loop;
case SyntaxKind.JSDocCommentTextToken:
state = JSDocState.SavingComments;
pushComment(scanner.getTokenValue());
break;
case SyntaxKind.OpenBraceToken:
state = JSDocState.SavingComments;
const commentEnd = scanner.getTokenFullStart();
Expand All @@ -8671,15 +8674,20 @@ namespace Parser {
pushComment(scanner.getTokenText());
break;
}
nextTokenJSDoc();
if (state === JSDocState.SavingComments) {
nextJSDocCommentTextToken(/*inBackticks*/ false);
}
else {
nextTokenJSDoc();
}
}
removeTrailingWhitespace(comments);
if (parts.length && comments.length) {
parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? start, commentsPos));
const trimmedComments = trimStringEnd(comments.join(""));
if (parts.length && trimmedComments.length) {
parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? start, commentsPos));
}
if (parts.length && tags) Debug.assertIsDefined(commentsPos, "having parsed tags implies that the end of the comment span should be set");
const tagsArray = tags && createNodeArray(tags, tagsPos, tagsEnd);
return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : comments.length ? comments.join("") : undefined, tagsArray), start, end);
return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : trimmedComments.length ? trimmedComments : undefined, tagsArray), start, end);
});

function removeLeadingNewlines(comments: string[]) {
Expand All @@ -8689,8 +8697,18 @@ namespace Parser {
}

function removeTrailingWhitespace(comments: string[]) {
while (comments.length && comments[comments.length - 1].trim() === "") {
comments.pop();
while (comments.length) {
const trimmed = trimStringEnd(comments[comments.length - 1]);
if (trimmed === "") {
comments.pop();
}
else if (trimmed.length < comments[comments.length - 1].length) {
comments[comments.length - 1] = trimmed;
break;
}
else {
break;
}
}
}

Expand Down Expand Up @@ -8846,7 +8864,6 @@ namespace Parser {
const parts: JSDocComment[] = [];
let linkEnd;
let state = JSDocState.BeginningOfLine;
let previousWhitespace = true;
let margin: number | undefined;
function pushComment(text: string) {
if (!margin) {
Expand All @@ -8862,7 +8879,7 @@ namespace Parser {
}
state = JSDocState.SawAsterisk;
}
let tok = token() as JSDocSyntaxKind;
let tok = token() as JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
loop: while (true) {
switch (tok) {
case SyntaxKind.NewLineTrivia:
Expand All @@ -8872,29 +8889,20 @@ namespace Parser {
indent = 0;
break;
case SyntaxKind.AtToken:
if (state === JSDocState.SavingBackticks
|| state === JSDocState.SavingComments && (!previousWhitespace || lookAhead(isNextJSDocTokenWhitespace))) {
// @ doesn't start a new tag inside ``, and inside a comment, only after whitespace or not before whitespace
comments.push(scanner.getTokenText());
break;
}
scanner.resetTokenState(scanner.getTokenEnd() - 1);
// falls through
break loop;
case SyntaxKind.EndOfFileToken:
// Done
break loop;
case SyntaxKind.WhitespaceTrivia:
if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
pushComment(scanner.getTokenText());
}
else {
const whitespace = scanner.getTokenText();
// if the whitespace crosses the margin, take only the whitespace that passes the margin
if (margin !== undefined && indent + whitespace.length > margin) {
comments.push(whitespace.slice(margin - indent));
}
indent += whitespace.length;
Debug.assert(state !== JSDocState.SavingComments && state !== JSDocState.SavingBackticks, "whitespace shouldn't come from the scanner while saving comment text");
const whitespace = scanner.getTokenText();
Copy link
Member

@DanielRosenwasser DanielRosenwasser Mar 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This slices whitespace, then you slice out of the whitespace. Swap this to grab the beginning/end of the whitespace, calculate the length, and slice right out of the original sourceText only if necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good idea. I noted it and will try it in a separate PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done: #53121

// if the whitespace crosses the margin, take only the whitespace that passes the margin
if (margin !== undefined && indent + whitespace.length > margin) {
comments.push(whitespace.slice(margin - indent));
state = JSDocState.SavingComments;
}
indent += whitespace.length;
break;
case SyntaxKind.OpenBraceToken:
state = JSDocState.SavingComments;
Expand All @@ -8920,6 +8928,12 @@ namespace Parser {
}
pushComment(scanner.getTokenText());
break;
case SyntaxKind.JSDocCommentTextToken:
if (state !== JSDocState.SavingBackticks) {
state = JSDocState.SavingComments; // leading identifiers start recording as well
}
pushComment(scanner.getTokenValue());
break;
case SyntaxKind.AsteriskToken:
if (state === JSDocState.BeginningOfLine) {
// leading asterisks start recording on the *next* (non-whitespace) token
Expand All @@ -8936,28 +8950,27 @@ namespace Parser {
pushComment(scanner.getTokenText());
break;
}
previousWhitespace = token() === SyntaxKind.WhitespaceTrivia;
tok = nextTokenJSDoc();
if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) {
tok = nextJSDocCommentTextToken(state === JSDocState.SavingBackticks);
}
else {
tok = nextTokenJSDoc();
}
}

removeLeadingNewlines(comments);
removeTrailingWhitespace(comments);
const trimmedComments = trimStringEnd(comments.join(""));
if (parts.length) {
if (comments.length) {
parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? commentsPos));
if (trimmedComments.length) {
parts.push(finishNode(factory.createJSDocText(trimmedComments), linkEnd ?? commentsPos));
}
return createNodeArray(parts, commentsPos, scanner.getTokenEnd());
}
else if (comments.length) {
return comments.join("");
else if (trimmedComments.length) {
return trimmedComments;
}
}

function isNextJSDocTokenWhitespace() {
const next = nextTokenJSDoc();
return next === SyntaxKind.WhitespaceTrivia || next === SyntaxKind.NewLineTrivia;
}

function parseJSDocLink(start: number) {
const linkType = tryParse(parseJSDocLinkPrefix);
if (!linkType) {
Expand Down
31 changes: 31 additions & 0 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ export interface Scanner {
reScanInvalidIdentifier(): SyntaxKind;
scanJsxToken(): JsxTokenSyntaxKind;
scanJsDocToken(): JSDocSyntaxKind;
/** @internal */
scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken;
scan(): SyntaxKind;

getText(): string;
Expand Down Expand Up @@ -1031,6 +1033,7 @@ export function createScanner(languageVersion: ScriptTarget,
reScanInvalidIdentifier,
scanJsxToken,
scanJsDocToken,
scanJSDocCommentTextToken,
scan,
getText,
clearCommentDirectives,
Expand Down Expand Up @@ -2467,6 +2470,34 @@ export function createScanner(languageVersion: ScriptTarget,
return scanJsxAttributeValue();
}

function scanJSDocCommentTextToken(inBackticks: boolean): JSDocSyntaxKind | SyntaxKind.JSDocCommentTextToken {
fullStartPos = tokenStart = pos;
tokenFlags = TokenFlags.None;
if (pos >= end) {
return token = SyntaxKind.EndOfFileToken;
}
for (let ch = text.charCodeAt(pos);
pos < end && (!isLineBreak(ch) && ch !== CharacterCodes.backtick);
ch = codePointAt(text, ++pos)) {
if (!inBackticks) {
if (ch === CharacterCodes.openBrace) {
break;
}
else if (ch === CharacterCodes.at
&& pos - 1 >= 0 && isWhiteSpaceSingleLine(text.charCodeAt(pos - 1))
&& !(pos + 1 < end && isWhiteSpaceLike(text.charCodeAt(pos + 1)))) {
// @ doesn't start a new tag inside ``, and elsewhere, only after whitespace and before non-whitespace
break;
}
}
}
if (pos === tokenStart) {
return scanJsDocToken();
}
tokenValue = text.substring(tokenStart, pos);
return token = SyntaxKind.JSDocCommentTextToken;
}

function scanJsDocToken(): JSDocSyntaxKind {
fullStartPos = tokenStart = pos;
tokenFlags = TokenFlags.None;
Expand Down
5 changes: 5 additions & 0 deletions src/compiler/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ export const enum SyntaxKind {
// Identifiers and PrivateIdentifiers
Identifier,
PrivateIdentifier,
/**
* Only the special JSDoc comment text scanner produces JSDocCommentTextTokes. One of these tokens spans all text after a tag comment's start and before the next @
* @internal
*/
JSDocCommentTextToken,
// Reserved words
BreakKeyword,
CaseKeyword,
Expand Down
1 change: 1 addition & 0 deletions src/testRunner/unittests/jsDocParsing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ oh.no
* Some\n\n * text\r\n * with newlines.
*/`);
parsesCorrectly("Chained tags, no leading whitespace", `/**@a @b @c@d*/`);
parsesCorrectly("Single trailing whitespace", `/** trailing whitespace */`);
parsesCorrectly("Initial star is not a tag", `/***@a*/`);
parsesCorrectly("Initial star space is not a tag", `/*** @a*/`);
parsesCorrectly("Initial email address is not a tag", `/**[email protected]*/`);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,27 @@
"flags": "JSDoc",
"modifierFlagsCache": 0,
"transformFlags": 0,
"comment": "* @a"
"comment": "*",
"tags": {
"0": {
"kind": "JSDocTag",
"pos": 5,
"end": 7,
"modifierFlagsCache": 0,
"transformFlags": 0,
"tagName": {
"kind": "Identifier",
"pos": 6,
"end": 7,
"modifierFlagsCache": 0,
"transformFlags": 0,
"escapedText": "a"
}
},
"length": 1,
"pos": 5,
"end": 7,
"hasTrailingComma": false,
"transformFlags": 0
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"kind": "JSDoc",
"pos": 0,
"end": 26,
"flags": "JSDoc",
"modifierFlagsCache": 0,
"transformFlags": 0,
"comment": "trailing whitespace"
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,41 @@
"flags": "JSDoc",
"modifierFlagsCache": 0,
"transformFlags": 0,
"comment": "* @type {number}"
"comment": "*",
"tags": {
"0": {
"kind": "JSDocTypeTag",
"pos": 6,
"end": 21,
"modifierFlagsCache": 0,
"transformFlags": 0,
"tagName": {
"kind": "Identifier",
"pos": 7,
"end": 11,
"modifierFlagsCache": 0,
"transformFlags": 0,
"escapedText": "type"
},
"typeExpression": {
"kind": "JSDocTypeExpression",
"pos": 12,
"end": 20,
"modifierFlagsCache": 0,
"transformFlags": 0,
"type": {
"kind": "NumberKeyword",
"pos": 13,
"end": 19,
"modifierFlagsCache": 0,
"transformFlags": 1
}
}
},
"length": 1,
"pos": 6,
"end": 21,
"hasTrailingComma": false,
"transformFlags": 0
}
}
Loading