Skip to content

Commit 03efa71

Browse files
BridgeARtargos
authored andcommitted
readline: improve getStringWidth()
1. Simplify the getStringWidth function used by Intl builds by removing dead code (the options were unused) and by refactoring the logic. 2. Improve the getStringWidth unicode handling used by non-Intl builds. The getStringWidth function returned the wrong width for multiple inputs. It's now improved by supporting various zero width characters and more full width characters. PR-URL: #31112 Reviewed-By: MichaΓ«l Zasso <[email protected]> Reviewed-By: James M Snell <[email protected]>
1 parent cf28afe commit 03efa71

File tree

3 files changed

+49
-101
lines changed

3 files changed

+49
-101
lines changed

β€Žlib/internal/readline/utils.js

+28-46
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
'use strict';
22

33
const {
4-
Boolean,
5-
NumberIsInteger,
64
RegExp,
75
Symbol,
86
} = primordials;
@@ -21,7 +19,6 @@ const kEscape = '\x1b';
2119
const kSubstringSearch = Symbol('kSubstringSearch');
2220

2321
let getStringWidth;
24-
let isFullWidthCodePoint;
2522

2623
function CSI(strings, ...args) {
2724
let ret = `${kEscape}[`;
@@ -41,63 +38,37 @@ CSI.kClearScreenDown = CSI`0J`;
4138

4239
if (internalBinding('config').hasIntl) {
4340
const icu = internalBinding('icu');
44-
getStringWidth = function getStringWidth(str, options) {
45-
options = options || {};
46-
if (NumberIsInteger(str)) {
47-
// Provide information about the character with code point 'str'.
48-
return icu.getStringWidth(
49-
str,
50-
Boolean(options.ambiguousAsFullWidth),
51-
false
52-
);
53-
}
54-
str = stripVTControlCharacters(String(str));
41+
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
42+
// Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
43+
getStringWidth = function getStringWidth(str) {
5544
let width = 0;
45+
str = stripVTControlCharacters(str);
5646
for (let i = 0; i < str.length; i++) {
5747
// Try to avoid calling into C++ by first handling the ASCII portion of
5848
// the string. If it is fully ASCII, we skip the C++ part.
5949
const code = str.charCodeAt(i);
60-
if (code < 127) {
61-
width += code >= 32;
62-
continue;
50+
if (code >= 127) {
51+
width += icu.getStringWidth(str.slice(i));
52+
break;
6353
}
64-
width += icu.getStringWidth(
65-
str.slice(i),
66-
Boolean(options.ambiguousAsFullWidth),
67-
Boolean(options.expandEmojiSequence)
68-
);
69-
break;
54+
width += code >= 32 ? 1 : 0;
7055
}
7156
return width;
7257
};
73-
isFullWidthCodePoint =
74-
function isFullWidthCodePoint(code, options) {
75-
if (typeof code !== 'number')
76-
return false;
77-
return icu.getStringWidth(code, options) === 2;
78-
};
7958
} else {
8059
/**
8160
* Returns the number of columns required to display the given string.
8261
*/
8362
getStringWidth = function getStringWidth(str) {
84-
if (NumberIsInteger(str))
85-
return isFullWidthCodePoint(str) ? 2 : 1;
86-
8763
let width = 0;
8864

89-
str = stripVTControlCharacters(String(str));
90-
91-
for (let i = 0; i < str.length; i++) {
92-
const code = str.codePointAt(i);
93-
94-
if (code >= kUTF16SurrogateThreshold) { // Surrogates.
95-
i++;
96-
}
65+
str = stripVTControlCharacters(str);
9766

67+
for (const char of str) {
68+
const code = char.codePointAt(0);
9869
if (isFullWidthCodePoint(code)) {
9970
width += 2;
100-
} else {
71+
} else if (!isZeroWidthCodePoint(code)) {
10172
width++;
10273
}
10374
}
@@ -109,10 +80,10 @@ if (internalBinding('config').hasIntl) {
10980
* Returns true if the character represented by a given
11081
* Unicode code point is full-width. Otherwise returns false.
11182
*/
112-
isFullWidthCodePoint = function isFullWidthCodePoint(code) {
113-
// Code points are derived from:
83+
const isFullWidthCodePoint = (code) => {
84+
// Code points are partially derived from:
11485
// http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
115-
return NumberIsInteger(code) && code >= 0x1100 && (
86+
return code >= 0x1100 && (
11687
code <= 0x115f || // Hangul Jamo
11788
code === 0x2329 || // LEFT-POINTING ANGLE BRACKET
11889
code === 0x232a || // RIGHT-POINTING ANGLE BRACKET
@@ -139,10 +110,23 @@ if (internalBinding('config').hasIntl) {
139110
(code >= 0x1b000 && code <= 0x1b001) ||
140111
// Enclosed Ideographic Supplement
141112
(code >= 0x1f200 && code <= 0x1f251) ||
113+
// Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff
114+
// Emoticons 0x1f600 - 0x1f64f
115+
(code >= 0x1f300 && code <= 0x1f64f) ||
142116
// CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
143117
(code >= 0x20000 && code <= 0x3fffd)
144118
);
145119
};
120+
121+
const isZeroWidthCodePoint = (code) => {
122+
return code <= 0x1F || // C0 control codes
123+
(code > 0x7F && code <= 0x9F) || // C1 control codes
124+
(code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks
125+
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
126+
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
127+
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
128+
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
129+
};
146130
}
147131

148132
/**
@@ -471,9 +455,7 @@ module.exports = {
471455
commonPrefix,
472456
emitKeys,
473457
getStringWidth,
474-
isFullWidthCodePoint,
475458
kSubstringSearch,
476-
kUTF16SurrogateThreshold,
477459
stripVTControlCharacters,
478460
CSI
479461
};

β€Žlib/readline.js

+5-11
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ const {
5353
CSI,
5454
emitKeys,
5555
getStringWidth,
56-
isFullWidthCodePoint,
5756
kSubstringSearch,
58-
kUTF16SurrogateThreshold,
5957
stripVTControlCharacters
6058
} = require('internal/readline/utils');
6159

@@ -743,18 +741,14 @@ Interface.prototype._getDisplayPos = function(str) {
743741
const col = this.columns;
744742
let rows = 0;
745743
str = stripVTControlCharacters(str);
746-
for (let i = 0, len = str.length; i < len; i++) {
747-
const code = str.codePointAt(i);
748-
if (code >= kUTF16SurrogateThreshold) { // Surrogates.
749-
i++;
750-
}
751-
if (code === 0x0a) { // new line \n
752-
// rows must be incremented by 1 even if offset = 0 or col = +Infinity
744+
for (const char of str) {
745+
if (char === '\n') {
746+
// Rows must be incremented by 1 even if offset = 0 or col = +Infinity.
753747
rows += MathCeil(offset / col) || 1;
754748
offset = 0;
755749
continue;
756750
}
757-
const width = getStringWidth(code);
751+
const width = getStringWidth(char);
758752
if (width === 0 || width === 1) {
759753
offset += width;
760754
} else { // width === 2
@@ -781,7 +775,7 @@ Interface.prototype.getCursorPos = function() {
781775
// move the cursor to the beginning of the next line.
782776
if (cols + 1 === columns &&
783777
this.cursor < this.line.length &&
784-
isFullWidthCodePoint(this.line.codePointAt(this.cursor))) {
778+
getStringWidth(this.line[this.cursor]) > 1) {
785779
rows++;
786780
cols = 0;
787781
}

β€Žtest/parallel/test-readline-interface.js

+16-44
Original file line numberDiff line numberDiff line change
@@ -716,11 +716,7 @@ function isWarned(emitter) {
716716
fi.emit('keypress', '.', { name: 'right' });
717717
cursorPos = rli.getCursorPos();
718718
assert.strictEqual(cursorPos.rows, 0);
719-
if (common.hasIntl) {
720-
assert.strictEqual(cursorPos.cols, 2);
721-
} else {
722-
assert.strictEqual(cursorPos.cols, 1);
723-
}
719+
assert.strictEqual(cursorPos.cols, 2);
724720

725721
rli.on('line', common.mustCall((line) => {
726722
assert.strictEqual(line, 'πŸ’»');
@@ -749,14 +745,7 @@ function isWarned(emitter) {
749745
fi.emit('data', 'πŸ•');
750746
cursorPos = rli.getCursorPos();
751747
assert.strictEqual(cursorPos.rows, 0);
752-
753-
if (common.hasIntl) {
754-
assert.strictEqual(cursorPos.cols, 2);
755-
} else {
756-
assert.strictEqual(cursorPos.cols, 1);
757-
// Fix cursor position without internationalization
758-
fi.emit('keypress', '.', { name: 'left' });
759-
}
748+
assert.strictEqual(cursorPos.cols, 2);
760749

761750
rli.on('line', common.mustCall((line) => {
762751
assert.strictEqual(line, 'πŸ•πŸ’»');
@@ -780,22 +769,12 @@ function isWarned(emitter) {
780769
fi.emit('keypress', '.', { name: 'right' });
781770
let cursorPos = rli.getCursorPos();
782771
assert.strictEqual(cursorPos.rows, 0);
783-
if (common.hasIntl) {
784-
assert.strictEqual(cursorPos.cols, 2);
785-
} else {
786-
assert.strictEqual(cursorPos.cols, 1);
787-
// Fix cursor position without internationalization
788-
fi.emit('keypress', '.', { name: 'right' });
789-
}
772+
assert.strictEqual(cursorPos.cols, 2);
790773

791774
fi.emit('data', 'πŸ•');
792775
cursorPos = rli.getCursorPos();
793776
assert.strictEqual(cursorPos.rows, 0);
794-
if (common.hasIntl) {
795-
assert.strictEqual(cursorPos.cols, 4);
796-
} else {
797-
assert.strictEqual(cursorPos.cols, 2);
798-
}
777+
assert.strictEqual(cursorPos.cols, 4);
799778

800779
rli.on('line', common.mustCall((line) => {
801780
assert.strictEqual(line, 'πŸ’»πŸ•');
@@ -957,11 +936,7 @@ function isWarned(emitter) {
957936
fi.emit('data', 'πŸ’»');
958937
let cursorPos = rli.getCursorPos();
959938
assert.strictEqual(cursorPos.rows, 0);
960-
if (common.hasIntl) {
961-
assert.strictEqual(cursorPos.cols, 2);
962-
} else {
963-
assert.strictEqual(cursorPos.cols, 1);
964-
}
939+
assert.strictEqual(cursorPos.cols, 2);
965940
// Delete left character
966941
fi.emit('keypress', '.', { ctrl: true, name: 'h' });
967942
cursorPos = rli.getCursorPos();
@@ -1144,27 +1119,24 @@ function isWarned(emitter) {
11441119
}
11451120
}
11461121

1147-
// isFullWidthCodePoint() should return false for non-numeric values
1148-
[true, false, null, undefined, {}, [], 'あ'].forEach((v) => {
1149-
assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'), false);
1150-
});
1151-
11521122
// Wide characters should be treated as two columns.
1153-
assert.strictEqual(internalReadline.isFullWidthCodePoint('a'.charCodeAt(0)),
1154-
false);
1155-
assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'.charCodeAt(0)),
1156-
true);
1157-
assert.strictEqual(internalReadline.isFullWidthCodePoint('θ°’'.charCodeAt(0)),
1158-
true);
1159-
assert.strictEqual(internalReadline.isFullWidthCodePoint('κ³ '.charCodeAt(0)),
1160-
true);
1161-
assert.strictEqual(internalReadline.isFullWidthCodePoint(0x1f251), true);
1123+
assert.strictEqual(internalReadline.getStringWidth('a'), 1);
1124+
assert.strictEqual(internalReadline.getStringWidth('あ'), 2);
1125+
assert.strictEqual(internalReadline.getStringWidth('θ°’'), 2);
1126+
assert.strictEqual(internalReadline.getStringWidth('κ³ '), 2);
1127+
assert.strictEqual(
1128+
internalReadline.getStringWidth(String.fromCodePoint(0x1f251)), 2);
11621129
assert.strictEqual(internalReadline.getStringWidth('abcde'), 5);
11631130
assert.strictEqual(internalReadline.getStringWidth('叀池や'), 6);
11641131
assert.strictEqual(internalReadline.getStringWidth('γƒŽγƒΌγƒ‰.js'), 9);
11651132
assert.strictEqual(internalReadline.getStringWidth('δ½ ε₯½'), 4);
11661133
assert.strictEqual(internalReadline.getStringWidth('μ•ˆλ…•ν•˜μ„Έμš”'), 10);
11671134
assert.strictEqual(internalReadline.getStringWidth('A\ud83c\ude00BC'), 5);
1135+
assert.strictEqual(internalReadline.getStringWidth('πŸ‘¨β€πŸ‘©β€πŸ‘¦β€πŸ‘¦'), 8);
1136+
assert.strictEqual(internalReadline.getStringWidth('πŸ•π·γ‚πŸ’»πŸ˜€'), 9);
1137+
// TODO(BridgeAR): This should have a width of 4.
1138+
assert.strictEqual(internalReadline.getStringWidth('⓬β“ͺ'), 2);
1139+
assert.strictEqual(internalReadline.getStringWidth('\u0301\u200D\u200E'), 0);
11681140

11691141
// Check if vt control chars are stripped
11701142
assert.strictEqual(

0 commit comments

Comments
Β (0)