Skip to content

Commit

Permalink
Make generate-identifier-regex capable of rewriting src/identifier.js
Browse files Browse the repository at this point in the history
Closes #693
  • Loading branch information
marijnh committed Sep 10, 2018
1 parent 22b22f3 commit 3442a80
Showing 1 changed file with 42 additions and 39 deletions.
81 changes: 42 additions & 39 deletions bin/generate-identifier-regex.js
Original file line number Diff line number Diff line change
@@ -1,57 +1,60 @@
'use strict';

// Which Unicode version should be used?
// The data package is the first devDependency whose name looks like `unicode-<digit>...`.
const pkg = require('../package.json');
const dependencies = Object.keys(pkg.devDependencies);
const unicodeVersion = dependencies.find((name) => /^unicode-\d/.test(name));
if (unicodeVersion === undefined) {
  // Fail with a clear message instead of trying to require "undefined/...".
  throw new Error('No unicode-<version> package found in devDependencies of package.json');
}

// ID_Start code points above the ASCII range.
const start = require(`${unicodeVersion}/Binary_Property/ID_Start/code-points.js`)
  .filter((ch) => ch > 0x7f);

// Updated by search(): index of the most recent `start` entry it stepped past.
// Each lookup below resumes at `last + 1` rather than rescanning from index 0.
// NOTE(review): this presumes both code-point lists are sorted ascending — confirm.
let last = -1;

// U+200C and U+200D, followed by the non-ASCII ID_Continue code points that
// are not already present in `start`.
const cont = [0x200c, 0x200d].concat(
  require(`${unicodeVersion}/Binary_Property/ID_Continue/code-points.js`)
    .filter((ch) => ch > 0x7f && search(start, ch, last + 1) === -1)
);

/**
 * Scan `arr` forward from index `starting` looking for `ch`.
 *
 * The scan stops at the end of the array or at the first element greater
 * than `ch`. Every element that is stepped past (i.e. did not match) has its
 * index written to the outer variable `last`.
 *
 * @param {number[]} arr - values to scan
 * @param {number} ch - value to look for
 * @param {number} starting - index at which the scan begins
 * @returns {number} index of `ch` in `arr`, or -1 when the scan ends without a match
 */
function search(arr, ch, starting) {
  let idx = starting;
  while (idx < arr.length && arr[idx] <= ch) {
    if (arr[idx] === ch) {
      return idx;
    }
    // Record the position we are leaving behind, then advance.
    last = idx;
    idx += 1;
  }
  return -1;
}

/**
 * Left-pad a string with "0" characters until it is at least `width` long.
 *
 * @param {string} str - text to pad
 * @param {number} width - minimum length of the result
 * @returns {string} `str` unchanged when it is already `width` or longer,
 *   otherwise `str` prefixed with zeros up to `width` characters
 */
function pad(str, width) {
  return str.padStart(width, "0");
}

/**
 * Format a code point as an escape sequence.
 *
 * @param {number} code - code point to format
 * @returns {string} a backslash, `x` and two hex digits when the hex form of
 *   `code` has at most two digits; otherwise a backslash, `u` and the hex
 *   form zero-padded to at least four digits
 */
function esc(code) {
  const hex = code.toString(16);
  // Both branches need their escape prefix; bare hex digits would be read as
  // ordinary characters by whatever consumes the generated string.
  return hex.length <= 2
    ? `\\x${hex.padStart(2, "0")}`
    : `\\u${hex.padStart(4, "0")}`;
}

/**
 * Compress a list of code points into the two encodings this script emits.
 *
 * Consecutive code points are first merged into runs `[from, to]`.
 * - A run ending at or below 0xffff is appended to a character-class source
 *   string: one escape for a single code point, two escapes for a run of
 *   two, and `from-to` for anything longer.
 * - Any other run is appended to `astral` as two numbers: the distance from
 *   the end of the previous astral run (initially 0x10000) to `from`, and
 *   the run's span `to - from`.
 *
 * @param {number[]} chars - code points; runs are detected only between
 *   adjacent entries, so the list is presumably sorted ascending (verify
 *   against the data source)
 * @returns {{nonASCII: string, astral: number[]}} the two encodings
 */
function generate(chars) {
  const astral = [];
  let re = "";
  // End of the previous astral run; astral offsets are stored relative to it.
  let at = 0x10000;
  for (let i = 0; i < chars.length; i++) {
    const from = chars[i];
    let to = from;
    // Swallow every following code point that directly continues the run.
    while (i < chars.length - 1 && chars[i + 1] === to + 1) {
      i++;
      to++;
    }
    if (to <= 0xffff) {
      if (from === to) re += esc(from);
      else if (from + 1 === to) re += esc(from) + esc(to);
      else re += `${esc(from)}-${esc(to)}`;
    } else {
      astral.push(from - at, to - from);
      at = to;
    }
  }
  return { nonASCII: re, astral };
}

const startData = generate(start);
const contData = generate(cont);

// The four declarations this script produces, one per line.
const code = [
  ` let nonASCIIidentifierStartChars = "${startData.nonASCII}"`,
  ` let nonASCIIidentifierChars = "${contData.nonASCII}"`,
  ` const astralIdentifierStartCodes = ${JSON.stringify(startData.astral)}`,
  ` const astralIdentifierCodes = ${JSON.stringify(contData.astral)}`
];

if (process.argv.length !== 3) {
  // No file argument: just print the generated declarations.
  console.log(code.join("\n"));
} else {
  // One file argument: read that file, swap each existing declaration for the
  // freshly generated one, and print the rewritten content to stdout.
  const { readFile } = require('fs');
  readFile(process.argv[2], "utf8", (err, data) => {
    if (err) throw err;
    let output = data;
    for (const line of code) {
      // Everything up to and including " = " identifies the declaration; the
      // trailing ".*" consumes the rest of that line in the target file.
      const prefix = /.* = /.exec(line)[0];
      // A function replacer inserts `line` verbatim, so "$" sequences in the
      // generated text can never be read as replacement patterns.
      output = output.replace(new RegExp(prefix + ".*"), () => line);
    }
    process.stdout.write(output);
  });
}

0 comments on commit 3442a80

Please sign in to comment.