node_modules
.bin
@ampproject
@babel
@discoveryjs
@gar
@istanbuljs
@jridgewell
@leichtgewicht
@npmcli
@riotjs
@tootallnate
@types
@ungap
@webassemblyjs
@webpack-cli
@xtuc
abab
abbrev
accepts
acorn
acorn-globals
acorn-import-assertions
acorn-walk
agent-base
agentkeepalive
aggregate-error
ajv
ajv-formats
ajv-keywords
ansi-colors
ansi-html-community
ansi-regex
ansi-styles
anymatch
append-transform
aproba
archy
are-we-there-yet
argparse
array-flatten
arrify
assertion-error
ast-types
async-foreach
asynckit
babel-plugin-dynamic-import-node
babel-plugin-polyfill-corejs2
babel-plugin-polyfill-corejs3
babel-plugin-polyfill-regenerator
balanced-match
batch
bianco.attr
bianco.dom-to-array
bianco.events
bianco.query
big.js
binary-extensions
body-parser
bonjour-service
boolbase
brace-expansion
braces
browser-process-hrtime
browser-stdout
browserslist
buffer-from
bytes
cacache
caching-transform
call-bind
camel-case
camelcase
camelcase-keys
caniuse-lite
chai
chalk
check-error
chokidar
chownr
chrome-trace-event
clean-css
clean-stack
cliui
clone-deep
color-convert
color-name
color-support
colorette
combined-stream
commander
commondir
compressible
compression
concat-map
connect-history-api-fallback
console-control-strings
content-disposition
content-type
convert-source-map
cookie
cookie-signature
core-js-compat
core-util-is
cross-spawn
css-select
css-what
cssesc
cssom
cssstyle
cumpa
curri
data-urls
debug
decamelize
decamelize-keys
decimal.js
deep-eql
deep-is
default-gateway
default-require-extensions
define-lazy-prop
define-properties
delayed-stream
delegates
depd
destroy
detect-node
diff
dns-equal
dns-packet
dom-converter
dom-nodes
dom-serializer
domelementtype
domexception
domhandler
domutils
dot-case
ee-first
electron-to-chromium
emoji-regex
emojis-list
encodeurl
encoding
enhanced-resolve
entities
env-paths
envinfo
err-code
erre
error-ex
es-module-lexer
es6-error
escalade
escape-html
escape-string-regexp
escodegen
eslint-scope
esm
esprima
esrecurse
estraverse
esutils
etag
eventemitter3
events
execa
express
fast-deep-equal
fast-json-stable-stringify
fast-levenshtein
fastest-levenshtein
faye-websocket
fill-range
finalhandler
find-up
flat
follow-redirects
foreground-child
form-data
forwarded
fresh
fromentries
fs-minipass
fs-monkey
fs.realpath
function-bind
gauge
gaze
gensync
get-caller-file
get-func-name
get-intrinsic
get-package-type
get-stdin
get-stream
glob
glob-parent
glob-to-regexp
globals
globule
graceful-fs
growl
handle-thing
hard-rejection
has
has-flag
has-property-descriptors
has-symbols
has-unicode
hasha
he
hosted-git-info
hpack.js
html-encoding-sniffer
lib
html-encoding-sniffer.js
LICENSE.txt
README.md
package.json
html-entities
html-escaper
html-minifier-terser
html-webpack-plugin
htmlparser2
http-cache-semantics
http-deceiver
http-errors
http-parser-js
http-proxy
http-proxy-agent
http-proxy-middleware
https-proxy-agent
human-signals
humanize-ms
iconv-lite
imurmurhash
indent-string
infer-owner
inflight
inherits
interpret
ip
ipaddr.js
is-arrayish
is-binary-path
is-core-module
is-docker
is-extglob
is-fullwidth-code-point
is-glob
is-lambda
is-number
is-plain-obj
is-plain-object
is-potential-custom-element-name
is-stream
is-typedarray
is-windows
is-wsl
isarray
isexe
isobject
istanbul-lib-coverage
istanbul-lib-hook
istanbul-lib-instrument
istanbul-lib-processinfo
istanbul-lib-report
istanbul-lib-source-maps
istanbul-reports
jest-worker
js-base64
js-tokens
js-yaml
jsdom
jsdom-global
jsesc
json-parse-even-better-errors
json-schema-traverse
json5
kind-of
levn
lines-and-columns
loader-runner
locate-path
lodash
lodash.debounce
lodash.flattendeep
log-symbols
loupe
lower-case
lru-cache
make-fetch-happen
map-obj
media-typer
memfs
meow
merge-descriptors
merge-stream
methods
micromatch
mime
mime-db
mime-types
mimic-fn
min-indent
minimalistic-assert
minimatch
minimist-options
minipass
minipass-collect
minipass-fetch
minipass-flush
minipass-pipeline
minipass-sized
minizlib
mkdirp
mocha
ms
multicast-dns
nan
nanoid
negotiator
neo-async
no-case
node-forge
node-gyp
node-preload
node-releases
node-sass
nopt
normalize-package-data
normalize-path
npm-run-path
npmlog
nth-check
nwsapi
nyc
object-inspect
object-keys
object.assign
obuf
on-finished
on-headers
once
onetime
open
optionator
p-limit
p-locate
p-map
p-retry
p-try
package-hash
param-case
parse-json
parse5
parseurl
pascal-case
path-exists
path-is-absolute
path-key
path-parse
path-to-regexp
pathval
picocolors
picomatch
pirates
prelude-ls
pretty-error
process-nextick-args
process-on-spawn
promise-inflight
promise-retry
proxy-addr
psl
punycode
qs
quick-lru
randombytes
range-parser
raw-body
rawth
read-pkg
read-pkg-up
readable-stream
readdirp
recast
rechoir
redent
regenerate
regenerate-unicode-properties
regenerator-runtime
regenerator-transform
regexpu-core
regjsgen
regjsparser
relateurl
release-zalgo
renderkid
require-directory
require-from-string
require-main-filename
requires-port
resolve
retry
rimraf
riot
ruit
safe-buffer
safer-buffer
sass-graph
saxes
schema-utils
scss-tokenizer
select-hose
selfsigned
semver
send
serialize-javascript
serve-index
serve-static
set-blocking
setprototypeof
shallow-clone
shebang-command
shebang-regex
side-channel
signal-exit
smart-buffer
sockjs
socks
socks-proxy-agent
source-map
source-map-support
spawn-wrap
spdx-correct
spdx-exceptions
spdx-expression-parse
spdx-license-ids
spdy
spdy-transport
sprintf-js
ssri
statuses
stdout-stream
string-width
string_decoder
strip-ansi
strip-final-newline
strip-indent
strip-json-comments
supports-color
symbol-tree
tapable
tar
terser
terser-webpack-plugin
test-exclude
thunky
to-fast-properties
to-regex-range
toidentifier
tough-cookie
tr46
trim-newlines
true-case-path
tslib
type-check
type-detect
type-fest
type-is
typedarray-to-buffer
unicode-canonical-property-names-ecmascript
unicode-match-property-ecmascript
unicode-match-property-value-ecmascript
unicode-property-aliases-ecmascript
unique-filename
unique-slug
universalify
unpipe
update-browserslist-db
uri-js
util-deprecate
utila
utils-merge
uuid
validate-npm-package-license
vary
w3c-hr-time
w3c-xmlserializer
watchpack
wbuf
webidl-conversions
webpack
webpack-cli
webpack-dev-middleware
webpack-dev-server
webpack-merge
webpack-sources
websocket-driver
websocket-extensions
whatwg-encoding
whatwg-mimetype
whatwg-url
which
which-module
wide-align
wildcard
word-wrap
workerpool
wrap-ansi
wrappy
write-file-atomic
ws
xml-name-validator
xmlchars
y18n
yallist
yargs
yargs-parser
yargs-unparser
yocto-queue
.package-lock.json
src
LICENSE
package-lock.json
package.json
readme.md
webpack.config.js
296 lines
7.6 KiB
JavaScript
296 lines
7.6 KiB
JavaScript
![]() |
"use strict";
|
||
|
const whatwgEncoding = require("whatwg-encoding");
|
||
|
|
||
|
// https://html.spec.whatwg.org/#encoding-sniffing-algorithm
|
||
|
module.exports = (uint8Array, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => {
|
||
|
let encoding = whatwgEncoding.getBOMEncoding(uint8Array);
|
||
|
|
||
|
if (encoding === null && transportLayerEncodingLabel !== undefined) {
|
||
|
encoding = whatwgEncoding.labelToName(transportLayerEncodingLabel);
|
||
|
}
|
||
|
|
||
|
if (encoding === null) {
|
||
|
encoding = prescanMetaCharset(uint8Array);
|
||
|
}
|
||
|
|
||
|
if (encoding === null) {
|
||
|
encoding = defaultEncoding;
|
||
|
}
|
||
|
|
||
|
return encoding;
|
||
|
};
|
||
|
|
||
|
// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
|
||
|
/**
 * Prescans at most the first 1024 bytes for a `<meta>` element that declares
 * a character encoding.
 *
 * @param {Uint8Array} uint8Array - The bytes of the document.
 * @returns {string|null} The declared encoding, or null when no usable
 *   declaration is found within the scanned range.
 */
function prescanMetaCharset(uint8Array) {
  const limit = Math.min(uint8Array.byteLength, 1024);

  // True when `byte` is the given uppercase ASCII letter or its lowercase form.
  const matchesLetter = (byte, upper) => byte === upper || byte === upper + 0x20;
  const isAsciiLetter = (byte) => (byte >= 0x41 && byte <= 0x5A) || (byte >= 0x61 && byte <= 0x7A);

  for (let pos = 0; pos < limit; pos++) {
    // Only "<" starts anything of interest.
    if (uint8Array[pos] !== 0x3C) {
      continue;
    }

    const b1 = uint8Array[pos + 1];
    const b2 = uint8Array[pos + 2];
    const b3 = uint8Array[pos + 3];
    const b4 = uint8Array[pos + 4];
    const b5 = uint8Array[pos + 5];

    // "<!--": skip ahead to the ">" of the closing "-->". The two dashes may
    // be the ones that opened the comment.
    if (b1 === 0x21 && b2 === 0x2D && b3 === 0x2D) {
      pos += 4;
      while (pos < limit) {
        const endsComment = uint8Array[pos] === 0x3E &&
                            uint8Array[pos - 1] === 0x2D &&
                            uint8Array[pos - 2] === 0x2D;
        if (endsComment) {
          break;
        }
        pos++;
      }
      continue;
    }

    // "<meta" (any case) followed by whitespace or "/".
    const isMetaTag = matchesLetter(b1, 0x4D) &&
                      matchesLetter(b2, 0x45) &&
                      matchesLetter(b3, 0x54) &&
                      matchesLetter(b4, 0x41) &&
                      (isSpaceCharacter(b5) || b5 === 0x2F);
    if (isMetaTag) {
      pos += 6;
      const seenNames = new Set();
      let gotPragma = false;
      let needPragma = null;
      let charset = null;

      let result;
      do {
        result = getAttribute(uint8Array, pos, limit);
        pos = result.i;

        const attribute = result.attr;
        // Only the first occurrence of each attribute name counts.
        if (attribute && !seenNames.has(attribute.name)) {
          seenNames.add(attribute.name);

          switch (attribute.name) {
            case "http-equiv":
              gotPragma = attribute.value === "content-type";
              break;
            case "content":
              if (!charset) {
                charset = extractCharacterEncodingFromMeta(attribute.value);
                if (charset !== null) {
                  needPragma = true;
                }
              }
              break;
            case "charset":
              charset = whatwgEncoding.labelToName(attribute.value);
              needPragma = false;
              break;
            default:
              break;
          }
        }
      } while (result.attr);

      // Not a usable declaration: keep scanning after this tag.
      const pragmaMissing = needPragma === true && gotPragma === false;
      if (needPragma === null || pragmaMissing || charset === null) {
        continue;
      }

      if (charset === "UTF-16LE" || charset === "UTF-16BE") {
        charset = "UTF-8";
      }
      if (charset === "x-user-defined") {
        charset = "windows-1252";
      }

      return charset;
    }

    // "<" + ASCII letter: skip the tag name, then consume all its attributes.
    if (isAsciiLetter(b1)) {
      for (pos += 2; pos < limit; pos++) {
        const byte = uint8Array[pos];
        if (isSpaceCharacter(byte) || byte === 0x3E) {
          break;
        }
      }

      let result;
      do {
        result = getAttribute(uint8Array, pos, limit);
        pos = result.i;
      } while (result.attr);
      continue;
    }

    // "<!", "</" or "<?": skip ahead to the next ">".
    if (b1 === 0x21 || b1 === 0x2F || b1 === 0x3F) {
      pos += 2;
      while (pos < limit && uint8Array[pos] !== 0x3E) {
        pos++;
      }
    }
  }

  return null;
}
|
||
|
|
||
|
// https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing
|
||
|
/**
 * Reads one attribute from a tag, starting at index `i` and never reading
 * names or values at or beyond index `l`.
 *
 * ASCII uppercase letters in both name and value are lowercased.
 *
 * @param {Uint8Array} uint8Array - The bytes being scanned.
 * @param {number} i - Index at which to start looking for an attribute.
 * @param {number} l - Exclusive upper bound of the scan.
 * @returns {{attr?: {name: string, value: string}, i: number}} The attribute
 *   (when one was found) and the index at which scanning should resume. `attr`
 *   is absent when the tag ended (">") or the input ran out.
 */
function getAttribute(uint8Array, i, l) {
  // Converts a byte to a one-character string, lowercasing A-Z.
  const lowerChar = (byte) => String.fromCharCode(byte >= 0x41 && byte <= 0x5A ? byte + 0x20 : byte);
  const found = (name, value, resumeAt) => ({ attr: { name, value }, i: resumeAt });

  // Skip leading whitespace and "/".
  while (i < l && (isSpaceCharacter(uint8Array[i]) || uint8Array[i] === 0x2F)) {
    i++;
  }
  // Out of input, or ">" closes the tag: no attribute here.
  if (i >= l || uint8Array[i] === 0x3E) {
    return { i };
  }

  let name = "";
  let value = "";
  let byte;

  // --- Attribute name ---
  while (i < l) {
    byte = uint8Array[i];

    // "=" after a non-empty name starts the value.
    if (byte === 0x3D && name !== "") {
      i++;
      break;
    }

    if (isSpaceCharacter(byte)) {
      // Whitespace ends the name; what follows decides whether a value exists.
      do {
        i++;
      } while (i < l && isSpaceCharacter(uint8Array[i]));

      if (i >= l) {
        break;
      }
      if (uint8Array[i] !== 0x3D) {
        return found(name, value, i);
      }
      i++;
      break;
    }

    // "/" or ">" ends a value-less attribute.
    if (byte === 0x2F || byte === 0x3E) {
      return found(name, value, i);
    }

    name += lowerChar(byte);
    i++;
  }

  // --- Attribute value ---
  byte = uint8Array[i];

  // Skip whitespace between "=" and the value.
  if (isSpaceCharacter(byte)) {
    i++;
    while (i < l) {
      byte = uint8Array[i];
      if (!isSpaceCharacter(byte)) {
        break;
      }
      i++;
    }
  }

  // Quoted value: read up to the matching quote.
  if (byte === 0x22 || byte === 0x27) {
    const quote = byte;
    for (i++; i < l; i++) {
      byte = uint8Array[i];
      if (byte === quote) {
        return found(name, value, i + 1);
      }
      value += lowerChar(byte);
    }
  }

  if (byte === 0x3E) {
    return found(name, value, i);
  }

  // Unquoted value: read up to whitespace or ">".
  value += lowerChar(byte);
  for (i++; i < l; i++) {
    byte = uint8Array[i];
    if (isSpaceCharacter(byte) || byte === 0x3E) {
      return found(name, value, i);
    }
    value += lowerChar(byte);
  }

  // Input ran out mid-attribute: report no attribute, one past the last index reached.
  return { i: i + 1 };
}
|
||
|
|
||
|
/**
 * Extracts a character encoding from the value of a `<meta content="...">`
 * attribute, e.g. `text/html; charset=utf-8`.
 *
 * Every index is bounds-checked before the string is read, so values in which
 * "charset" (optionally followed by whitespace and/or "=") runs to the end of
 * the string yield null instead of throwing a TypeError.
 *
 * @param {string} string - The `content` attribute value.
 * @returns {string|null} The result of `whatwgEncoding.labelToName` for the
 *   extracted label, or null when no label can be extracted.
 */
function extractCharacterEncodingFromMeta(string) {
  const { length } = string;
  let position = 0;

  // Find a "charset" that is followed (after optional whitespace) by "=".
  while (true) {
    const indexOfCharset = string.substring(position).search(/charset/ui);

    if (indexOfCharset === -1) {
      return null;
    }
    let subPosition = position + indexOfCharset + "charset".length;

    while (subPosition < length && isSpaceCharacter(string.charCodeAt(subPosition))) {
      ++subPosition;
    }

    // "charset" was the last non-whitespace token: there is nothing to extract.
    if (subPosition >= length) {
      return null;
    }

    if (string[subPosition] !== "=") {
      // Not a charset declaration; resume the search just before this character.
      position = subPosition - 1;
      continue;
    }

    ++subPosition;

    while (subPosition < length && isSpaceCharacter(string.charCodeAt(subPosition))) {
      ++subPosition;
    }

    position = subPosition;
    break;
  }

  // "charset=" with nothing after it.
  if (position >= length) {
    return null;
  }

  if (string[position] === "\"" || string[position] === "'") {
    const nextIndex = string.indexOf(string[position], position + 1);

    if (nextIndex !== -1) {
      return whatwgEncoding.labelToName(string.substring(position + 1, nextIndex));
    }

    // It is an unmatched quotation mark
    return null;
  }

  // Kept from the original implementation: a lone trailing character is
  // treated as "no label".
  if (length === position + 1) {
    return null;
  }

  // The unquoted label runs up to the first ASCII whitespace or semicolon.
  const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/u);
  const end = indexOfASCIIWhitespaceOrSemicolon === -1 ?
    length :
    position + indexOfASCIIWhitespaceOrSemicolon + 1;

  return whatwgEncoding.labelToName(string.substring(position, end));
}
|
||
|
|
||
|
/**
 * Tells whether a byte / char code is one of tab (0x09), line feed (0x0A),
 * form feed (0x0C), carriage return (0x0D) or space (0x20).
 *
 * @param {number} c - The byte or char code to test.
 * @returns {boolean} True for the five codes listed above, false otherwise.
 */
function isSpaceCharacter(c) {
  switch (c) {
    case 0x09:
    case 0x0A:
    case 0x0C:
    case 0x0D:
    case 0x20:
      return true;
    default:
      return false;
  }
}
|