diff options
| author | Joel Kronqvist <joel.h.kronqvist@gmail.com> | 2022-03-05 19:02:27 +0200 | 
|---|---|---|
| committer | Joel Kronqvist <joel.h.kronqvist@gmail.com> | 2022-03-05 19:02:27 +0200 | 
| commit | 5d309ff52cd399a6b71968a6b9a70c8ac0b98981 (patch) | |
| tree | 360f7eb50f956e2367ef38fa1fc6ac7ac5258042 /node_modules/html-encoding-sniffer/lib | |
| parent | b500a50f1b97d93c98b36ed9a980f8188d648147 (diff) | |
| download | LYLLRuoka-5d309ff52cd399a6b71968a6b9a70c8ac0b98981.tar.gz LYLLRuoka-5d309ff52cd399a6b71968a6b9a70c8ac0b98981.zip  | |
Added node_modules for the updating to work properly.
Diffstat (limited to 'node_modules/html-encoding-sniffer/lib')
| -rw-r--r-- | node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js | 295 | 
1 files changed, 295 insertions, 0 deletions
diff --git a/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js b/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js new file mode 100644 index 0000000..25b7537 --- /dev/null +++ b/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js @@ -0,0 +1,295 @@ +"use strict"; +const whatwgEncoding = require("whatwg-encoding"); + +// https://html.spec.whatwg.org/#encoding-sniffing-algorithm +module.exports = (buffer, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => { +  let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910 + +  if (encoding === null && transportLayerEncodingLabel !== undefined) { +    encoding = whatwgEncoding.labelToName(transportLayerEncodingLabel); +  } + +  if (encoding === null) { +    encoding = prescanMetaCharset(buffer); +  } + +  if (encoding === null) { +    encoding = defaultEncoding; +  } + +  return encoding; +}; + +// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding +function prescanMetaCharset(buffer) { +  const l = Math.min(buffer.length, 1024); +  for (let i = 0; i < l; i++) { +    let c = buffer[i]; +    if (c === 0x3C) { +      // "<" +      const c1 = buffer[i + 1]; +      const c2 = buffer[i + 2]; +      const c3 = buffer[i + 3]; +      const c4 = buffer[i + 4]; +      const c5 = buffer[i + 5]; +      // !-- (comment start) +      if (c1 === 0x21 && c2 === 0x2D && c3 === 0x2D) { +        i += 4; +        for (; i < l; i++) { +          c = buffer[i]; +          const cMinus1 = buffer[i - 1]; +          const cMinus2 = buffer[i - 2]; +          // --> (comment end) +          if (c === 0x3E && cMinus1 === 0x2D && cMinus2 === 0x2D) { +            break; +          } +        } +      } else if ((c1 === 0x4D || c1 === 0x6D) && +         (c2 === 0x45 || c2 === 0x65) && +         (c3 === 0x54 || c3 === 0x74) && +         (c4 === 0x41 || c4 === 0x61) && +         (isSpaceCharacter(c5) || c5 === 0x2F)) { +        // "meta" + space or / +        i += 6; +        const attributeList = new Set(); +        let gotPragma = false; +        let needPragma = null; +        let charset = null; + +        let attrRes; +        do { +          attrRes = getAttribute(buffer, i, l); +          if (attrRes.attr && !attributeList.has(attrRes.attr.name)) { +            attributeList.add(attrRes.attr.name); +            if (attrRes.attr.name === "http-equiv") { +              gotPragma = attrRes.attr.value === "content-type"; +            } else if (attrRes.attr.name === "content" && !charset) { +              charset = extractCharacterEncodingFromMeta(attrRes.attr.value); +              if (charset !== null) { +                needPragma = true; +              } +            } else if (attrRes.attr.name === "charset") { +              charset = whatwgEncoding.labelToName(attrRes.attr.value); +              needPragma = false; +            } +          } +          i = attrRes.i; +        } while (attrRes.attr); + +        if (needPragma === null) { +          continue; +        } +        if (needPragma === true && gotPragma === false) { +          continue; +        } +        if (charset === null) { +          continue; +        } + +        if (charset === "UTF-16LE" || charset === "UTF-16BE") { +          charset = "UTF-8"; +        } +        if (charset === "x-user-defined") { +          charset = "windows-1252"; +        } + +        return charset; +      } else if ((c1 >= 0x41 && c1 <= 0x5A) || (c1 >= 0x61 && c1 <= 0x7A)) { +        // a-z or A-Z +        for (i += 2; i < l; i++) { +          c = buffer[i]; +          // space or > +          if (isSpaceCharacter(c) || c === 0x3E) { +            break; +          } +        } +        let attrRes; +        do { +          attrRes = getAttribute(buffer, i, l); +          i = attrRes.i; +        } while (attrRes.attr); +      } else if (c1 === 0x21 || c1 === 0x2F || c1 === 0x3F) { +        // ! or / or ? +        for (i += 2; i < l; i++) { +          c = buffer[i]; +          // > +          if (c === 0x3E) { +            break; +          } +        } +      } +    } +  } +  return null; +} + +// https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing +function getAttribute(buffer, i, l) { +  for (; i < l; i++) { +    let c = buffer[i]; +    // space or / +    if (isSpaceCharacter(c) || c === 0x2F) { +      continue; +    } +    // ">" +    if (c === 0x3E) { +      break; +    } +    let name = ""; +    let value = ""; +    nameLoop:for (; i < l; i++) { +      c = buffer[i]; +      // "=" +      if (c === 0x3D && name !== "") { +        i++; +        break; +      } +      // space +      if (isSpaceCharacter(c)) { +        for (i++; i < l; i++) { +          c = buffer[i]; +          // space +          if (isSpaceCharacter(c)) { +            continue; +          } +          // not "=" +          if (c !== 0x3D) { +            return { attr: { name, value }, i }; +          } + +          i++; +          break nameLoop; +        } +        break; +      } +      // / or > +      if (c === 0x2F || c === 0x3E) { +        return { attr: { name, value }, i }; +      } +      // A-Z +      if (c >= 0x41 && c <= 0x5A) { +        name += String.fromCharCode(c + 0x20); // lowercase +      } else { +        name += String.fromCharCode(c); +      } +    } +    c = buffer[i]; +    // space +    if (isSpaceCharacter(c)) { +      for (i++; i < l; i++) { +        c = buffer[i]; +        // space +        if (isSpaceCharacter(c)) { +          continue; +        } else { +          break; +        } +      } +    } +    // " or ' +    if (c === 0x22 || c === 0x27) { +      const quote = c; +      for (i++; i < l; i++) { +        c = buffer[i]; + +        if (c === quote) { +          i++; +          return { attr: { name, value }, i }; +        } + +        // A-Z +        if (c >= 0x41 && c <= 0x5A) { +          value += String.fromCharCode(c + 0x20); // lowercase +        } else { +          value += String.fromCharCode(c); +        } +      } +    } + +    // > +    if (c === 0x3E) { +      return { attr: { name, value }, i }; +    } + +    // A-Z +    if (c >= 0x41 && c <= 0x5A) { +      value += String.fromCharCode(c + 0x20); // lowercase +    } else { +      value += String.fromCharCode(c); +    } + +    for (i++; i < l; i++) { +      c = buffer[i]; + +      // space or > +      if (isSpaceCharacter(c) || c === 0x3E) { +        return { attr: { name, value }, i }; +      } + +      // A-Z +      if (c >= 0x41 && c <= 0x5A) { +        value += String.fromCharCode(c + 0x20); // lowercase +      } else { +        value += String.fromCharCode(c); +      } +    } +  } +  return { i }; +} + +function extractCharacterEncodingFromMeta(string) { +  let position = 0; + +  while (true) { +    const indexOfCharset = string.substring(position).search(/charset/i); + +    if (indexOfCharset === -1) { +      return null; +    } +    let subPosition = position + indexOfCharset + "charset".length; + +    while (isSpaceCharacter(string[subPosition].charCodeAt(0))) { +      ++subPosition; +    } + +    if (string[subPosition] !== "=") { +      position = subPosition - 1; +      continue; +    } + +    ++subPosition; + +    while (isSpaceCharacter(string[subPosition].charCodeAt(0))) { +      ++subPosition; +    } + +    position = subPosition; +    break; +  } + +  if (string[position] === "\"" || string[position] === "'") { +    const nextIndex = string.indexOf(string[position], position + 1); + +    if (nextIndex !== -1) { +      return whatwgEncoding.labelToName(string.substring(position + 1, nextIndex)); +    } + +    // It is an unmatched quotation mark +    return null; +  } + +  if (string.length === position + 1) { +    return null; +  } + +  const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/); +  const end = indexOfASCIIWhitespaceOrSemicolon === -1 ? +    string.length : +    position + indexOfASCIIWhitespaceOrSemicolon + 1; + +  return whatwgEncoding.labelToName(string.substring(position, end)); +} + +function isSpaceCharacter(c) { +  return c === 0x09 || c === 0x0A || c === 0x0C || c === 0x0D || c === 0x20; +}  | 
