Commit node_modules

2023-03-27 20:36:01 -07:00
parent 53a30a4c3e
commit 18ea53bb81
2693 changed files with 193960 additions and 70 deletions
--- a/paige/node_modules/katex/src/Lexer.js
+++ b/paige/node_modules/katex/src/Lexer.js
@@ -0,0 +1,122 @@
+// @flow
+/**
+ * The Lexer class handles tokenizing the input in various ways. Since our
+ * parser expects us to be able to backtrack, the lexer allows lexing from any
+ * given starting point.
+ *
+ * Its main exposed function is the `lex` function, which takes no arguments:
+ * it reads the next token starting at the current `lastIndex` of the lexer's
+ * `tokenRegex` and advances `lastIndex` past that token.
+ *
+ * Comments (introduced by a character whose category code is 14) are
+ * skipped inside `lex` itself rather than being returned as tokens.
+ */
+
+import ParseError from "./ParseError";
+import SourceLocation from "./SourceLocation";
+import {Token} from "./Token";
+
+import type {LexerInterface} from "./Token";
+import type Settings from "./Settings";
+
+/* The following tokenRegex
+ * - matches typical whitespace (but not NBSP etc.) using its first group
+ * - does not match any control character \x00-\x1f except whitespace
+ * - does not match a bare backslash
+ * - matches any ASCII character except those just mentioned
+ * - does not match the BMP private use area \uE000-\uF8FF
+ * - does not match bare surrogate code units
+ * - matches any BMP character except for those just described
+ * - matches any valid Unicode surrogate pair
+ * - matches a backslash followed by one or more whitespace characters
+ * - matches a backslash followed by one or more letters then whitespace
+ * - matches a backslash followed by any BMP character
+ * Capturing groups:
+ *   [1] regular whitespace
+ *   [2] backslash followed by whitespace
+ *   [3] anything else, which may include:
+ *     [4] left character of \verb*
+ *     [5] left character of \verb
+ *     [6] backslash followed by word, excluding any trailing whitespace
+ * Just because the Lexer matches something doesn't mean it's valid input:
+ * If there is no matching function or symbol definition, the Parser will
+ * still reject the input.
+ */
+const spaceRegexString = "[ \r\n\t]";  // one space, CR, LF or tab
+const controlWordRegexString = "\\\\[a-zA-Z@]+";  // backslash + letters or @
+const controlSymbolRegexString = "\\\\[^\uD800-\uDFFF]";  // backslash + one non-surrogate code unit
+const controlWordWhitespaceRegexString =
+    `(${controlWordRegexString})${spaceRegexString}*`;  // captured control word, then any trailing whitespace
+const controlSpaceRegexString = "\\\\(\n|[ \r\t]+\n?)[ \r\t]*";  // backslash + whitespace containing at most one newline
+const combiningDiacriticalMarkString = "[\u0300-\u036f]";  // one code unit in U+0300-U+036F
+export const combiningDiacriticalMarksEndRegex: RegExp =
+    new RegExp(`${combiningDiacriticalMarkString}+$`);  // one or more such marks at the end of a string
+const tokenRegexString = `(${spaceRegexString}+)|` +  // whitespace
+    `${controlSpaceRegexString}|` +                   // \whitespace
+    "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
+    `${combiningDiacriticalMarkString}*` +            // ...plus accents
+    "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
+    `${combiningDiacriticalMarkString}*` +            // ...plus accents
+    "|\\\\verb\\*([^]).*?\\4" +                       // \verb*
+    "|\\\\verb([^*a-zA-Z]).*?\\5" +                   // \verb unstarred
+    `|${controlWordWhitespaceRegexString}` +          // \macroName + spaces
+    `|${controlSymbolRegexString})`;                  // \\, \', etc.
+
+/**
+ * Main Lexer class.
+ *
+ * Holds the input string, the settings object, a per-instance global regex
+ * built from `tokenRegexString`, and a table of category codes. The regex's
+ * `lastIndex` doubles as the lexer's current read position: `lex` reads it
+ * before matching and relies on `exec` (or the comment-skipping branch) to
+ * move it forward.
+ */
+export default class Lexer implements LexerInterface {
+    input: string; // the full text being tokenized
+    settings: Settings; // used by lex() to report an unterminated comment
+    tokenRegex: RegExp; // global regex; its lastIndex is the read position
+    // Category codes. The lexer only supports comment characters (14) for now.
+    // MacroExpander additionally distinguishes active (13).
+    catcodes: {[string]: number};
+
+    constructor(input: string, settings: Settings) {
+        // The input is stored exactly as given; no preprocessing happens here.
+        this.input = input;
+        this.settings = settings;
+        this.tokenRegex = new RegExp(tokenRegexString, 'g'); // fresh regex, so lastIndex starts at 0
+        this.catcodes = {
+            "%": 14, // comment character
+            "~": 13, // active character
+        };
+    }
+
+    setCatcode(char: string, code: number) { // sets or overrides the category code stored for `char`
+        this.catcodes[char] = code;
+    }
+
+    /**
+     * This function lexes a single token, starting at the current
+     * `tokenRegex.lastIndex`. On success `lastIndex` is left just past the
+     * text that was consumed.
+     *
+     * Returns an "EOF" token once the position equals the input length.
+     * Throws a ParseError naming the offending character if the regex does
+     * not match exactly at the current position.
+     *
+     * The token text is chosen in this order: the control word without its
+     * trailing whitespace (group 6); otherwise the non-whitespace token
+     * (group 3); otherwise "\ " (backslash + space) for a control space
+     * (group 2); otherwise a single " " for a run of plain whitespace.
+     *
+     * If the token text has category code 14 (comment), everything up to and
+     * including the next newline is skipped and the token after it is
+     * returned instead. A comment with no terminating newline consumes the
+     * rest of the input and is reported via `settings.reportNonstrict`.
+     */
+    lex(): Token {
+        const input = this.input;
+        const pos = this.tokenRegex.lastIndex; // start of the token about to be read
+        if (pos === input.length) { // nothing left to read
+            return new Token("EOF", new SourceLocation(this, pos, pos));
+        }
+        const match = this.tokenRegex.exec(input);
+        if (match === null || match.index !== pos) { // no match, or the match begins after pos
+            throw new ParseError(
+                `Unexpected character: '${input[pos]}'`,
+                new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
+        }
+        const text = match[6] || match[3] || (match[2] ? "\\ " : " "); // see the order described above
+
+        if (this.catcodes[text] === 14) { // comment character
+            const nlIndex = input.indexOf('\n', this.tokenRegex.lastIndex); // the comment runs to the next newline
+            if (nlIndex === -1) {
+                this.tokenRegex.lastIndex = input.length; // EOF
+                this.settings.reportNonstrict("commentAtEnd",
+                    "% comment has no terminating newline; LaTeX would " +
+                    "fail because of commenting the end of math mode (e.g. $)");
+            } else {
+                this.tokenRegex.lastIndex = nlIndex + 1; // resume just after the newline
+            }
+            return this.lex(); // return the token that follows the comment
+        }
+
+        return new Token(text, new SourceLocation(this, pos,
+            this.tokenRegex.lastIndex));
+    }
+}