Files
paige-hugo/paige/node_modules/katex/src/unicodeScripts.js
2023-03-27 20:36:59 -07:00

127 lines
3.9 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// @flow
/*
* This file defines the Unicode scripts and script families that we
* support. To add new scripts or families, just add a new entry to the
* scriptData array below. Adding scripts to the scriptData array allows
* characters from that script to appear in \text{} environments.
*/
/**
* Each script or script family has a name and an array of blocks.
* Each block is an array of two numbers which specify the start and
* end points (inclusive) of a block of Unicode codepoints.
*/
type Script = {
name: string;
blocks: Array<Array<number>>;
};
/**
* Unicode block data for the families of scripts we support in \text{}.
* Scripts only need to appear here if they do not have font metrics.
*/
const scriptData: Array<Script> = [
{
// Latin characters beyond the Latin-1 characters we have metrics for.
// Needed for Czech, Hungarian and Turkish text, for example.
name: 'latin',
blocks: [
[0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
[0x0300, 0x036f], // Combining Diacritical marks
],
},
{
// The Cyrillic script used by Russian and related languages.
// A Cyrillic subset used to be supported as explicitly defined
// symbols in symbols.js
name: 'cyrillic',
blocks: [[0x0400, 0x04ff]],
},
{
// Armenian
name: 'armenian',
blocks: [[0x0530, 0x058F]],
},
{
// The Brahmic scripts of South and Southeast Asia
// Devanagari (0900097F)
// Bengali (098009FF)
// Gurmukhi (0A000A7F)
// Gujarati (0A800AFF)
// Oriya (0B000B7F)
// Tamil (0B800BFF)
// Telugu (0C000C7F)
// Kannada (0C800CFF)
// Malayalam (0D000D7F)
// Sinhala (0D800DFF)
// Thai (0E000E7F)
// Lao (0E800EFF)
// Tibetan (0F000FFF)
// Myanmar (1000109F)
name: 'brahmic',
blocks: [[0x0900, 0x109F]],
},
{
name: 'georgian',
blocks: [[0x10A0, 0x10ff]],
},
{
// Chinese and Japanese.
// The "k" in cjk is for Korean, but we've separated Korean out
name: "cjk",
blocks: [
[0x3000, 0x30FF], // CJK symbols and punctuation, Hiragana, Katakana
[0x4E00, 0x9FAF], // CJK ideograms
[0xFF00, 0xFF60], // Fullwidth punctuation
// TODO: add halfwidth Katakana and Romanji glyphs
],
},
{
// Korean
name: 'hangul',
blocks: [[0xAC00, 0xD7AF]],
},
];
/**
* Given a codepoint, return the name of the script or script family
* it is from, or null if it is not part of a known block
*/
export function scriptFromCodepoint(codepoint: number): ?string {
for (let i = 0; i < scriptData.length; i++) {
const script = scriptData[i];
for (let i = 0; i < script.blocks.length; i++) {
const block = script.blocks[i];
if (codepoint >= block[0] && codepoint <= block[1]) {
return script.name;
}
}
}
return null;
}
/**
* A flattened version of all the supported blocks in a single array.
* This is an optimization to make supportedCodepoint() fast.
*/
const allBlocks: Array<number> = [];
scriptData.forEach(s => s.blocks.forEach(b => allBlocks.push(...b)));
/**
* Given a codepoint, return true if it falls within one of the
* scripts or script families defined above and false otherwise.
*
* Micro benchmarks shows that this is faster than
* /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
* in Firefox, Chrome and Node.
*/
export function supportedCodepoint(codepoint: number): boolean {
for (let i = 0; i < allBlocks.length; i += 2) {
if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
return true;
}
}
return false;
}