Host FlexSearch

This commit is contained in:
Will Faught
2025-03-02 00:23:46 -08:00
parent 2e77b35940
commit 078157e62c
164 changed files with 23495 additions and 1 deletions

View File

@@ -0,0 +1,89 @@
import { IndexInterface } from "../../type.js";
import { regex, replace, collapse } from "../../lang.js";
import { encode as encode_balance } from "./balance.js";
/** Exported `rtl` setting of this encoder module; always `false`. */
export const rtl = false;

/** Exported `tokenize` setting of this encoder module; the empty string. */
export const tokenize = "";

/** Default export: the encoder function together with the two settings above. */
export default {
    encode,
    rtl: false,
    tokenize: "",
};

// Phonetic Normalization
//
// Each pattern below is built with the `regex` helper imported from lang.js.
// Only the digraphs listed here are rewritten; further candidates
// (ai, ay, ey, ue, ie, sz, zs, ck, cc, dt, ou, uo) are not applied.
const regex_ae = regex("ae");
const regex_oe = regex("oe");
const regex_sh = regex("sh");
const regex_th = regex("th");
const regex_ph = regex("ph");
const regex_pf = regex("pf");

// Flat list of alternating entries: a pattern followed by its replacement.
// It is handed to `replace(str, pairs)` by encode().
const pairs = [
    regex_ae, "a",
    regex_oe, "o",
    regex_sh, "s",
    regex_th, "t",
    regex_ph, "f",
    regex_pf, "f",
    // Both "h" rules replace the match with the empty string.
    // NOTE(review): the leading group in each pattern is a look-AHEAD placed in
    // front of a literal "h", so it cannot reject anything; only the trailing
    // group can. Confirm whether a look-behind was intended.
    regex("(?![aeo])h(?![aeo])"), "",
    regex("(?!^[aeo])h(?!^[aeo])"), "",
];
/**
 * Runs the "balance" encoder on the input, joins its terms with single spaces
 * and applies the phonetic `pairs` replacements to the joined string.
 *
 * Unless `_skip_postprocessing` is set, the string is additionally collapsed
 * and split on single spaces.
 *
 * @param {string|number} str Value to encode; any falsy value yields `[]`.
 * @param {boolean=} _skip_postprocessing When truthy, return the intermediate
 *     string (or `[]` if that string is empty) instead of the split terms.
 * @returns {Array<string>|string}
 * @this IndexInterface
 */
export function encode(str, _skip_postprocessing) {
    if (!str) {
        return [];
    }

    let encoded = encode_balance.call(this, str).join(" ");

    // Strings of one or two characters are left untouched by the pair rules.
    if (encoded.length > 2) {
        encoded = replace(encoded, pairs);
    }

    if (_skip_postprocessing) {
        // An empty intermediate string is reported as an empty array.
        return encoded || [];
    }

    if (encoded.length > 1) {
        encoded = collapse(encoded);
    }

    return encoded ? encoded.split(" ") : [];
}

View File

@@ -0,0 +1,119 @@
import { IndexInterface } from "../../type.js";
import { encode as encode_simple } from "./simple.js";
// custom soundex implementation

/** Exported `rtl` setting of this encoder module; always `false`. */
export const rtl = false;

/** Exported `tokenize` setting of this encoder module. */
export const tokenize = "strict";

/** Default export: the encoder function together with the two settings above. */
export default {
    encode,
    rtl: false,
    tokenize: "strict",
};

// Separator used by encode() to cut the pre-encoded string into words:
// any run of characters other than a-z and 0-9.
const regex_strip = /[^a-z0-9]+/;

// Letter folding table used by encode(): each key is replaced by its value.
// Characters without an entry (p, f, s, t, l, m, k, r, h, a, e, o, digits, ...)
// are kept unchanged by the `soundex[ch] || ch` lookup.
const soundex = {
    // -> "p"
    b: "p",
    // -> "f"
    v: "f",
    w: "f",
    // -> "s"
    z: "s",
    x: "s",
    ß: "s",
    // -> "t"
    d: "t",
    // -> "m"
    n: "m",
    // -> "k"
    c: "k",
    g: "k",
    j: "k",
    q: "k",
    // -> "e"
    i: "e",
    y: "e",
    // -> "o"
    u: "o",
};
// const pairs = [
// regex_whitespace, " ",
// regex_strip, ""
// ];
// modified
/**
 * Encodes the input with the "simple" encoder, splits the joined result into
 * words on `regex_strip`, and folds every word letter by letter through the
 * `soundex` table, dropping a letter whenever it folds to the same character
 * as the previously kept one.
 *
 * Empty words and words present (with a truthy value) in `this.filter` are
 * skipped.
 *
 * @param {string|number} str Value to encode.
 * @returns {Array<string>} One code per remaining word; empty when none remain.
 * @this IndexInterface
 */
export function encode(str) {
    const joined = encode_simple.call(this, str).join(" ");
    const codes = [];

    if (!joined) {
        return codes;
    }

    for (const word of joined.split(regex_strip)) {
        if (!word || (this.filter && this.filter[word])) {
            continue;
        }

        const first = word[0];
        let last = soundex[first] || first;
        let code = last;

        for (let pos = 1; pos < word.length; pos++) {
            const letter = word[pos];
            const folded = soundex[letter] || letter;

            // Only append when the folded letter differs from the one before it.
            if (folded && folded !== last) {
                code += folded;
                last = folded;
            }
        }

        codes.push(code);
    }

    return codes;
}

View File

@@ -0,0 +1,23 @@
import { IndexInterface } from "../../type.js";
import { pipeline, normalize, regex_whitespace } from "../../lang.js";
/** Exported `rtl` setting of this encoder module; always `false`. */
export const rtl = false;

/** Exported `tokenize` setting of this encoder module; the empty string. */
export const tokenize = "";

/** Default export: the encoder function together with the two settings above. */
export default {
    encode,
    rtl: false,
    tokenize: "",
};

/**
 * Stringifies and lower-cases the input, then hands it to `pipeline` with no
 * replacement table, `regex_whitespace` as the split pattern, and the final
 * flag switched off.
 *
 * @param {string|number} str Value to encode.
 * @returns {*} Whatever `pipeline` returns for these arguments.
 * @this IndexInterface
 */
export function encode(str) {
    const lowered = ("" + str).toLowerCase();
    return pipeline.call(this, lowered, false, regex_whitespace, false);
}

View File

@@ -0,0 +1,65 @@
import { IndexInterface } from "../../type.js";
import { regex, replace, collapse } from "../../lang.js";
import { encode as encode_advanced } from "./advanced.js";
/** Exported `rtl` setting of this encoder module; always `false`. */
export const rtl = false;

/** Exported `tokenize` setting of this encoder module; the empty string. */
export const tokenize = "";

/** Default export: the encoder function together with the two settings above. */
export default {
    encode,
    rtl: false,
    tokenize: "",
};

// Soundex Normalization
//
// `prefix` is the "not at a word boundary" look-ahead that the vowel pattern
// below spells out literally.
const prefix = "(?!\\b)";

// Matches a, e or o at any position that is not a word boundary; built with
// the `regex` helper imported from lang.js. encode() removes every match.
const regex_vowel = regex("(?!\\b)[aeo]");

// Pattern / replacement list in the layout used by `replace()`: the vowel
// pattern paired with the empty string.
const pairs = [regex_vowel, ""];
/**
 * Encodes the input with the "advanced" encoder (asking it to skip its own
 * post-processing), removes every `regex_vowel` match from the resulting
 * string, collapses it and splits it on single spaces.
 *
 * @param {string|number} str Value to encode; any falsy value yields `[]`.
 * @returns {Array<string>} Encoded terms, or `[]` when nothing is left to encode.
 * @this IndexInterface
 */
export function encode(str) {
    if (!str) {
        return [];
    }

    let encoded = encode_advanced.call(this, str, /* skip post-processing: */ true);

    // With post-processing skipped the advanced encoder normally returns a
    // string, but it falls back to `[]` when its intermediate string is empty.
    // An array is truthy and has no `split`, so it must not reach the string
    // handling below (it previously raised "str.split is not a function").
    if (typeof encoded !== "string") {
        return encoded || [];
    }

    // Single-character strings are passed through unchanged.
    if (encoded.length > 1) {
        encoded = encoded.replace(regex_vowel, "");
    }
    if (encoded.length > 1) {
        encoded = collapse(encoded);
    }

    return encoded ? encoded.split(" ") : [];
}

View File

@@ -0,0 +1,45 @@
import { IndexInterface } from "../../type.js";
import { pipeline, normalize, regex_whitespace, regex } from "../../lang.js";
/** Exported `rtl` setting of this encoder module; always `false`. */
export const rtl = false;

/** Exported `tokenize` setting of this encoder module; the empty string. */
export const tokenize = "";

/** Default export: the encoder function together with the two settings above. */
export default {
    encode,
    rtl: false,
    tokenize: "",
};

// Charset Normalization
//
// Character-class patterns built with the `regex` helper imported from
// lang.js, one per target letter.
const regex_a = regex("[àáâãäå]");
const regex_e = regex("[èéêë]");
const regex_i = regex("[ìíîï]");
const regex_o = regex("[òóôõöő]");
const regex_u = regex("[ùúûüű]");
const regex_y = regex("[ýŷÿ]");
const regex_n = regex("ñ");
// Note: this class also contains the plain letter "c", which maps to "k" below.
const regex_c = regex("[çc]");
const regex_s = regex("ß");
const regex_and = regex(" & ");

// Flat list of alternating entries: a pattern followed by its replacement.
// encode() passes it to `pipeline` only when the input string has no
// `normalize` method.
const pairs = [
    regex_a, "a",
    regex_e, "e",
    regex_i, "i",
    regex_o, "o",
    regex_u, "u",
    regex_y, "y",
    regex_n, "n",
    regex_c, "k",
    regex_s, "s",
    regex_and, " and ",
];
/**
 * Stringifies the input, runs it through `normalize` and lower-cases it, then
 * hands it to `pipeline` with `regex_whitespace` as the split pattern and the
 * final flag switched off.
 *
 * The `pairs` table is supplied as the replacement argument only when the
 * string has no `normalize` method; otherwise `false` is passed.
 *
 * @param {string|number} str Value to encode.
 * @returns {*} Whatever `pipeline` returns for these arguments.
 * @this IndexInterface
 */
export function encode(str) {
    const text = "" + str;
    const lowered = normalize(text).toLowerCase();
    const replacements = !text.normalize && pairs;
    return pipeline.call(this, lowered, replacements, regex_whitespace, false);
}