fix: improve adjectives generation rules

This commit is contained in:
Azgaar 2022-06-04 19:48:17 +03:00
parent 55f8b2a491
commit 00ced6260b
4 changed files with 177 additions and 60 deletions

View file

@ -6093,7 +6093,8 @@
<script src="utils/numberUtils.js"></script>
<script src="utils/polyfills.js"></script>
<script src="utils/probabilityUtils.js?v=29052022"></script>
<script src="utils/stringUtils.js"></script>
<script src="utils/stringUtils.js?v=04062022"></script>
<script src="utils/languageUtils.js"></script>
<script src="utils/unitUtils.js"></script>
<script src="modules/voronoi.js"></script>

174
utils/languageUtils.js Normal file
View file

@ -0,0 +1,174 @@
"use strict";
// characters that count as vowels (the apostrophe is part of the set)
const VOWELS = `aeiouyɑ'əøɛœæɶɒɨɪɔɐʊɤɯаоиеёэыуюяàèìòùỳẁȁȅȉȍȕáéíóúýẃőűâêîôûŷŵäëïöüÿẅãẽĩõũỹąęįǫųāēīōūȳăĕĭŏŭǎěǐǒǔȧėȯẏẇạẹịọụỵẉḛḭṵṳ`;

/**
 * Tell whether a character is listed in VOWELS.
 * @param {string} c - character to look up
 * @returns {boolean} true when VOWELS contains `c` as a substring
 */
function vowel(c) {
  const isListed = VOWELS.includes(c);
  return isListed;
}
/**
 * Strip trailing vowels from a string, never shortening it to `minLength` chars or fewer
 * than it already is: trimming stops as soon as the length is no longer above `minLength`.
 * @param {string} string - text to trim
 * @param {number} [minLength=3] - length at which trimming stops
 * @returns {string} the string without its trailing vowels
 */
function trimVowels(string, minLength = 3) {
  let trimmed = string;
  for (;;) {
    const isLongEnough = trimmed.length > minLength;
    if (!isLongEnough) break;
    if (!vowel(last(trimmed))) break;
    trimmed = trimmed.slice(0, -1);
  }
  return trimmed;
}
/**
 * Ordered rules for turning a noun into its adjective form.
 * getAdjective walks the list top to bottom and applies the first rule whose
 * probability roll succeeds and whose condition matches, so order matters:
 * specific suffixes come first, generic fallbacks last.
 *
 * Rule shape:
 *   name        - label of the rule (not unique: two rules are called "an")
 *   probability - chance passed to P() that the rule is considered at all
 *   condition   - RegExp the noun has to match
 *   action      - function mapping the noun to its adjective
 */
const adjectivizationRules = [
  // "X Guo" => "X"
  {name: "guo", probability: 1, condition: / Guo$/, action: noun => noun.slice(0, -4)},
  {
    name: "orszag",
    probability: 1,
    condition: /orszag$/,
    // short names keep the suffix and get "ian", long names just drop "orszag"
    action: noun => (noun.length < 9 ? noun + "ian" : noun.slice(0, -6))
  },
  {
    name: "stan",
    probability: 1,
    condition: /stan$/,
    // short names get "i", long names drop "stan" and any vowels left at the end
    action: noun => (noun.length < 9 ? noun + "i" : trimVowels(noun.slice(0, -4)))
  },
  {
    name: "land",
    probability: 1,
    condition: /land$/,
    action: noun => {
      if (noun.length > 9) return noun.slice(0, -4);
      // root is the part before "land" with all trailing vowels removed
      const root = trimVowels(noun.slice(0, -4), 0);
      if (root.length < 3) return noun + "ic";
      if (root.length < 4) return root + "lish";
      return root + "ish";
    }
  },
  {
    name: "que",
    probability: 1,
    condition: /que$/,
    action: noun => noun.replace(/que$/, "can")
  },
  {
    name: "a",
    probability: 1,
    condition: /a$/,
    action: noun => noun + "n"
  },
  {
    name: "o",
    probability: 1,
    condition: /o$/,
    action: noun => noun.replace(/o$/, "an")
  },
  {
    name: "u",
    probability: 1,
    condition: /u$/,
    action: noun => noun + "an"
  },
  {
    name: "i",
    probability: 1,
    condition: /i$/,
    action: noun => noun + "an"
  },
  {
    name: "e",
    probability: 1,
    condition: /e$/,
    action: noun => noun + "an"
  },
  {
    name: "ay",
    probability: 1,
    condition: /ay$/,
    action: noun => noun + "an"
  },
  {
    name: "os",
    probability: 1,
    condition: /os$/,
    action: noun => {
      const root = trimVowels(noun.slice(0, -2), 0);
      // short roots only lose the final "s"
      if (root.length < 4) return noun.slice(0, -1);
      return root + "ian";
    }
  },
  {
    name: "es",
    probability: 1,
    condition: /es$/,
    action: noun => {
      const root = trimVowels(noun.slice(0, -2), 0);
      // long roots only lose the final "s"
      if (root.length > 7) return noun.slice(0, -1);
      return root + "ian";
    }
  },
  {
    name: "l",
    probability: 0.8,
    condition: /l$/,
    action: noun => noun + "ese"
  },
  {
    name: "n",
    probability: 0.8,
    condition: /n$/,
    action: noun => noun + "ese"
  },
  {
    name: "ad",
    probability: 0.8,
    condition: /ad$/,
    action: noun => noun + "ian"
  },
  {
    name: "an",
    probability: 0.8,
    condition: /an$/,
    action: noun => noun + "ian"
  },
  {
    name: "ish",
    probability: 0.25,
    // exactly six ASCII letters
    condition: /^[a-zA-Z]{6}$/,
    action: noun => trimVowels(noun.slice(0, -1)) + "ish"
  },
  {
    name: "an",
    probability: 0.5,
    // up to seven ASCII letters; the range quantifier needs a comma: "{0-7}" is
    // matched as the literal text "{0-7}", so the rule could never fire on a real name
    condition: /^[a-zA-Z]{0,7}$/,
    action: noun => trimVowels(noun) + "an"
  }
];
/**
 * Build the adjective form of a noun using adjectivizationRules.
 * Rules are tried in order; for each rule the probability roll P() is made first,
 * then the condition is tested. The first rule passing both is applied.
 * @param {string} noun - noun to convert
 * @returns {string} result of the first applicable rule, or the noun itself when none applies
 */
function getAdjective(noun) {
  const applicable = adjectivizationRules.find(
    ({probability, condition}) => P(probability) && condition.test(noun)
  );
  return applicable ? applicable.action(noun) : noun;
}
/**
 * Append the English ordinal suffix to an integer: 1 => "1st", 12 => "12th".
 * @param {number} n - integer to format
 * @returns {string} the number followed by "st", "nd", "rd" or "th"
 */
const nth = n => {
  const suffixes = ["st", "nd", "rd"];
  // yields 0, 1 or 2 for numbers ending in 1, 2, 3, except 11, 12 and 13
  const index = ((((n + 90) % 100) - 10) % 10) - 1;
  const suffix = suffixes[index] || "th";
  return n + suffix;
};
/**
 * Produce a two-letter code (abbreviation) for a name.
 * A two-word name gives the initials of both words; any other name gives its first two letters.
 * While the code is listed in `restricted`, it is replaced by the first letter plus the
 * upper-cased 2nd, 3rd, ... letter (the last letter is never tried).
 * @param {string} name - name to abbreviate
 * @param {string[]} [restricted=[]] - codes that are already taken
 * @returns {string} the code; it may still be restricted when every candidate is taken
 */
function abbreviate(name, restricted = []) {
  // shorten the "Old " prefix to "O " and drop parentheses
  const words = name
    .replace("Old ", "O ")
    .replace(/[()]/g, "")
    .split(" ");
  const letters = words.join("");

  let code;
  if (words.length === 2) {
    code = words[0][0] + words[1][0];
  } else {
    code = letters.slice(0, 2);
  }

  let position = 1;
  while (position < letters.length - 1 && restricted.includes(code)) {
    code = letters[0] + letters[position].toUpperCase();
    position += 1;
  }
  return code;
}
/**
 * Join array items into a human-readable conjunction (e.g. [A,B,C] => "A, B and C" style output).
 * Uses Intl.ListFormat (long conjunction) with the locale from window.lang, or "en" when unset;
 * falls back to a plain comma-separated string where Intl.ListFormat is unavailable.
 * @param {string[]} array - items to join
 * @returns {string} the formatted list
 */
function list(array) {
  if (Intl.ListFormat) {
    const locale = window.lang || "en";
    const formatter = new Intl.ListFormat(locale, {style: "long", type: "conjunction"});
    return formatter.format(array);
  }
  return array.join(", ");
}

View file

@ -13,64 +13,6 @@ function capitalize(string) {
return string.charAt(0).toUpperCase() + string.slice(1);
}
/**
 * Check if a char is a vowel or can serve as one (the apostrophe is included).
 * @param {string} c - character to look up
 * @returns {boolean} true when the vowel list contains `c` as a substring
 */
function vowel(c) {
  const vowels = `aeiouyɑ'əøɛœæɶɒɨɪɔɐʊɤɯаоиеёэыуюяàèìòùỳẁȁȅȉȍȕáéíóúýẃőűâêîôûŷŵäëïöüÿẅãẽĩõũỹąęįǫųāēīōūȳăĕĭŏŭǎěǐǒǔȧėȯẏẇạẹịọụỵẉḛḭṵṳ`;
  return vowels.includes(c);
}
/**
 * Remove vowels from the end of a string while it is longer than 3 characters.
 * @param {string} string - text to trim
 * @returns {string} the string without its trailing vowels
 */
function trimVowels(string) {
  const canTrim = string.length > 3 && vowel(last(string));
  return canTrim ? trimVowels(string.slice(0, -1)) : string;
}
/**
 * Get the adjective form of a noun.
 * @param {string} string - noun to convert
 * @returns {string} adjective derived from the noun, or the noun itself when it already ends on a suffix
 */
function getAdjective(string) {
  // special cases for some suffixes
  if (string.length > 8 && string.endsWith("orszag")) return string.slice(0, -6);
  if (string.length > 6 && string.endsWith("stan")) return string.slice(0, -4);
  // the roll is made for every name reaching this point, whether or not it ends with "land"
  const rolledLandic = P(0.5);
  if (rolledLandic && string.endsWith("land")) return string + "ic";

  let noun = string;
  if (noun.endsWith(" Guo")) noun = noun.slice(0, -4);

  // don't change the name if it already ends on a suffix
  const alreadySuffixed = ["an", "ese", "i"].some(suffix => noun.endsWith(suffix));
  if (alreadySuffixed) return noun;

  const end = noun.slice(-1); // last letter of the noun
  if (end === "a") return noun + "n";
  if (end === "o") return trimVowels(noun) + "an";
  if (vowel(end) || end === "c") return noun + "an"; // ceiuy
  if (end === "m" || end === "n") return noun + "ese";
  if (end === "q") return noun + "i";
  return trimVowels(noun) + "ian";
}
/**
 * Get the ordinal out of an integer: 1 => "1st".
 * @param {number} n - integer to format
 * @returns {string} the number followed by "st", "nd", "rd" or "th"
 */
const nth = n => {
  // index into the suffix list for numbers ending in 1, 2, 3 (but not 11, 12, 13)
  const index = ((((n + 90) % 100) - 10) % 10) - 1;
  return n + (["st", "nd", "rd"][index] || "th");
};
/**
 * Get a two-letter code (abbreviation) from a string.
 * Two-word names use both initials, other names their first two letters. If the code is in
 * `restricted`, the second letter is swapped for the following letters (upper-cased) in turn,
 * stopping before the last letter.
 * @param {string} name - name to abbreviate
 * @param {string[]} [restricted=[]] - codes that must be avoided
 * @returns {string} the resulting code (possibly still restricted if no free candidate was found)
 */
function abbreviate(name, restricted = []) {
  const withoutOld = name.replace("Old ", "O "); // shorten the Old prefix
  const cleaned = withoutOld.replace(/[()]/g, ""); // remove parentheses
  const parts = cleaned.split(" ");
  const joined = parts.join("");
  const isTwoWords = parts.length === 2;
  let code = isTwoWords ? parts[0][0] + parts[1][0] : joined.slice(0, 2);
  for (let idx = 1; restricted.includes(code) && idx < joined.length - 1; idx++) {
    code = joined[0] + joined[idx].toUpperCase();
  }
  return code;
}
/**
 * Conjunct array items into one readable string (e.g. [A,B,C] => "A, B and C" style output).
 * Formats with Intl.ListFormat (long conjunction) in the window.lang locale, defaulting to "en";
 * when Intl.ListFormat is missing, items are simply joined with ", ".
 * @param {string[]} array - items to join
 * @returns {string} the formatted list
 */
function list(array) {
  return Intl.ListFormat
    ? new Intl.ListFormat(window.lang || "en", {style: "long", type: "conjunction"}).format(array)
    : array.join(", ");
}
// split string into 2 almost equal parts not breaking words
function splitInTwo(str) {
const half = str.length / 2;

View file

@ -1,7 +1,7 @@
"use strict";
// version and caching control
const version = "1.84.11"; // generator version, update each time
const version = "1.84.12"; // generator version, update each time
{
document.title += " v" + version;