fix: improve adjectives generation rules

2026-03-22 15:17:23 +01:00 · 2022-06-04 19:48:17 +03:00 · 2022-06-04 19:48:17 +03:00 · 00ced6260b
commit 00ced6260b
parent 55f8b2a491
4 changed files with 177 additions and 60 deletions
--- a/index.html
+++ b/index.html
@ -6093,7 +6093,8 @@
    <script src="utils/numberUtils.js"></script>
    <script src="utils/polyfills.js"></script>
    <script src="utils/probabilityUtils.js?v=29052022"></script>
-    <script src="utils/stringUtils.js"></script>
+    <script src="utils/stringUtils.js?v=04062022"></script>
+    <script src="utils/languageUtils.js"></script>
    <script src="utils/unitUtils.js"></script>

    <script src="modules/voronoi.js"></script>
--- a/utils/languageUtils.js
+++ b/utils/languageUtils.js
@ -0,0 +1,174 @@
+"use strict";
+
+// Characters treated as vowels: Latin letters, the apostrophe, IPA symbols, Cyrillic vowels and accented forms
+const VOWELS = `aeiouyɑ'əøɛœæɶɒɨɪɔɐʊɤɯаоиеёэыуюяàèìòùỳẁȁȅȉȍȕáéíóúýẃőűâêîôûŷŵäëïöüÿẅãẽĩõũỹąęįǫųāēīōūȳăĕĭŏŭǎěǐǒǔȧėȯẏẇạẹịọụỵẉḛḭṵṳ`;
+
+/**
+ * Check whether a character is listed in VOWELS.
+ * The table holds lowercase characters only, so uppercase letters are not reported as vowels.
+ * @param {string} c - character to look up
+ * @returns {boolean} true if `c` occurs in VOWELS
+ */
+function vowel(c) {
+  const isListed = VOWELS.includes(c);
+  return isListed;
+}
+
+/**
+ * Drop trailing vowels from a string, one character at a time.
+ * Trimming stops as soon as the string is no longer than `minLength`
+ * or its last character is not a vowel.
+ * @param {string} string - text to trim
+ * @param {number} [minLength=3] - length at which trimming stops
+ * @returns {string} the trimmed string
+ */
+function trimVowels(string, minLength = 3) {
+  let trimmed = string;
+  for (;;) {
+    const canShorten = trimmed.length > minLength;
+    if (!canShorten || !vowel(last(trimmed))) break;
+    trimmed = trimmed.slice(0, -1);
+  }
+  return trimmed;
+}
+
+/**
+ * Ordered noun => adjective rules consumed by getAdjective.
+ * Each rule has:
+ *  - name: label for the rule (not unique: "an" is used twice)
+ *  - probability: chance passed to P() that the rule is considered at all
+ *  - condition: RegExp the noun has to match
+ *  - action: function building the adjective from the noun
+ * Rules are tried from top to bottom and the first one passing both checks wins,
+ * so suffix-specific rules have to stay above the generic catch-all rules at the bottom.
+ */
+const adjectivizationRules = [
+  // " Guo" suffix is simply dropped
+  {name: "guo", probability: 1, condition: new RegExp(" Guo$"), action: noun => noun.slice(0, -4)},
+  {
+    name: "orszag",
+    probability: 1,
+    condition: new RegExp("orszag$"),
+    // short names keep the suffix and get "ian", long names just lose the suffix
+    action: noun => (noun.length < 9 ? noun + "ian" : noun.slice(0, -6))
+  },
+  {
+    name: "stan",
+    probability: 1,
+    condition: new RegExp("stan$"),
+    // short names get "i" appended, long names lose the suffix and trailing vowels
+    action: noun => (noun.length < 9 ? noun + "i" : trimVowels(noun.slice(0, -4)))
+  },
+  {
+    name: "land",
+    probability: 1,
+    condition: new RegExp("land$"),
+    action: noun => {
+      if (noun.length > 9) return noun.slice(0, -4);
+      const root = trimVowels(noun.slice(0, -4), 0); // minLength 0: the root may be trimmed down to nothing
+      if (root.length < 3) return noun + "ic";
+      if (root.length < 4) return root + "lish";
+      return root + "ish";
+    }
+  },
+  {
+    name: "que",
+    probability: 1,
+    condition: new RegExp("que$"),
+    action: noun => noun.replace(/que$/, "can")
+  },
+  {
+    name: "a",
+    probability: 1,
+    condition: new RegExp("a$"),
+    action: noun => noun + "n"
+  },
+  {
+    name: "o",
+    probability: 1,
+    condition: new RegExp("o$"),
+    action: noun => noun.replace(/o$/, "an")
+  },
+  {
+    name: "u",
+    probability: 1,
+    condition: new RegExp("u$"),
+    action: noun => noun + "an"
+  },
+  {
+    name: "i",
+    probability: 1,
+    condition: new RegExp("i$"),
+    action: noun => noun + "an"
+  },
+  {
+    name: "e",
+    probability: 1,
+    condition: new RegExp("e$"),
+    action: noun => noun + "an"
+  },
+  {
+    name: "ay",
+    probability: 1,
+    condition: new RegExp("ay$"),
+    action: noun => noun + "an"
+  },
+  {
+    name: "os",
+    probability: 1,
+    condition: new RegExp("os$"),
+    action: noun => {
+      const root = trimVowels(noun.slice(0, -2), 0);
+      if (root.length < 4) return noun.slice(0, -1); // short root: only drop the final "s"
+      return root + "ian";
+    }
+  },
+  {
+    name: "es",
+    probability: 1,
+    condition: new RegExp("es$"),
+    action: noun => {
+      const root = trimVowels(noun.slice(0, -2), 0);
+      if (root.length > 7) return noun.slice(0, -1); // long root: only drop the final "s"
+      return root + "ian";
+    }
+  },
+  {
+    name: "l",
+    probability: 0.8,
+    condition: new RegExp("l$"),
+    action: noun => noun + "ese"
+  },
+  {
+    name: "n",
+    probability: 0.8,
+    condition: new RegExp("n$"),
+    action: noun => noun + "ese"
+  },
+  {
+    name: "ad",
+    probability: 0.8,
+    condition: new RegExp("ad$"),
+    action: noun => noun + "ian"
+  },
+  {
+    // NOTE(review): nouns ending in "an" also match the "n" rule above, so this rule
+    // is only reached when the "n" roll fails - confirm the order is intended
+    name: "an",
+    probability: 0.8,
+    condition: new RegExp("an$"),
+    action: noun => noun + "ian"
+  },
+  {
+    name: "ish",
+    probability: 0.25,
+    condition: new RegExp("^[a-zA-Z]{6}$"), // exactly six ASCII letters
+    action: noun => trimVowels(noun.slice(0, -1)) + "ish"
+  },
+  {
+    name: "an",
+    probability: 0.5,
+    // up to seven ASCII letters; the quantifier needs a comma, "{0-7}" would be matched as literal text
+    condition: new RegExp("^[a-zA-Z]{0,7}$"),
+    action: noun => trimVowels(noun) + "an"
+  }
+];
+
+/**
+ * Build the adjective form of a noun.
+ * Walks adjectivizationRules in order and applies the first rule whose
+ * probability roll succeeds and whose condition matches the noun.
+ * @param {string} noun - name to convert
+ * @returns {string} the adjective, or the noun unchanged when no rule applies
+ */
+function getAdjective(noun) {
+  // the roll is made before the regex test, exactly once per visited rule
+  const matched = adjectivizationRules.find(({probability, condition}) => P(probability) && condition.test(noun));
+  return matched ? matched.action(noun) : noun;
+}
+
+/**
+ * get ordinal from integer: 1 => 1st, 2 => 2nd, 3 => 3rd, 4 => 4th, 11 => 11th, 21 => 21st
+ * the (n + 90) % 100 - 10 shift turns 11, 12 and 13 into negative values, so their index misses
+ * the suffix array and they fall back to "th" like every number not ending in 1, 2 or 3
+ */
+const nth = n => n + (["st", "nd", "rd"][((((n + 90) % 100) - 10) % 10) - 1] || "th");
+
+/**
+ * Get a two-letter code (abbreviation) from a name.
+ * Two-word names use the initials of both words, any other name uses its first two letters.
+ * If the code is listed in `restricted`, the second letter is replaced by the following
+ * letters of the name (uppercased) until a free code is found or the letters run out.
+ * @param {string} name - name to abbreviate
+ * @param {string[]} [restricted=[]] - codes that are already taken
+ * @returns {string} the code; may still be a restricted one if no alternative was free
+ */
+function abbreviate(name, restricted = []) {
+  const parsed = name.replace("Old ", "O ").replace(/[()]/g, ""); // shorten "Old " to "O " and drop parentheses
+  // skip empty tokens left by doubled or trailing spaces, otherwise words[1][0] is undefined
+  const words = parsed.split(" ").filter(word => word.length > 0);
+  const letters = words.join("");
+
+  let code = words.length === 2 ? words[0][0] + words[1][0] : letters.slice(0, 2);
+  for (let i = 1; i < letters.length - 1 && restricted.includes(code); i++) {
+    code = letters[0] + letters[i].toUpperCase();
+  }
+  return code;
+}
+
+/**
+ * Join array items into a readable list: [A,B,C] => "A, B and C".
+ * Uses Intl.ListFormat with the window.lang locale ("en" when it is not set);
+ * when Intl.ListFormat is not available the items are joined with ", ".
+ * @param {string[]} array - items to join
+ * @returns {string} the formatted list
+ */
+function list(array) {
+  if (Intl.ListFormat) {
+    const locale = window.lang || "en";
+    const formatter = new Intl.ListFormat(locale, {style: "long", type: "conjunction"});
+    return formatter.format(array);
+  }
+  return array.join(", ");
+}
--- a/utils/stringUtils.js
+++ b/utils/stringUtils.js
@ -13,64 +13,6 @@ function capitalize(string) {
  return string.charAt(0).toUpperCase() + string.slice(1);
 }

-// check if char is vowel or can serve as vowel
-function vowel(c) {
-  return `aeiouyɑ'əøɛœæɶɒɨɪɔɐʊɤɯаоиеёэыуюяàèìòùỳẁȁȅȉȍȕáéíóúýẃőűâêîôûŷŵäëïöüÿẅãẽĩõũỹąęįǫųāēīōūȳăĕĭŏŭǎěǐǒǔȧėȯẏẇạẹịọụỵẉḛḭṵṳ`.includes(c);
-}
-
-// remove vowels from the end of the string
-function trimVowels(string) {
-  while (string.length > 3 && vowel(last(string))) {
-    string = string.slice(0, -1);
-  }
-  return string;
-}
-
-// get adjective form from noun
-function getAdjective(string) {
-  // special cases for some suffixes
-  if (string.length > 8 && string.slice(-6) === "orszag") return string.slice(0, -6);
-  if (string.length > 6 && string.slice(-4) === "stan") return string.slice(0, -4);
-  if (P(0.5) && string.slice(-4) === "land") return string + "ic";
-  if (string.slice(-4) === " Guo") string = string.slice(0, -4);
-
-  // don't change is name ends on suffix
-  if (string.slice(-2) === "an") return string;
-  if (string.slice(-3) === "ese") return string;
-  if (string.slice(-1) === "i") return string;
-
-  const end = string.slice(-1); // last letter of string
-  if (end === "a") return (string += "n");
-  if (end === "o") return (string = trimVowels(string) + "an");
-  if (vowel(end) || end === "c") return (string += "an"); // ceiuy
-  if (end === "m" || end === "n") return (string += "ese");
-  if (end === "q") return (string += "i");
-  return trimVowels(string) + "ian";
-}
-
-// get ordinal out of integer: 1 => 1st
-const nth = n => n + (["st", "nd", "rd"][((((n + 90) % 100) - 10) % 10) - 1] || "th");
-
-// get two-letters code (abbreviation) from string
-function abbreviate(name, restricted = []) {
-  const parsed = name.replace("Old ", "O ").replace(/[()]/g, ""); // remove Old prefix and parentheses
-  const words = parsed.split(" ");
-  const letters = words.join("");
-
-  let code = words.length === 2 ? words[0][0] + words[1][0] : letters.slice(0, 2);
-  for (let i = 1; i < letters.length - 1 && restricted.includes(code); i++) {
-    code = letters[0] + letters[i].toUpperCase();
-  }
-  return code;
-}
-
-// conjunct array: [A,B,C] => "A, B and C"
-function list(array) {
-  if (!Intl.ListFormat) return array.join(", ");
-  const conjunction = new Intl.ListFormat(window.lang || "en", {style: "long", type: "conjunction"});
-  return conjunction.format(array);
-}
-
 // split string into 2 almost equal parts not breaking words
 function splitInTwo(str) {
  const half = str.length / 2;
--- a/versioning.js
+++ b/versioning.js
@ -1,7 +1,7 @@
 "use strict";
 // version and caching control

-const version = "1.84.11"; // generator version, update each time
+const version = "1.84.12"; // generator version, update each time

 {
  document.title += " v" + version;