// File listing metadata: 135 lines · 5.2 KiB · JavaScript · Executable File
/**
 * Lowercase German function words and food-word stems.
 *
 * `splitLanguage` treats a word as German evidence when the normalised word
 * contains one of these entries as a substring, so entries are stems rather
 * than full inflected forms. Order is preserved from the original list.
 */
const DE_STEMS = [
  // function words
  'mit', 'und', 'oder', 'für', 'vom', 'zum', 'zur',
  // dishes and ingredients
  'gebraten', 'kartoffel', 'gemüse', 'suppe',
  'kuchen', 'schwein', 'rind', 'hähnchen', 'huhn',
  'fisch', 'nudel', 'soße', 'sosse', 'wurst',
  'kürbis', 'braten', 'sahne', 'apfel', 'käse',
  'fleisch', 'pilz', 'kirsch', 'joghurt', 'spätzle',
  'knödel', 'kraut', 'schnitzel', 'püree', 'rahm',
  'erdbeer', 'schoko', 'vanille', 'tomate',
  'gurke', 'salat', 'zwiebel', 'paprika', 'reis',
  'bohne', 'erbse', 'karotte', 'möhre', 'lauch',
  'knoblauch', 'chili', 'gewürz', 'kräuter', 'pfeffer',
  'salz', 'butter', 'milch', 'eier',
  'pfanne', 'auflauf', 'gratin', 'ragout', 'gulasch',
  'eintopf', 'filet', 'steak', 'brust',
  'salami', 'schinken', 'speck', 'brokkoli', 'blumenkohl',
  'zucchini', 'aubergine',
  'spinat', 'spargel', 'olive', 'mandel', 'nuss',
  'honig', 'senf', 'essig', 'öl', 'brot',
  'brötchen', 'pfannkuchen', 'eis', 'torte', 'dessert',
  'kompott', 'obst', 'frucht', 'beere',
  'plunder', 'dip',
];
/**
 * Lowercase English function words and food-word stems.
 *
 * `splitLanguage` treats a word as English evidence when the normalised word
 * contains one of these entries as a substring, which is why some entries are
 * truncated stems ('strawberr', 'tomat', 'choco'). Order is preserved from the
 * original list.
 */
const EN_STEMS = [
  // function words
  'with', 'and', 'or', 'for', 'from', 'to',
  // dishes and ingredients
  'fried', 'potato', 'vegetable', 'soup', 'cake',
  'pork', 'beef', 'chicken', 'fish', 'noodle',
  'sauce', 'sausage', 'pumpkin', 'roast',
  'cream', 'apple', 'cheese', 'meat', 'mushroom',
  'cherry', 'yogurt', 'wedge', 'sweet',
  'sour', 'dumpling', 'cabbage', 'mash', 'strawberr',
  'choco', 'vanilla', 'tomat', 'cucumber',
  'salad', 'onion', 'pepper', 'rice', 'bean',
  'pea', 'carrot', 'leek', 'garlic', 'chili',
  'spice', 'herb', 'salt', 'butter', 'milk',
  'egg', 'pan', 'casserole', 'gratin', 'ragout',
  'goulash', 'stew', 'filet', 'steak', 'breast',
  'salami', 'ham', 'bacon', 'broccoli',
  'cauliflower', 'zucchini', 'eggplant', 'spinach', 'asparagus',
  'olive', 'almond', 'nut',
  'honey', 'mustard', 'vinegar', 'oil', 'bread',
  'bun', 'pancake', 'ice', 'tart', 'dessert',
  'compote', 'fruit', 'berry', 'dip', 'danish',
];
/**
 * Split a bilingual menu line into its German and English parts.
 *
 * The expected layout is "<German> / <English>[(ALLERGENS)] <German> / ...":
 * the text is cut at every " / "; the first piece is German, and every later
 * piece starts with the English name of the previous dish, optionally
 * followed by an allergen group in parentheses and the German name of the
 * next dish. When a piece has no allergen group, the boundary between the
 * English and German words is guessed from the module-level `DE_STEMS` and
 * `EN_STEMS` lists.
 *
 * @param {string} text - Raw menu line.
 * @returns {{de: string, en: string, raw: string}} Bullet lists ("• " prefix,
 *   one entry per line) for German and English, plus `raw`: the input with a
 *   line break and bullet inserted after every allergen group that is followed
 *   by more text. Falsy input yields three empty strings.
 */
function splitLanguage(text) {
  if (!text) return { de: '', en: '', raw: '' };

  // An allergen group followed by more text marks the end of one dish.
  const bulleted = text.replace(/\(([A-Z ]+)\)\s*(?=\S)/g, '($1)\n• ');
  const formattedRaw = `• ${bulleted}`;

  /**
   * Sum German and English evidence over a list of words.
   * Each stem found inside a word contributes the fraction of the word it
   * covers, so an exact match counts 1 and a short stem buried in a long word
   * (e.g. 'to' in "kartoffel") counts little.
   */
  function scoreBlock(wordArray) {
    let de = 0;
    let en = 0;
    for (const word of wordArray) {
      // Keep only lowercase letters (incl. German umlauts/ß) before matching.
      const w = word.toLowerCase().replace(/[^a-zäöüß]/g, '');
      if (!w) continue;
      for (const stem of DE_STEMS) {
        if (w.includes(stem)) de += stem.length / w.length;
      }
      for (const stem of EN_STEMS) {
        if (w.includes(stem)) en += stem.length / w.length;
      }
    }
    return { de, en };
  }

  /**
   * Guess where an "<English words> <German words>" fragment switches
   * language. Returns the whole fragment as English when no convincing
   * split exists.
   */
  function heuristicSplitEnDe(fragment) {
    const unsplit = { enPart: fragment, nextDe: '' };
    const words = fragment.trim().split(/\s+/);
    if (words.length < 2) return unsplit;

    let bestK = -1;
    let maxScore = -Infinity;
    let bestRightNet = 0;

    for (let k = 1; k < words.length; k++) {
      const leftScore = scoreBlock(words.slice(0, k));
      const rightScore = scoreBlock(words.slice(k));

      // Left side should look English, right side should look German.
      const rightNet = rightScore.de - rightScore.en;
      const score = (leftScore.en - leftScore.de) + rightNet;

      // Strict comparison: on ties the earliest split point wins.
      if (score > maxScore) {
        maxScore = score;
        bestK = k;
        bestRightNet = rightNet;
      }
    }

    // Only split when the remainder is actually German on balance; otherwise
    // an all-English fragment would lose its last words to the German list.
    if (bestK === -1 || maxScore <= 0 || bestRightNet <= 0) return unsplit;

    return {
      enPart: words.slice(0, bestK).join(' '),
      nextDe: words.slice(bestK).join(' '),
    };
  }

  // Single-language line: assign the whole text to whichever language scores higher
  // (German on ties).
  if (!text.includes(' / ')) {
    const { de, en } = scoreBlock(text.toLowerCase().split(/\s+/));
    return en > de
      ? { de: '', en: formattedRaw, raw: formattedRaw }
      : { de: formattedRaw, en: '', raw: formattedRaw };
  }

  const parts = text.split(' / ');
  // More than four pieces is not split; the whole text is returned as German.
  if (parts.length > 4) {
    return { de: formattedRaw, en: '', raw: formattedRaw };
  }

  const deParts = [parts[0].trim()];
  const enParts = [];
  const allergenRegex = /\(([A-Z ]+)\)\s*/;

  for (const part of parts.slice(1)) {
    const fragment = part.trim();
    const match = fragment.match(allergenRegex);

    if (!match) {
      const split = heuristicSplitEnDe(fragment);
      enParts.push(split.enPart);
      if (split.nextDe) deParts.push(split.nextDe);
      continue;
    }

    // "<English>(CODE) <next German>": the code belongs to the dish whose
    // English name precedes it, so it is also appended to the latest German entry.
    const allergenCode = match[1];
    const enPart = fragment.substring(0, match.index).trim();
    const nextDe = fragment.substring(match.index + match[0].length).trim();

    enParts.push(`${enPart}(${allergenCode})`);
    deParts[deParts.length - 1] += `(${allergenCode})`;
    if (nextDe) deParts.push(nextDe);
  }

  return {
    de: deParts.map((p) => `• ${p}`).join('\n'),
    en: enParts.map((p) => `• ${p}`).join('\n'),
    raw: formattedRaw,
  };
}
// Manual smoke check: three dishes on one line, where only the last one
// carries an allergen group, so the first two boundaries rely on the heuristic.
const sampleMenuLine = "Hühnersuppe mit Reis / Chicken soup with rice Jambalaya mit Tofu und Joghurtdip / Jambalaya with tofu and yogurt dip Mini Plunder / Mini danishes(ACGHO)";
console.log(splitLanguage(sampleMenuLine));