Показать сообщение отдельно
  #12 (permalink)  
Старый 16.03.2019, 00:06
Аватар для Malleys
Профессор
Отправить личное сообщение для Malleys Посмотреть профиль Найти все сообщения от Malleys
 
Регистрация: 20.12.2009
Сообщений: 1,714

<!DOCTYPE html>
<html lang="en">
<head>
	<meta charset="UTF-8">
	<style>

/* Query box: large type, spans the full page width (border included). */
#inputField {
	box-sizing: border-box;
	width: 100%;
	font-size: 200%;
}

/* One card per pattern rendered inside the result container. */
#patternsField > * {
	margin: 1em 0;
	padding: 1em;
	border: 1px solid #eeeeee;
	border-radius: 0.4em;
	font: bold 1em sans-serif;
}

/* Card whose pattern matched the current input: green highlight. */
#patternsField > .matches {
	color: #ffffff;
	background-color: #4caf50;
}

	</style>
</head>
<body>
	<input id="inputField" value="Если ли в нолечие лапата?">
	<div id="patternsField"></div>
	<script>

/**
 * Array-like collection of phonetic codes, one entry per word of `expression`.
 *
 * May be called with or without `new`; a plain call transparently
 * re-invokes itself as a constructor.
 *
 * @param {string} expression - Text to encode.
 */
function Soundex(expression) {
	const invokedAsConstructor = this instanceof Soundex;

	if(!invokedAsConstructor) {
		return new Soundex(expression);
	}

	this.expression = expression;
	// Fills the numeric indices and `length` of this instance.
	this.calculate(expression);
}

/**
 * Shared behaviour of Soundex instances.
 *
 * Instances inherit from Array.prototype, so once `calculate` has filled the
 * numeric indices and `length`, generic array methods such as `join` work.
 */
Soundex.prototype = {
	__proto__: Array.prototype,

	/**
	 * Coding trie keyed by Latin letters. A node is either
	 *   - an array of code triples (leaf), or
	 *   - an object whose "0" (and optionally "1") entry is a code triple and
	 *     whose letter keys lead to longer letter sequences.
	 * Triple layout as consumed by `word`:
	 *   [code at word start, code before a vowel, code elsewhere]; -1 = emit nothing.
	 * NOTE(review): the layout resembles the Daitch–Mokotoff Soundex chart — confirm.
	 */
	_codes: {
		"a":{
			"0":[0,-1,-1],
			"i":[[0,1,-1]],
			"j":[[0,1,-1]],
			"y":[[0,1,-1]],
			"u":[[0,7,-1]]
		},
		"b":[[7,7,7]],
		"c":{
			"0":[5,5,5],
			"1":[4,4,4],
			"z":{"0":[4,4,4],"s":[[4,4,4]]},
			"s":{"0":[4,4,4],"z":[[4,4,4]]},
			"k":[[5,5,5],[45,45,45]],
			"h":{"0":[5,5,5],"1":[4,4,4],"s":[[5,54,54]]}
		},
		"d":{
			"0":[3,3,3],
			"t":[[3,3,3]],
			"z":{"0":[4,4,4],"h":[[4,4,4]],"s":[[4,4,4]]},
			"s":{"0":[4,4,4],"h":[[4,4,4]],"z":[[4,4,4]]},
			"r":{"s":[[4,4,4]],"z":[[4,4,4]]}
		},
		"e":{
			"0":[0,-1,-1],
			"i":[[0,1,-1]],
			"j":[[0,1,-1]],
			"y":[[0,1,-1]],
			"u":[[1,1,-1]],
			"w":[[1,1,-1]]
		},
		"f":{"0":[7,7,7],"b":[[7,7,7]]},
		"g":[[5,5,5]],
		"h":[[5,5,-1]],
		"i":{
			"0":[0,-1,-1],
			"a":[[1,-1,-1]],
			"e":[[1,-1,-1]],
			"o":[[1,-1,-1]],
			"u":[[1,-1,-1]]
		},
		"j":[[4,4,4]],
		"k":{"0":[5,5,5],"h":[[5,5,5]],"s":[[5,54,54]]},
		"l":[[8,8,8]],
		"m":{"0":[6,6,6],"n":[[66,66,66]]},
		"n":{"0":[6,6,6],"m":[[66,66,66]]},
		"o":{"0":[0,-1,-1],"i":[[0,1,-1]],"j":[[0,1,-1]],"y":[[0,1,-1]]},
		"p":{"0":[7,7,7],"f":[[7,7,7]],"h":[[7,7,7]]},
		"q":[[5,5,5]],
		"r":{
			"0":[9,9,9],
			"d": {"0":[93,93,93],"c":[[95,95,95]]},
			"t": {"0":[93,93,93],"c":[[95,95,95]]},
			"z":[[94,94,94],[94,94,94]],
			"s":[[94,94,94],[94,94,94]]
		},
		"s":{
			"0":[4,4,4],
			"l":{"0":[2,43,43]},
			"z":{
				"0":[4,4,4],
				"t":[[2,43,43]],
				"c":{"z":[[2,4,4]],"s":[[2,4,4]]},
				"d":[[2,43,43]]
			},
			"d":[[2,43,43]],
			"t":{
				"0":[2,43,43],
				"r":{"z":[[2,4,4]],"s":[[2,4,4]]},
				"n":[[26,46,46]],
				"c":{"h":[[2,4,4]]},
				"s":{"h":[[2,4,4]],"c":{"h":[[2,4,4]]}}
			},
			"c":{
				"0":[2,4,4],
				"h":{
					"0":[4,4,4],
					"t":{
						"0":[2,43,43],
						"s":{"c":{"h":[[2,4,4]]},"h":[[2,4,4]]},
						"c":{"h":[[2,4,4]]}
					},
					"d":[[2,43,43]]
				}
			},
			"h":{
				"0":[4,4,4],
				"t":{"0":[2,43,43],"c":{"h":[[2,4,4]]},"s":{"h":[[2,4,4]]}},
				"c":{"h":[[2,4,4]]},
				"d":[[2,43,43]]
			}
		},
		"t":{
			"0":[3,3,3],
			"c":{"0":[4,4,4],"h":[[4,4,4]]},
			"z":{"0":[4,4,4],"s":[[4,4,4]]},
			"s":{"0":[4,4,4],"z":[[4,4,4]],"h":[[4,4,4]],"c":{"h":[[4,4,4]]}},
			"t":{
				"s":{"0":[4,4,4],"z":[[4,4,4]],"c":{"h":[[4,4,4]]}},
				"c":{"h":[[4,4,4]]},
				"z":[[4,4,4]]
			},
			"h":[[3,3,3]],
			"r":{"z":[[4,4,4]],"s":[[4,4,4]]}
		},
		"u":{
			"0":[0,-1,-1],
			"e":[[0,-1,-1]],
			"i":[[0,1,-1]],
			"j":[[0,1,-1]],
			"y":[[0,1,-1]]
		},
		"v":[[7,7,7]],
		"w":[[7,7,7]],
		"x":[[5,54,54]],
		"y":[[1,-1,-1]],
		"z":{
			"0":[4,4,4],
			"d":{"0":[2,43,43],"z":{"0":[2,4,4],"h":[[2,4,4]]}},
			"h":{"0":[4,4,4],"d":{"0":[2,43,43],"z":{"h":[[2,4,4]]}}},
			"s":{"0":[4,4,4],"h":[[4,4,4]],"c":{"h":[[4,4,4]]}}
		}
	},

	/** Lower-case Cyrillic letter -> Latin transliteration used before coding. */
	_map: {
		а: "a", б: "b", в: "v", г: "g", д: "d", е: "e", ё: "e", ж: "zh", з: "z",
		и: "i", й: "i", к: "k", л: "l", м: "m", н: "n", о: "o", п: "p", р: "r",
		с: "s", т: "t", у: "u", ф: "f", х: "h", ц: "c", ч: "ch", ш: "sh", щ: "sch",
		ь: "'", ы: "y", ъ: "'", э: "e", ю: "yu", я: "ya"
	},

	/**
	 * Base Latin letter -> character class of its accented / decorated variants.
	 *
	 * Digraph, ligature, Kelvin-sign and full-width code points are written as
	 * \u escapes on purpose: if they are stored literally, Unicode text
	 * normalisation can expand them into plain letter pairs, which turns the
	 * surrounding ranges into either syntax errors ("range out of order") or
	 * ranges that swallow most ASCII letters.
	 */
	_accented: {
		a: /[AaªÀ-Åà-åĀ-ąǍǎȀ-ȃȦȧᴬᵃḀḁẚẠ-ảₐ⒜Ⓐⓐ\uFF21\uFF41]/ig,
		b: /[BbᴮᵇḂ-ḇℬ⒝Ⓑⓑ\uFF22\uFF42]/ig,
		c: /[CcÇçĆ-čᶜ℀ℂ℃ℭⅭⅽ⒞Ⓒⓒ㏄-㏇\uFF23\uFF43]/ig,
		d: /[DdĎď\u01C4-\u01C6\u01F1-\u01F3ᴰᵈḊ-ḓⅅⅆⅮⅾ⒟Ⓓⓓ\uFF24\uFF44]/ig,
		e: /[EeÈ-Ëè-ëĒ-ěȄ-ȇȨȩᴱᵉḘ-ḛẸ-ẽₑℯℰⅇ⒠Ⓔⓔ\uFF25\uFF45]/ig,
		f: /[FfᶠḞḟ℉ℱ℻⒡Ⓕⓕ\uFB00-\uFB04\uFF26\uFF46]/ig,
		g: /[GgĜ-ģǦǧǴǵᴳᵍḠḡℊ⒢Ⓖⓖ\uFF27\uFF47]/ig,
		h: /[HhĤĥȞȟʰᴴḢ-ḫẖℋ-ℎ⒣Ⓗⓗ\uFF28\uFF48]/ig,
		i: /[IiÌ-Ïì-ïĨ-İ\u0132\u0133ǏǐȈ-ȋᴵᵢḬḭỈ-ịⁱℐℑℹⅈ⒤Ⓘⓘ\uFF29\uFF49]/ig,
		j: /[Jj\u0132-\u0135\u01C7-\u01CCǰʲᴶⅉ⒥Ⓙⓙⱼ\uFF2A\uFF4A]/ig,
		k: /[KkĶķǨǩᴷᵏḰ-ḵ\u212A⒦Ⓚⓚ\uFF2B\uFF4B]/ig,
		l: /[LlĹ-ŀ\u01C7-\u01C9ˡᴸḶḷḺ-ḽℒℓⅬⅼ⒧Ⓛⓛ\uFF2C\uFF4C]/ig,
		m: /[MmᴹᵐḾ-ṃ℠™ℳⅯⅿ⒨Ⓜⓜ\uFF2D\uFF4D]/ig,
		n: /[NnÑñ\u0143-\u0149\u01CA-\u01CCǸǹᴺṄ-ṋⁿℕ№⒩Ⓝⓝ\uFF2E\uFF4E]/ig,
		o: /[OoºÒ-Öò-öŌ-őƠơǑǒǪǫȌ-ȏȮȯᴼᵒỌ-ỏₒℴ⒪Ⓞⓞ\uFF2F\uFF4F]/ig,
		p: /[PpᴾᵖṔ-ṗℙ⒫Ⓟⓟ㎀㎊㎩\uFF30\uFF50]/ig,
		q: /[Qqℚ⒬Ⓠⓠ㏃\uFF31\uFF51]/ig,
		r: /[RrŔ-řȐ-ȓʳᴿᵣṘ-ṛṞṟ₨ℛ-ℝ⒭Ⓡⓡ\uFF32\uFF52]/ig,
		s: /[SsŚ-šſȘșˢṠ-ṣ₨℁℠⒮Ⓢⓢ\uFB06\uFF33\uFF53]/ig,
		t: /[TtŢ-ťȚțᵀᵗṪ-ṱẗ℡™⒯Ⓣⓣ㏏\uFB05\uFB06\uFF34\uFF54]/ig,
		u: /[UuÙ-Üù-üŨ-ųƯưǓǔȔ-ȗᵁᵘᵤṲ-ṷỤ-ủ℆⒰Ⓤⓤ㍳㍺\uFF35\uFF55]/ig,
		v: /[VvᵛᵥṼ-ṿ⒱Ⓥⓥⱽ\uFF36\uFF56]/ig,
		w: /[WwŴŵʷᵂẀ-ẉẘ⒲Ⓦⓦ\uFF37\uFF57]/ig,
		x: /[XxˣẊ-ẍₓ⒳Ⓧⓧ㏓\uFF38\uFF58]/ig,
		y: /[YyÝýÿŶ-ŸȲȳʸẎẏẙỲ-ỹ⒴Ⓨⓨ\uFF39\uFF59]/ig,
		z: /[ZzŹ-ž\u01F1-\u01F3ᶻẐ-ẕℤℨ⒵Ⓩⓩ\uFF3A\uFF5A]/ig
	},

	/**
	 * Memo of already coded words, shared by every instance.
	 * Prototype-less so that words such as "constructor" cannot hit inherited
	 * Object members; keys carry the script flag because the code of a word
	 * depends on it (see `calculate`).
	 */
	_cache: Object.create(null),

	/**
	 * Encodes a single word.
	 *
	 * @param {string} str - Word consisting of lower-case a-z only (what
	 *   `calculate` produces); other characters are not handled here.
	 * @param {boolean} [isCyrillic=true] - When false and the matched trie node
	 *   has an alternative "1" entry, that alternative is used for non-initial
	 *   positions.
	 * @returns {string} Concatenated codes, right-padded with "0" to at least
	 *   three characters; "" for an empty word.
	 */
	word(str, isCyrillic = true) {
		const length = str.length;
		const maxLength = 6; // how far ahead a multi-letter sequence may reach
		let result = "";
		let previousCode = -1;
		let i = 0;

		while(i < length) {
			let current = this._codes[str[i]];
			let last = current; // deepest node so far that carries a code
			let j = 1;          // number of letters consumed by `last`

			// Walk the trie to find the longest letter sequence that has a code.
			for(let k = 1; k <= maxLength; k++) {
				const letter = str[i + k];

				if(!letter || !current[letter]) break;

				current = current[letter];

				if(current[0]) {
					last = current;
					j = k + 1;
				}
			}

			let code;

			if(i === 0) {
				// Word start always uses the first triple's first column.
				code = last[0][0];
			} else {
				const triple = isCyrillic || "1" in last === false ? last[0] : last[1];
				const following = str[i + j];
				// Column 1 when a vowel (start code 0) follows, column 2 otherwise.
				code = triple[following && this._codes[following][0][0] === 0 ? 1 : 2];
			}

			// -1 means "not coded"; adjacent identical codes collapse into one.
			if(code !== -1 && code !== previousCode) result += code;

			previousCode = code;
			i += j;
		}

		while(result && result.length < 3)
			result += "0";

		return result;
	},

	/**
	 * Normalises `str`, encodes every word and stores the codes on this
	 * instance as indexed entries plus `length`.
	 *
	 * If the text contains any Cyrillic letter, every word of the expression is
	 * coded with the Cyrillic flag set.
	 *
	 * @param {string} str - Raw expression.
	 * @returns {Array|undefined} An empty array when nothing codable remains
	 *   (the instance is left untouched), otherwise undefined.
	 */
	calculate(str) {
		str = str.trim().toLowerCase();
		const cyrillicMatch = str.match(/[ёа-я]/g);
		const isCyrillic = cyrillicMatch !== null;

		if(isCyrillic)
			str = this.transliterate(str, cyrillicMatch);

		for(const letter of Object.keys(this._accented))
			str = str.replace(this._accented[letter], letter);

		// Every whitespace run (including a lone tab or newline) becomes a single
		// space, and spaces exposed by stripped punctuation are trimmed, so that
		// `word` only ever receives non-empty a-z tokens.
		str = str
			.replace(/[^\sa-z]/g, "")
			.replace(/\s+/g, " ")
			.trim();

		if(!str) return [];

		const codes = str.split(" ").map(word => {
			const key = (isCyrillic ? "c:" : "l:") + word;

			if(key in this._cache === false)
				this._cache[key] = this.word(word, isCyrillic);

			return this._cache[key];
		});

		Object.assign(this, codes);
		this.length = codes.length;
	},

	/**
	 * Replaces Cyrillic letters with their `_map` transliteration.
	 *
	 * @param {string} str - Lower-case text.
	 * @param {string[]} matches - Every Cyrillic letter occurrence found in
	 *   `str`; each entry replaces the first remaining occurrence of that letter.
	 * @returns {string} Transliterated text.
	 */
	transliterate(str, matches) {
		for(const match of matches) {
			str = str.replace(match, this._map[match]);
		}

		return str;
	},

	/** @returns {string} The word codes separated by single spaces. */
	toString() {
		return this.join(" ");
	}
};

/**
 * Template tag that builds a phonetic pattern: literal text is replaced by
 * its Soundex codes, each interpolated RegExp becomes a capture group.
 *
 * Usable as a tag, a plain call, or with `new`.
 *
 * @param {string[]} template - Literal parts of the template.
 * @param {...RegExp} substitutions - One RegExp per placeholder; only its
 *   `source` is used.
 */
function Rx(template, ...substitutions) {
	if(!(this instanceof Rx)) {
		return new Rx(template, ...substitutions);
	}

	// Human-readable form: literal text with each placeholder shown as a group.
	let readable = template[0];
	substitutions.forEach((substitution, i) => {
		readable += `(${substitution.source})${template[i + 1]}`;
	});
	this.raw = readable;

	// Parts after each placeholder are encoded first, the leading part last.
	const pieces = [];
	for(let i = 1; i < template.length; i++) {
		pieces.push(`(${substitutions[i - 1].source}) ${Soundex(template[i])}`);
	}
	pieces.unshift(Soundex(template[0]));

	const source = pieces.join(" ").trim();
	this.pattern = new RegExp(source);
}

/** Matching helpers; both encode the input with Soundex before matching. */
Rx.prototype = {
	/**
	 * @param {string} expression - Text to check.
	 * @returns {boolean} Whether the encoded text matches the pattern.
	 */
	test(expression) {
		const encoded = Soundex(expression);
		return this.pattern.test(encoded);
	},

	/**
	 * @param {string} expression - Text to match.
	 * @returns {Array} Captured groups (without the full match), or an empty
	 *   array when the pattern does not match.
	 */
	match(expression) {
		const found = this.pattern.exec(Soundex(expression));

		if(!found) {
			return [];
		}

		return found.slice(1);
	}
};

// Example: highlight every pattern that phonetically matches the typed text.
const patterns = [
	Rx`Есть ли в наличии ${/[\s\S]+/}?`,
	Rx`Есть ли ${/[\s\S]+/} в наличии?`,
];

// Look the elements up explicitly instead of relying on the implicit
// window properties that browsers create for elements with an id.
const inputElement = document.getElementById("inputField");
const patternsElement = document.getElementById("patternsField");

inputElement.addEventListener("input", () => {
	const rows = patterns.map(pattern => {
		const row = document.createElement("div");

		if(pattern.test(inputElement.value)) row.className = "matches";

		// `raw` contains regex source, which may include "<" (e.g. named
		// groups); assign it as text so it is never parsed as markup.
		row.textContent = pattern.raw;

		return row;
	});

	patternsElement.textContent = "";
	rows.forEach(row => patternsElement.appendChild(row));
});

// Render once for the initial value of the input.
inputElement.dispatchEvent(new Event("input"));
	
	</script>
</body>
</html>


[1]: Paramonov V., Shigarov A., Ruzhnikov G., Belykh P. Polyphon: An Algorithm for Phonetic String Matching in Russian Language // Communications in Computer and Information Science. Springer. 2016. Vol. 639, pp. 568-579.

Последний раз редактировалось Malleys, 16.03.2019 в 00:14. Причина: Добавил пример
Ответить с цитированием