给定2个字符串,如何计算变化(插入、删除、替换)?【levenshtein distance 算法】
生活随笔
收集整理的這篇文章主要介紹了
给定2个字符串,如何计算变化(插入、删除、替换)?【levenshtein distance 算法】
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
給定2個字符串,計算字符串發生了哪些變化(插入、刪除、替換)
/**
 * Computes the Levenshtein edit distance between two sequences.
 *
 * @param a - Source sequence.
 * @param b - Target sequence.
 * @param compose - Equality predicate applied to one element of `a` and one
 *   element of `b`; defaults to strict equality (`===`).
 * @returns The minimum number of single-element insertions, deletions and
 *   substitutions needed to turn `a` into `b`.
 */
export function levenshteinDistance<T = any>(
  a: readonly T[],
  b: readonly T[],
  compose: (a: T, b: T) => boolean = (a, b) => a === b
): number {
  return levenshteinDistanceMatrix(a, b, compose)[b.length][a.length];
}

/**
 * Builds the full Levenshtein distance matrix.
 *
 * The matrix has `b.length + 1` rows and `a.length + 1` columns;
 * `matrix[row][col]` is the edit distance between the first `col` elements
 * of `a` and the first `row` elements of `b`.
 *
 * @param a - Source sequence (columns).
 * @param b - Target sequence (rows).
 * @param compose - Equality predicate; defaults to strict equality (`===`).
 * @returns The filled distance matrix.
 */
export function levenshteinDistanceMatrix<T = any>(
  a: readonly T[],
  b: readonly T[],
  compose: (a: T, b: T) => boolean = (a, b) => a === b
): number[][] {
  const matrix: number[][] = [];
  for (let row = 0; row <= b.length; row += 1) {
    const line = new Array<number>(a.length + 1).fill(0);
    // Column 0: an empty prefix of `a` becomes b[0..row) with `row` insertions.
    line[0] = row;
    matrix.push(line);
  }
  // Row 0: a[0..col) becomes an empty prefix of `b` with `col` deletions.
  for (let col = 0; col <= a.length; col += 1) {
    matrix[0][col] = col;
  }

  for (let row = 1; row <= b.length; row += 1) {
    for (let col = 1; col <= a.length; col += 1) {
      const substitutionCost = compose(a[col - 1], b[row - 1]) ? 0 : 1;
      matrix[row][col] = Math.min(
        matrix[row][col - 1] + 1, // delete a[col - 1]
        matrix[row - 1][col] + 1, // insert b[row - 1]
        matrix[row - 1][col - 1] + substitutionCost // keep or substitute
      );
    }
  }
  return matrix;
}

/** Kinds of edit operation. */
export enum LevenshteinOperatorType {
  /** Remove an element of the source sequence. */
  deletion,
  /** Insert an element of the target sequence. */
  insertion,
  /** Replace an element of the source with an element of the target. */
  substitution
}

/** Deletion of `value`, which sits at `index` in the source sequence. */
export interface LevenshteinDeletion<T = any> {
  type: LevenshteinOperatorType.deletion;
  index: number;
  value: T;
}

/** Insertion of `value` so that it ends up at `index`. */
export interface LevenshteinInsertion<T = any> {
  type: LevenshteinOperatorType.insertion;
  index: number;
  value: T;
}

/** Replacement of `value.old` at `index` by `value.new`. */
export interface LevenshteinSubstitution<T = any> {
  type: LevenshteinOperatorType.substitution;
  index: number;
  value: {
    old: T;
    new: T;
  };
}

/** Any single edit operation. */
export type LevenshteinOperator<T = any> =
  | LevenshteinDeletion<T>
  | LevenshteinInsertion<T>
  | LevenshteinSubstitution<T>;

/** Type guard: is `val` a deletion? */
export function isLevenshteinDeletion<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinDeletion<T> {
  return val.type === LevenshteinOperatorType.deletion;
}

/** Type guard: is `val` an insertion? */
export function isLevenshteinInsertion<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinInsertion<T> {
  return val.type === LevenshteinOperatorType.insertion;
}

/** Type guard: is `val` a substitution? */
export function isLevenshteinSubstitution<T = any>(
  val: LevenshteinOperator<T>
): val is LevenshteinSubstitution<T> {
  return val.type === LevenshteinOperatorType.substitution;
}

/**
 * Derives a minimal edit script that turns `a` into `b`.
 *
 * The script is produced by walking the distance matrix back from its
 * bottom-right corner, so operators are listed from the END of the sequences
 * towards the start. Because every later operator touches a position at or
 * before the earlier ones, the script can be replayed in listed order with
 * `levenshteinTest` without re-basing indices.
 *
 * Index meaning:
 * - deletion / substitution: position of the affected element in `a`;
 * - insertion: position at which the new element is inserted.
 *
 * @param a - Source sequence.
 * @param b - Target sequence.
 * @param compose - Equality predicate; defaults to strict equality (`===`).
 * @returns Exactly `levenshteinDistance(a, b, compose)` operators.
 */
export function levenshteinOperators<T = any>(
  a: readonly T[],
  b: readonly T[],
  compose: (a: T, b: T) => boolean = (a, b) => a === b
): LevenshteinOperator<T>[] {
  const matrix = levenshteinDistanceMatrix<T>(a, b, compose);
  const operators: LevenshteinOperator<T>[] = [];

  let row = b.length; // how many elements of `b` are still unexplained
  let col = a.length; // how many elements of `a` are still unexplained

  while (row > 0 || col > 0) {
    const current = matrix[row][col];

    // Only follow a neighbour if it can actually have produced `current`.
    // Stepping to an equal-valued "up"/"left" cell would silently drop an
    // element, so those moves are allowed only when they cost exactly 1.
    if (row > 0 && matrix[row - 1][col] + 1 === current) {
      // b[row - 1] goes right after the first `col` elements of `a`.
      operators.push({
        type: LevenshteinOperatorType.insertion,
        value: b[row - 1],
        index: col
      });
      row -= 1;
    } else if (row > 0 && col > 0 && matrix[row - 1][col - 1] + 1 === current) {
      operators.push({
        type: LevenshteinOperatorType.substitution,
        value: { old: a[col - 1], new: b[row - 1] },
        index: col - 1
      });
      row -= 1;
      col -= 1;
    } else if (col > 0 && matrix[row][col - 1] + 1 === current) {
      operators.push({
        type: LevenshteinOperatorType.deletion,
        value: a[col - 1],
        index: col - 1
      });
      col -= 1;
    } else {
      // No unit-cost move fits, so the diagonal has the same value:
      // a[col - 1] and b[row - 1] are equal and nothing is emitted.
      row -= 1;
      col -= 1;
    }
  }
  return operators;
}

/**
 * Replays an edit script on a sequence and returns the result.
 *
 * Operators are applied one after another in the given order, each index
 * being interpreted against the intermediate result. The input array is
 * never mutated.
 *
 * @param a - Sequence to transform.
 * @param operators - Edit script, e.g. the output of `levenshteinOperators`.
 * @returns A new array with every operator applied.
 * @throws RangeError if an operator's index does not address a valid position
 *   of the intermediate sequence.
 */
export function levenshteinTest<T>(
  a: readonly T[],
  operators: readonly LevenshteinOperator<T>[]
): T[] {
  const result = a.slice();
  for (const op of operators) {
    // An insertion may also target the position one past the last element.
    const lastValidIndex = isLevenshteinInsertion(op)
      ? result.length
      : result.length - 1;
    if (!Number.isInteger(op.index) || op.index < 0 || op.index > lastValidIndex) {
      throw new RangeError(
        `levenshteinTest: index ${op.index} is out of range for a sequence of length ${result.length}`
      );
    }
    switch (op.type) {
      case LevenshteinOperatorType.deletion:
        result.splice(op.index, 1);
        break;
      case LevenshteinOperatorType.insertion:
        result.splice(op.index, 0, op.value);
        break;
      case LevenshteinOperatorType.substitution:
        result[op.index] = op.value.new;
        break;
    }
  }
  return result;
}

/*
 * Companion spec from the article. It belongs in a separate file and needs
 * mocha (`describe` / `it`) and chai (`expect`), so it is kept here as a
 * commented example:
 *
 *   import { expect } from "chai";
 *   import {
 *     levenshteinOperators,
 *     isLevenshteinSubstitution,
 *     LevenshteinSubstitution
 *   } from "./levenshteinDistance";
 *
 *   const a = `let item = 2;`;
 *   const b = `let item = 3;`;
 *   const ops = levenshteinOperators(a.split(""), b.split(""));
 *
 *   describe("", () => {
 *     it("", () => {
 *       // the script has exactly one step
 *       expect(ops.length).equal(1);
 *       // that step is a substitution
 *       expect(isLevenshteinSubstitution(ops[0])).equal(true);
 *       // it replaces "2" with "3"
 *       expect((ops[0] as LevenshteinSubstitution).value.old).equal("2");
 *       expect((ops[0] as LevenshteinSubstitution).value.new).equal("3");
 *     });
 *   });
 */

// Example
levenshteinDistance
| 1 | 【2】插入112 | 2 | 3 | 4 | 5 |
| 2 | 【2】插入, | 2 | 3 | 4 | 5 |
| 3 | 【3】插入223 | 3 | 3 | 4 | 5 |
| 4 | 【4】插入, | 4 | 4 | 4 | 5 |
| 5 | 【4】不變let | 5 | 5 | 5 | 5 |
| 6 | 5 | 【4】不變item | 5 | 6 | 6 |
| 7 | 6 | 5 | 【4】不變= | 5 | 6 |
| 8 | 7 | 6 | 5 | 【5】替換233->2 | 6 |
| 9 | 8 | 7 | 6 | 6 | 【5】不變; |
// Sample inputs: the source text and the text it is edited into.
const old = "let item = 233;";
const newStr = "112,223,let item = 2;";
《新程序員》:云原生和全面數字化實踐——50位技術專家共同創作,文字、視頻、音頻交互閱讀
總結
以上是生活随笔為你收集整理的给定2个字符串,如何计算变化(插入、删除、替换)?【levenshtein distance 算法】的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Linux组管理和权限管理
- 下一篇: python函数解释