基于Levenshtein距离算法的编辑距离算法
public static final int getDistance(String word, String similar) { //get the weights for each possible operation final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR); final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR); final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS); final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS); final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE); int a_size = word.length() + 1; int b_size = similar.length() + 1; int[][] matrix = new int[a_size][b_size]; matrix[0][0] = 0; for (int i = 1; i != a_size; ++i) matrix[i][0] = matrix[i - 1][0] + costOfInsertingSourceCharacter; //initialize the first column for (int j = 1; j != b_size; ++j) matrix[0][j] = matrix[0][j - 1] + costOfDeletingSourceCharacter; //initalize the first row word = " " + word; similar = " " + similar; for (int i = 1; i != a_size; ++i) { char sourceChar = word.charAt(i); for (int j = 1; j != b_size; ++j) { char otherChar = similar.charAt(j); if (sourceChar == otherChar) { matrix[i][j] = matrix[i - 1][j - 1]; //no change required, so just carry the current cost up continue; } int costOfSubst = costOfSubstitutingLetters + matrix[i - 1][j - 1]; //if needed, add up the cost of doing a swap int costOfSwap = Integer.MAX_VALUE; boolean isSwap = (i != 1) && (j != 1) && sourceChar == similar.charAt(j - 1) && word.charAt(i - 1) == otherChar; if (isSwap) costOfSwap = costOfSwappingLetters + matrix[i - 2][j - 2]; int costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1]; int costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1][j]; int costOfCaseChange = Integer.MAX_VALUE; String strSrcChar = "" + sourceChar; String strOtherChar = "" + otherChar; if (strSrcChar.compareToIgnoreCase(strOtherChar) == 0) costOfCaseChange = costOfChangingCase + matrix[i - 1][j - 1]; matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange); } } int cost = matrix[a_size - 1][b_size - 1]; if (false) System.out.println(dumpMatrix(word, similar, matrix)); return cost; }
Dec 08, 2023 04:07:30 PM
Billie Eilish - the youngest person and second person ever to win the four main Grammy categories, was born in December 18, 2001, find out more about her net worth on idol net worth