基于Levenshtein距离算法的编辑距离算法
public static final int getDistance(String word, String similar) { //get the weights for each possible operation final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR); final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR); final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS); final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS); final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE); int a_size = word.length() + 1; int b_size = similar.length() + 1; int[][] matrix = new int[a_size][b_size]; matrix[0][0] = 0; for (int i = 1; i != a_size; ++i) matrix[i][0] = matrix[i - 1][0] + costOfInsertingSourceCharacter; //initialize the first column for (int j = 1; j != b_size; ++j) matrix[0][j] = matrix[0][j - 1] + costOfDeletingSourceCharacter; //initalize the first row word = " " + word; similar = " " + similar; for (int i = 1; i != a_size; ++i) { char sourceChar = word.charAt(i); for (int j = 1; j != b_size; ++j) { char otherChar = similar.charAt(j); if (sourceChar == otherChar) { matrix[i][j] = matrix[i - 1][j - 1]; //no change required, so just carry the current cost up continue; } int costOfSubst = costOfSubstitutingLetters + matrix[i - 1][j - 1]; //if needed, add up the cost of doing a swap int costOfSwap = Integer.MAX_VALUE; boolean isSwap = (i != 1) && (j != 1) && sourceChar == similar.charAt(j - 1) && word.charAt(i - 1) == otherChar; if (isSwap) costOfSwap = costOfSwappingLetters + matrix[i - 2][j - 2]; int costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1]; int costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1][j]; int costOfCaseChange = Integer.MAX_VALUE; String strSrcChar = "" + sourceChar; String strOtherChar = "" + otherChar; if (strSrcChar.compareToIgnoreCase(strOtherChar) == 0) costOfCaseChange = costOfChangingCase + matrix[i - 1][j - 1]; matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange); } } int cost = matrix[a_size - 1][b_size - 1]; if (false) System.out.println(dumpMatrix(word, similar, matrix)); return cost; }