基于Levenshtein距离算法的编辑距离算法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | public static final int getDistance(String word, String similar) { //get the weights for each possible operation final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR); final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR); final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS); final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS); final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE); int a_size = word.length() + 1 ; int b_size = similar.length() + 1 ; int [][] matrix = new int [a_size][b_size]; matrix[ 0 ][ 0 ] = 0 ; for ( int i = 1 ; i != a_size; ++i) matrix[i][ 0 ] = matrix[i - 1 ][ 0 ] + costOfInsertingSourceCharacter; //initialize the first column for ( int j = 1 ; j != b_size; ++j) matrix[ 0 ][j] = matrix[ 0 ][j - 1 ] + costOfDeletingSourceCharacter; //initalize the first row word = " " + word; similar = " " + similar; for ( int i = 1 ; i != a_size; ++i) { char sourceChar = word.charAt(i); for ( int j = 1 ; j != b_size; ++j) { char otherChar = similar.charAt(j); if (sourceChar == otherChar) { matrix[i][j] = matrix[i - 1 ][j - 1 ]; //no change required, so just carry the current cost up continue ; } int costOfSubst = costOfSubstitutingLetters + matrix[i - 1 ][j - 1 ]; //if needed, add up the cost of doing a swap int costOfSwap = Integer.MAX_VALUE; boolean isSwap = (i != 1 ) && (j != 1 ) && sourceChar == similar.charAt(j - 1 ) && word.charAt(i - 1 ) == otherChar; if (isSwap) costOfSwap = costOfSwappingLetters + matrix[i - 2 ][j - 2 ]; int costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1 ]; int costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1 ][j]; int costOfCaseChange = Integer.MAX_VALUE; String strSrcChar = "" + sourceChar; String strOtherChar = "" + otherChar; if (strSrcChar.compareToIgnoreCase(strOtherChar) == 0 ) costOfCaseChange = costOfChangingCase + matrix[i - 1 ][j - 1 ]; matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange); } } int cost = matrix[a_size - 1 ][b_size - 1 ]; if ( false ) System.out.println(dumpMatrix(word, similar, matrix)); return cost; } |