基于Levenshtein距离算法的编辑距离算法

 

getDistance
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
public static final int getDistance(String word, String similar) {
 
  //get the weights for each possible operation
  final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR);
  final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR);
  final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS);
  final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS);
  final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE);
 
  int a_size = word.length() + 1;
  int b_size = similar.length() + 1;
  int[][] matrix = new int[a_size][b_size];
  matrix[0][0] = 0;
 
  for (int i = 1; i != a_size; ++i)
    matrix[i][0] = matrix[i - 1][0] + costOfInsertingSourceCharacter; //initialize the first column
 
  for (int j = 1; j != b_size; ++j)
    matrix[0][j] = matrix[0][j - 1] + costOfDeletingSourceCharacter; //initalize the first row
 
  word = " " + word;
  similar = " " + similar;
 
  for (int i = 1; i != a_size; ++i) {
    char sourceChar = word.charAt(i);
    for (int j = 1; j != b_size; ++j) {
 
      char otherChar = similar.charAt(j);
      if (sourceChar == otherChar) {
        matrix[i][j] = matrix[i - 1][j - 1]; //no change required, so just carry the current cost up
        continue;
      }
 
      int costOfSubst = costOfSubstitutingLetters + matrix[i - 1][j - 1];
      //if needed, add up the cost of doing a swap
      int costOfSwap = Integer.MAX_VALUE;
      boolean isSwap = (i != 1) && (j != 1) && sourceChar == similar.charAt(j - 1) && word.charAt(i - 1) == otherChar;
      if (isSwap)
        costOfSwap = costOfSwappingLetters + matrix[i - 2][j - 2];
 
      int costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1];
      int costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1][j];
 
      int costOfCaseChange = Integer.MAX_VALUE;
      String strSrcChar = "" + sourceChar;
      String strOtherChar = "" + otherChar;
 
      if (strSrcChar.compareToIgnoreCase(strOtherChar) == 0)
        costOfCaseChange = costOfChangingCase + matrix[i - 1][j - 1];
 
      matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange);
    }
  }
  int cost = matrix[a_size - 1][b_size - 1];
 
  if (false)
    System.out.println(dumpMatrix(word, similar, matrix));
 
  return cost;
}