基于Levenshtein距离算法的编辑距离算法

deltamaster posted @ 14 年前 in 人工智能 with tags 人工智能 模糊搜索 模式识别 编辑距离 , 2876 阅读

 

getDistance
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
public static final int getDistance(String word, String similar) {
 
  //get the weights for each possible operation
  final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR);
  final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR);
  final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS);
  final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS);
  final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE);
 
  int a_size = word.length() + 1;
  int b_size = similar.length() + 1;
  int[][] matrix = new int[a_size][b_size];
  matrix[0][0] = 0;
 
  for (int i = 1; i != a_size; ++i)
    matrix[i][0] = matrix[i - 1][0] + costOfInsertingSourceCharacter; //initialize the first column
 
  for (int j = 1; j != b_size; ++j)
    matrix[0][j] = matrix[0][j - 1] + costOfDeletingSourceCharacter; //initalize the first row
 
  word = " " + word;
  similar = " " + similar;
 
  for (int i = 1; i != a_size; ++i) {
    char sourceChar = word.charAt(i);
    for (int j = 1; j != b_size; ++j) {
 
      char otherChar = similar.charAt(j);
      if (sourceChar == otherChar) {
        matrix[i][j] = matrix[i - 1][j - 1]; //no change required, so just carry the current cost up
        continue;
      }
 
      int costOfSubst = costOfSubstitutingLetters + matrix[i - 1][j - 1];
      //if needed, add up the cost of doing a swap
      int costOfSwap = Integer.MAX_VALUE;
      boolean isSwap = (i != 1) && (j != 1) && sourceChar == similar.charAt(j - 1) && word.charAt(i - 1) == otherChar;
      if (isSwap)
        costOfSwap = costOfSwappingLetters + matrix[i - 2][j - 2];
 
      int costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1];
      int costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1][j];
 
      int costOfCaseChange = Integer.MAX_VALUE;
      String strSrcChar = "" + sourceChar;
      String strOtherChar = "" + otherChar;
 
      if (strSrcChar.compareToIgnoreCase(strOtherChar) == 0)
        costOfCaseChange = costOfChangingCase + matrix[i - 1][j - 1];
 
      matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange);
    }
  }
  int cost = matrix[a_size - 1][b_size - 1];
 
  if (false)
    System.out.println(dumpMatrix(word, similar, matrix));
 
  return cost;
}
* 本文在CC BY-SA(署名-相同方式共享)协议下发布。
net worth 说:
大约 1 年前

Billie Eilish - the youngest person and second person ever to win the four main Grammy categories, was born in December 18, 2001, find out more about her net worth on idol net worth


登录 *


loading captcha image...
(输入验证码)
or Ctrl+Enter