基于Levenshtein距离算法的编辑距离算法

deltamaster posted @ Sep 16, 2011 03:31:15 PM in 人工智能 with tags 人工智能 模糊搜索 模式识别 编辑距离 , 2800 阅读

 

  public static final int getDistance(String word, String similar) {

    //get the weights for each possible operation
    final int costOfDeletingSourceCharacter = config.getInteger(Configuration.COST_REMOVE_CHAR);
    final int costOfInsertingSourceCharacter = config.getInteger(Configuration.COST_INSERT_CHAR);
    final int costOfSubstitutingLetters = config.getInteger(Configuration.COST_SUBST_CHARS);
    final int costOfSwappingLetters = config.getInteger(Configuration.COST_SWAP_CHARS);
    final int costOfChangingCase = config.getInteger(Configuration.COST_CHANGE_CASE);

    int a_size = word.length() + 1;
    int b_size = similar.length() + 1;
    int[][] matrix = new int[a_size][b_size];
    matrix[0][0] = 0;

    for (int i = 1; i != a_size; ++i)
      matrix[i][0] = matrix[i - 1][0] + costOfInsertingSourceCharacter; //initialize the first column

    for (int j = 1; j != b_size; ++j)
      matrix[0][j] = matrix[0][j - 1] + costOfDeletingSourceCharacter; //initalize the first row

    word = " " + word;
    similar = " " + similar;

    for (int i = 1; i != a_size; ++i) {
      char sourceChar = word.charAt(i);
      for (int j = 1; j != b_size; ++j) {

        char otherChar = similar.charAt(j);
        if (sourceChar == otherChar) {
          matrix[i][j] = matrix[i - 1][j - 1]; //no change required, so just carry the current cost up
          continue;
        }

        int costOfSubst = costOfSubstitutingLetters + matrix[i - 1][j - 1];
        //if needed, add up the cost of doing a swap
        int costOfSwap = Integer.MAX_VALUE;
        boolean isSwap = (i != 1) && (j != 1) && sourceChar == similar.charAt(j - 1) && word.charAt(i - 1) == otherChar;
        if (isSwap)
          costOfSwap = costOfSwappingLetters + matrix[i - 2][j - 2];

        int costOfDelete = costOfDeletingSourceCharacter + matrix[i][j - 1];
        int costOfInsertion = costOfInsertingSourceCharacter + matrix[i - 1][j];

        int costOfCaseChange = Integer.MAX_VALUE;
        String strSrcChar = "" + sourceChar;
        String strOtherChar = "" + otherChar;

        if (strSrcChar.compareToIgnoreCase(strOtherChar) == 0)
          costOfCaseChange = costOfChangingCase + matrix[i - 1][j - 1];

        matrix[i][j] = minimum(costOfSubst, costOfSwap, costOfDelete, costOfInsertion, costOfCaseChange);
      }
    }
    int cost = matrix[a_size - 1][b_size - 1];

    if (false)
      System.out.println(dumpMatrix(word, similar, matrix));

    return cost;
  }
* 本文在CC BY-SA(署名-相同方式共享)协议下发布。
net worth 说:
Dec 08, 2023 04:07:30 PM

Billie Eilish - the youngest person and second person ever to win the four main Grammy categories, was born in December 18, 2001, find out more about her net worth on idol net worth


登录 *


loading captcha image...
(输入验证码)
or Ctrl+Enter