- using System;
- using System.Text.RegularExpressions;
- using System.Threading.Tasks;
namespace Levenshtein
{
    /// <summary>
    /// Handler for the event raised when an asynchronous analysis has finished.
    /// </summary>
    /// <param name="sim">The computed similarity.</param>
    public delegate void AnalyzerCompletedHander(double sim);

    /// <summary>
    /// Text similarity tool based on the Levenshtein (edit) distance.
    /// </summary>
    /// <remarks>
    /// Both texts are normalised before comparison: every character that is not an
    /// ASCII letter, an ASCII digit, a character in U+4E00..U+9FA5 or whitespace is removed.
    /// The similarity is <c>1 - distance / max(length1, length2)</c>, computed on the
    /// normalised texts.
    /// </remarks>
    public class LevenshteinDistance : IDisposable
    {
        // Matches every character that must be stripped before comparison.
        // In a verbatim string a single backslash is already literal, so \u4e00, \u9fa5
        // and \s reach the regex engine as escapes; doubling the backslashes would turn
        // them into literal characters and strip CJK text and whitespace.
        private static readonly Regex StripPattern = new Regex(@"[^a-zA-Z0-9\u4e00-\u9fa5\s]");

        // Normalised texts; never null so that length computations cannot fail.
        private string str1 = string.Empty;
        private string str2 = string.Empty;

        // Task of the most recent run; null until Start or StartAyns is called.
        private Task<double> task;

        /// <summary>
        /// Raised with the similarity once a run started by <see cref="Start"/> has finished.
        /// </summary>
        public event AnalyzerCompletedHander AnalyzerCompleted;

        /// <summary>
        /// Gets or sets text 1. The stored value is the normalised text; null is treated as empty.
        /// </summary>
        public string Str1
        {
            get { return str1; }
            set { str1 = Format(value); }
        }

        /// <summary>
        /// Gets or sets text 2. The stored value is the normalised text; null is treated as empty.
        /// </summary>
        public string Str2
        {
            get { return str2; }
            set { str2 = Format(value); }
        }

        /// <summary>
        /// Gets the total number of cell computations of a run
        /// (normalised length of text 1 times normalised length of text 2).
        /// </summary>
        public int TotalTimes
        {
            get { return str1.Length * str2.Length; }
        }

        /// <summary>
        /// Gets whether the most recent run has finished. False when no run was started yet.
        /// </summary>
        public bool IsCompleted
        {
            get
            {
                Task<double> current = task;
                return current != null && current.IsCompleted;
            }
        }

        /// <summary>
        /// Creates an instance for the two given texts.
        /// </summary>
        /// <param name="str1">Text 1.</param>
        /// <param name="str2">Text 2.</param>
        public LevenshteinDistance(string str1, string str2)
        {
            this.str1 = Format(str1);
            this.str2 = Format(str2);
        }

        /// <summary>
        /// Creates an instance with two empty texts; assign <see cref="Str1"/> and
        /// <see cref="Str2"/> before starting a run.
        /// </summary>
        public LevenshteinDistance()
        {
        }

        /// <summary>
        /// Starts the analysis asynchronously. <see cref="AnalyzerCompleted"/> is raised from
        /// the task continuation, so handlers should not assume they run on the calling thread.
        /// </summary>
        public void Start()
        {
            Task<double> run = new Task<double>(Analyzer);
            task = run;
            run.Start();
            run.ContinueWith(o => Completed(o.Result));
        }

        /// <summary>
        /// Runs the analysis and blocks until the result is available.
        /// </summary>
        /// <returns>The similarity in the range 0..1.</returns>
        public double StartAyns()
        {
            Task<double> run = new Task<double>(Analyzer);
            task = run;
            run.Start();
            run.Wait();
            return run.Result;
        }

        // Raises AnalyzerCompleted; the handler is copied first so that a concurrent
        // unsubscribe between the null check and the call cannot cause a NullReferenceException.
        private void Completed(double s)
        {
            AnalyzerCompletedHander handler = AnalyzerCompleted;
            if (handler != null)
            {
                handler(s);
            }
        }

        // Computes the similarity of the two normalised texts.
        // Returns 0 when either text is empty (including when both are empty).
        private double Analyzer()
        {
            // Snapshot the fields so a setter call during a run cannot change the lengths mid-loop.
            string a = str1;
            string b = str2;

            if (a.Length == 0 || b.Length == 0)
            {
                return 0;
            }

            // Two rolling rows of the dynamic-programming table are enough:
            // previous[j] is the distance between a[0..i-1) and b[0..j),
            // current[j] the distance between a[0..i) and b[0..j).
            int[] previous = new int[b.Length + 1];
            int[] current = new int[b.Length + 1];

            // Turning the empty prefix of a into b[0..j) takes j insertions.
            for (int j = 0; j <= b.Length; j++)
            {
                previous[j] = j;
            }

            for (int i = 1; i <= a.Length; i++)
            {
                // Turning a[0..i) into the empty prefix of b takes i deletions.
                current[0] = i;

                for (int j = 1; j <= b.Length; j++)
                {
                    int substitution = a[i - 1] == b[j - 1] ? 0 : 1;
                    current[j] = Min(
                        current[j - 1] + 1,             // insertion
                        previous[j] + 1,                // deletion
                        previous[j - 1] + substitution); // match or substitution
                }

                int[] swap = previous;
                previous = current;
                current = swap;
            }

            // After the final swap the last computed row is in 'previous'.
            int distance = previous[b.Length];
            int longest = Math.Max(a.Length, b.Length);
            return 1.0 - (double)distance / longest;
        }

        // Removes every character that does not take part in the comparison.
        // Null input is treated as an empty text.
        private static string Format(string str)
        {
            if (str == null)
            {
                return string.Empty;
            }

            return StripPattern.Replace(str, "");
        }

        // Smallest of three integers.
        private static int Min(int a, int b, int c)
        {
            return Math.Min(a, Math.Min(b, c));
        }

        /// <summary>
        /// Releases the task of the most recent run. Does nothing when no run was started
        /// or when the run is still in progress, because a task can only be disposed
        /// once it has reached a final state.
        /// </summary>
        public void Dispose()
        {
            Task<double> current = task;
            if (current != null && current.IsCompleted)
            {
                current.Dispose();
            }
        }
    }
}
// This snippet comes from http://www.codesnippet.cn/detail/120820135085.html
// Source: http://www.codesnippet.cn/detail/120820135085.html