读书人

请教一个关于雷同率的问题

发布时间: 2013-06-19 10:26:41 作者: rapoo

请教一个关于雷同率的问题
首先感谢过节期间还在这里辛苦答疑的老师们,预祝各位新春快乐。
问题如下,有表
1abcddsbaca...
2bbadcadbdd...
3adbcaadbbd...
...
比如我要查找雷同率超过95%的记录,该怎么做啊
另外,还有错同率的问题,请教该怎么解决
非常感谢各位提供帮助。 雷同率?错同率
[解决办法]
字符串相似度算法 Levenshtein Distance 大全
http://rosettacode.org/wiki/Levenshtein_distance

VFP 版


* Demo: print the edit distance and the similarity percentage for two
* sample strings that differ only in where the leading "1" is placed.
Clear
oMatcher = Createobject("FuzzyMatch")
cFirst = "1中文字符串"
cSecond = "中文字符串1"

* "?" starts a new output line, "??" continues on the same line.
? "差异字符数:"
?? oMatcher.EditDistance(cFirst, cSecond)

? "相似百分比:"
?? oMatcher.PercentMatch(cFirst, cSecond)

Define Class FuzzyMatch As Custom

    ** <summary>
    ** Computes the Levenshtein edit distance between two strings: the
    ** minimum number of single-character insertions, deletions and
    ** substitutions needed to make them identical.
    ** Identical strings yield 0. If one string is empty the result is
    ** the character count of the other.
    **
    ** Lengths and positions are taken with LENC()/SUBSTRC() so that a
    ** double-byte character (for example a Chinese character) counts as
    ** one character instead of two bytes.
    ** </summary>
    ** <param name="sSource">First string to compare (not modified)</param>
    ** <param name="sTarget">Second string to compare (not modified)</param>
    ** <returns>Number of edits separating the two strings</returns>
    Function EditDistance ( ;
            sSource As String, ;
            sTarget As String ) As Decimal
        Local ;
            iLenSource As Integer, ;
            iLenTarget As Integer, ;
            iRow As Integer, ;
            iCol As Integer, ;
            dCost As Decimal, ;
            cColChar As String, ;
            cRowChar As String
        ** Declared LOCAL so the work matrix can neither leak into nor
        ** overwrite a same-named variable owned by the caller.
        Local Array Levenshtein[1]

        iLenSource = Lenc( sSource )
        iLenTarget = Lenc( sTarget )

        ** Trivial cases: turning "" into X takes one insert per character.
        If iLenSource == 0
            Return iLenTarget
        Endif
        If iLenTarget == 0
            Return iLenSource
        Endif

        ** Rows index sTarget, columns index sSource. Cell [r,c] holds the
        ** distance between the first r-1 characters of sTarget and the
        ** first c-1 characters of sSource.
        Dimension Levenshtein[ iLenTarget + 1, iLenSource + 1 ]

        For iRow = 1 To iLenTarget + 1
            Levenshtein[iRow, 1] = iRow - 1
        Endfor

        For iCol = 1 To iLenSource + 1
            Levenshtein[1, iCol] = iCol - 1
        Endfor

        For iCol = 2 To iLenSource + 1
            ** The column character is constant for the whole inner loop.
            cColChar = Substrc( sSource, iCol - 1, 1 )

            For iRow = 2 To iLenTarget + 1
                cRowChar = Substrc( sTarget, iRow - 1, 1 )

                ** "==" is used so the comparison does not depend on SET EXACT.
                If cColChar == cRowChar
                    dCost = 0.0
                Else
                    dCost = 1.0
                Endif

                ** Cheapest of: delete, insert, substitute (or keep).
                Levenshtein[iRow, iCol] = ;
                    Min( ;
                    Levenshtein[iRow - 1, iCol] + 1, ;
                    Levenshtein[iRow, iCol - 1] + 1, ;
                    Levenshtein[iRow - 1, iCol - 1] + dCost )
            Endfor
        Endfor

        Return Levenshtein[iLenTarget + 1, iLenSource + 1]
    Endfunc

    ** <summary>
    ** Compares two strings and returns the percentage match, computed as
    ** (1 - EditDistance / length of the longer string) * 100.
    ** Identical non-empty strings return 100. When both strings are
    ** empty the function returns 0.0 (kept for backward compatibility).
    ** Lengths are counted in characters (LENC), matching EditDistance.
    ** </summary>
    ** <param name="sSource">First string to compare</param>
    ** <param name="sTarget">Second string to compare</param>
    ** <returns>Similarity as a percentage</returns>
    Function PercentMatch( ;
            sSource As String, ;
            sTarget As String ) As Decimal
        Local ;
            iLenSource As Integer, ;
            iLenTarget As Integer, ;
            dResult As Decimal, ;
            dEditDistance As Decimal

        iLenSource = Lenc( sSource )
        iLenTarget = Lenc( sTarget )

        ** Guard against dividing by zero when both strings are empty.
        If iLenSource + iLenTarget == 0
            Return 0.0
        Endif

        dEditDistance = This.EditDistance( sSource, sTarget )
        dResult = ( 1.0 - ( dEditDistance / Max( iLenSource, iLenTarget ) ) ) * 100.0
        Return dResult
    Endfunc
Enddefine

读书人网 >VFP

热点推荐