// Collects the characters of string1 that have a matching character in string2
// located no further than allowedDistance positions away (Jaro matching window).
//
// Parameters:
//  string1         - String - the string whose characters are looked up.
//  string2         - String - the string that is searched for matches.
//  allowedDistance - Number - maximum allowed difference between the 1-based
//                    positions of two characters for them to count as a match.
//
// Returns:
//  String - matched characters of string1, in the order they appear in string1.
//           Each position of string2 can be matched at most once.
Function getCommonCharacters(string1, string2, allowedDistance)
    str1_len = StrLen(string1);
    str2_len = StrLen(string2);
    // Positions of string2 that are already paired with a character of string1.
    // Without this bookkeeping one character of string2 could be matched many
    // times (e.g. "aaa" against "a" would yield three common characters).
    usedPositions = New Map;
    commonCharacters = "";
    For i = 1 To str1_len Do
        currentChar = Mid(string1, i, 1);
        // Symmetric window [i - allowedDistance, i + allowedDistance],
        // clipped to the valid 1-based positions of string2.
        lowerBound = Max(1, i - allowedDistance);
        upperBound = Min(i + allowedDistance, str2_len);
        noMatch = True;
        j = lowerBound;
        While j <= upperBound And noMatch Do
            If usedPositions.Get(j) = Undefined And Mid(string2, j, 1) = currentChar Then
                noMatch = False;
                // Consume the position so it cannot be matched again.
                usedPositions.Insert(j, True);
                commonCharacters = commonCharacters + currentChar;
            EndIf;
            j = j + 1;
        EndDo;
    EndDo;
    Return commonCharacters;
EndFunction
// Computes the Jaro similarity of two strings.
//
// Parameters:
//  string1 - String - first string.
//  string2 - String - second string.
//
// Returns:
//  Number - 0 when the strings share no characters inside the matching window
//           (this includes the case when either string is empty); otherwise
//           the average of the two match ratios and the non-transposed ratio.
Function Jaro( string1, string2 )
    str1_len = StrLen(string1);
    str2_len = StrLen(string2);
    // Matching window: half of the longer length minus one.
    // Clamped at 0: for one-character strings the raw formula gives -1, which
    // produces an empty window and makes identical strings like "a"/"a" score 0.
    distance = Max(0, Int(Max( str1_len, str2_len ) / 2.0) - 1);
    // Characters of each string that have a match in the other one.
    commons1 = getCommonCharacters( string1, string2, distance );
    commons2 = getCommonCharacters( string2, string1, distance );
    commons1_len = StrLen(commons1);
    commons2_len = StrLen(commons2);
    // No matches at all: similarity is 0 (also guards the divisions below).
    If commons1_len = 0 Or commons2_len = 0 Then
        Return 0;
    EndIf;
    // Count positions where the two matched sequences disagree;
    // half of that number is the transposition count.
    transpositions = 0;
    upperBound = Min(commons1_len, commons2_len);
    For i = 1 To upperBound Do
        If Mid(commons1,i,1) <> Mid(commons2,i,1) Then
            transpositions = transpositions + 1;
        EndIf;
    EndDo;
    transpositions = transpositions/2;
    // Jaro similarity.
    Return (commons1_len/str1_len + commons2_len/str2_len + (commons1_len - transpositions)/(commons1_len)) / 3.0;
EndFunction
// Returns the length of the common prefix of two strings, looking at no more
// than MINPREFIXLENGTH leading characters.
//
// Parameters:
//  string1         - String - first string.
//  string2         - String - second string.
//  MINPREFIXLENGTH - Number - upper limit on the number of compared characters
//                    (default 4).
//
// Returns:
//  Number - count of leading characters that are equal in both strings, capped
//           by MINPREFIXLENGTH and by the length of the shorter string.
Function getPrefixLength( string1, string2, MINPREFIXLENGTH = 4)
    limit = Min(MINPREFIXLENGTH, StrLen(string1), StrLen(string2));
    // Assume the whole inspected range matches until a difference is found.
    prefixLength = limit;
    position = 1;
    While position <= limit Do
        If Mid(string1, position, 1) <> Mid(string2, position, 1) Then
            // First differing position: the prefix ends right before it.
            prefixLength = position - 1;
            Break;
        EndIf;
        position = position + 1;
    EndDo;
    Return prefixLength;
EndFunction
// Computes the Jaro-Winkler score: the Jaro score raised by a bonus that is
// proportional to the length of the common prefix of the two strings.
//
// Parameters:
//  string1     - String - first string.
//  string2     - String - second string.
//  PREFIXSCALE - Number - weight of one common prefix character (default 0.1).
//
// Returns:
//  Number - Jaro score plus prefixLength * PREFIXSCALE * (1 - Jaro score).
Function JaroWinkler(string1, string2, PREFIXSCALE = 0.1)
    baseScore = Jaro(string1, string2);
    sharedPrefix = getPrefixLength(string1, string2);
    // The bonus scales the remaining gap to 1 by the common prefix length.
    prefixBonus = sharedPrefix * PREFIXSCALE * (1.0 - baseScore);
    Return baseScore + prefixBonus;
EndFunction
Однако этот алгоритм не учитывает перестановку слов: строки, состоящие из одних и тех же слов в разном порядке, получают заниженную оценку сходства.
Сравнение алгоритмов по функционалу [необходимо зарегистрироваться для просмотра ссылки]