renamed C folder
This commit is contained in:
74
C_C++/levenshtein_dist_-_usage.cpp
Normal file
74
C_C++/levenshtein_dist_-_usage.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
// @BAKE g++ $@
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// https://github.com/Meteorix/pylcs
|
||||
|
||||
// Splits a byte string into UTF-8 symbols.
//
// A new symbol starts at every byte that is NOT a continuation byte
// (continuation bytes have the bit pattern 10xxxxxx). No validation is
// performed: a stray continuation byte is simply attached to whatever
// symbol precedes it (or forms the first piece if it leads the string).
//
// str : input bytes
// returns the pieces in order; empty vector for an empty input
vector<string> utf8_split(const string &str){
    vector<string> pieces;
    const int total = str.length();
    int start = 0;

    // `next` is the index one past the byte currently being examined.
    for (int next = 1; next <= total; ++next){
        const bool inside_symbol = next < total && (str[next] & 0xc0) == 0x80;
        if (inside_symbol){
            continue; // the following byte still belongs to the current symbol
        }
        pieces.push_back(str.substr(start, next - start));
        start = next;
    }
    return pieces;
}
|
||||
|
||||
// https://github.com/schiffma/distlib
|
||||
// Returns the smallest of three integers.
int mini(int a, int b, int c){
    return min({a, b, c});
}
|
||||
|
||||
int levenshtein_dist(const string &word1, const string &word2){
|
||||
///
|
||||
/// Please use lower-case strings
|
||||
/// word1 : first word
|
||||
/// word2 : second word
|
||||
///
|
||||
|
||||
//int size1 = word1.size(), size2 = word2.size();
|
||||
|
||||
vector<string> word1_ = utf8_split(word1);
|
||||
vector<string> word2_ = utf8_split(word2);
|
||||
int size1 = word1_.size();
|
||||
int size2 = word2_.size();
|
||||
|
||||
int suppr_dist, insert_dist, subs_dist;
|
||||
int* dist = new int[(size1+1)*(size2+1)];
|
||||
|
||||
for(int i=0; i<size1+1; ++i)
|
||||
dist[(size2+1)*i] = i;
|
||||
for(int j=0; j<size2+1; ++j)
|
||||
dist[j] = j;
|
||||
for(int i=1; i<size1+1; ++i){
|
||||
for(int j=1; j<size2+1; ++j){
|
||||
suppr_dist = dist[(size2+1)*(i-1)+j] + 1;
|
||||
insert_dist = dist[(size2+1)*i+j-1] + 1;
|
||||
subs_dist = dist[(size2+1)*(i-1)+j-1];
|
||||
if(word1_[i-1]!=word2_[j-1]){ // word indexes are implemented differently.
|
||||
subs_dist += 1;
|
||||
}
|
||||
dist[(size2+1)*i+j] = mini(suppr_dist, insert_dist, subs_dist);
|
||||
}
|
||||
}
|
||||
// --------------------------------------------------------
|
||||
int res = dist[(size1+1)*(size2+1) - 1];
|
||||
delete dist;
|
||||
return(res);
|
||||
}
|
||||
|
||||
signed main() {
|
||||
return levenshtein_dist("ginger", "pilger");
|
||||
}
|
Reference in New Issue
Block a user