Files
C_C++
Cerbian
Tcl
autoconfig
cpp_zh
emil_header_guards
flex
linking
1st_day_of_month.cpp
JJbY.cpp
Tp6G.cpp
alternatice_bracket.c
arrows.cpp
auto_variad.c
c.php
c_old_argument_notation.c
cnn.c
comment_as_space.c
comp.c
conditional_const.c
const.c
cpp_regex_error.cpp
current_year.cpp
dda2.cpp
dog.jpg
dog2.jpg
eh.cpp
else_while.c
for_ctags.cpp
for_nop_true.c
free_null.c
function_pointer_strategy.c
function_pointer_strategy2.c
gcc_include_next.c
gdb_graph.c
getopt_test.c
getter_example.cpp
gnu_decimals.c
gnu_history.c
gnu_regex.c
gnu_regex2.c
header.h
index_in_initializer_list_compiler_extension.c
is_this_what_you_are_asking_emil.c
knr.c
levenshtein_dist_-_usage.cpp
macro_paren.c
map_initialization.cpp
ncurses_labels.c
ncurses_mv_win.cpp
ncurses_plus_readline.cpp
ncurses_resize2.cpp
ncurses_resize_hello_world.cpp
ncurses_resize_test.c
ncurses_scroll.c
nf.c
null_printf.c
null_printf.cpp
portable_namespace.c
portable_namespace.h
pta.c
scoping_showcase.c
screen_size_(without_curses).c
sdl_render_to_texture.cpp
sentinel_pack.c
setjmp_test.cpp
strdup.c
tcc_int.c
test.c
typedef.c
unctrl.c
undefined_reference.c
usb.c
vasprintf.c
void_main.c
x.cpp
xtermio.c
xtp.cpp
Java
Misc.
Python
Vim
Webdev
git
.gitignore
Makefile
tests/C_C++/levenshtein_dist_-_usage.cpp
2024-07-22 19:37:02 +02:00

75 lines
1.9 KiB
C++

// @BAKE g++ $@
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
// https://github.com/Meteorix/pylcs
/// Splits a UTF-8 encoded string into pieces, one per encoded character.
///
/// A new piece begins at every byte that is not a continuation byte
/// (bit pattern 10xxxxxx), so a lead byte and the continuation bytes that
/// follow it stay together in one piece.
///
/// @param str input bytes, expected to be UTF-8
/// @return the pieces in input order; empty when str is empty
vector<string> utf8_split(const string &str){
    vector<string> pieces;
    const int total = static_cast<int>(str.length());
    int start = 0;
    for (int next = 1; next <= total; ++next) {
        // The byte at `next` extends the current piece only if it exists
        // and is a continuation byte.
        const bool extends_piece = next < total && (str[next] & 0xc0) == 0x80;
        if (extends_piece) {
            continue;
        }
        pieces.push_back(str.substr(start, next - start));
        start = next;
    }
    return pieces;
}
// https://github.com/schiffma/distlib
/// Returns the smallest of the three integers a, b and c.
int mini(int a, int b, int c){
    return min({a, b, c});
}
/// Computes the Levenshtein (edit) distance between two UTF-8 strings.
///
/// Both inputs are first split with utf8_split(), so the distance is counted
/// in those pieces rather than in raw bytes. Pieces are compared exactly;
/// lower-case both strings beforehand if case should not matter.
///
/// @param word1 first word
/// @param word2 second word
/// @return the minimum number of single-piece deletions, insertions and
///         substitutions that turn word1 into word2
int levenshtein_dist(const string &word1, const string &word2){
    const vector<string> units1 = utf8_split(word1);
    const vector<string> units2 = utf8_split(word2);
    const size_t rows = units1.size() + 1;
    const size_t cols = units2.size() + 1;

    // Row-major rows x cols table: dist[cols*i + j] is the distance between
    // the first i pieces of word1 and the first j pieces of word2.
    // The vector owns the storage, so it is released on every exit path.
    vector<int> dist(rows * cols);

    // Reaching the empty prefix costs one deletion/insertion per piece.
    for (size_t i = 0; i < rows; ++i) {
        dist[cols * i] = static_cast<int>(i);
    }
    for (size_t j = 0; j < cols; ++j) {
        dist[j] = static_cast<int>(j);
    }

    for (size_t i = 1; i < rows; ++i) {
        for (size_t j = 1; j < cols; ++j) {
            const int suppr_dist = dist[cols * (i - 1) + j] + 1;
            const int insert_dist = dist[cols * i + j - 1] + 1;
            // Table indices are offset by one from the piece indices
            // because row/column 0 stands for the empty prefix.
            const int subs_cost = (units1[i - 1] != units2[j - 1]) ? 1 : 0;
            const int subs_dist = dist[cols * (i - 1) + j - 1] + subs_cost;
            dist[cols * i + j] = mini(suppr_dist, insert_dist, subs_dist);
        }
    }

    // The bottom-right cell holds the distance between the full inputs.
    return dist[rows * cols - 1];
}
/// Runs the distance computation on a fixed pair of words and reports the
/// result through the process exit status.
signed main() {
    const int distance = levenshtein_dist("ginger", "pilger");
    return distance;
}