#define _POSIX_C_SOURCE 200809L /* strtok_r, strndup */ #include #include /* malloc */ #include "levenshtein.h" /* * TODO: use this for quote duplicate removal */ static int levenshtein_dist(const char *s, const char *t, int *d, int ls, int lt, int i, int j) { int x, y; int *n = d + i * ls + j; if (*n >= 0) return *n; if (i == ls) x = lt - j; else if (j == lt) x = ls - i; else if (s[i] == t[j]) x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1); else { x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1); if ((y = levenshtein_dist(s, t, d, ls, lt, i, j + 1)) < x) x = y; if ((y = levenshtein_dist(s, t, d, ls, lt, i + 1, j)) < x) x = y; x++; } return *n = x; } int levenshtein(const char *s, const char *t) { int i, j, n; int ls = (int)strnlen(s, 128), lt = (int)strnlen(t, 128); int *d; d = (int*)malloc(sizeof(int) * (unsigned long)((ls + 1) * (lt + 1))); for (i = 0; i <= ls; i++) for (j = 0; j <= lt; j++) *(d + i * ls + j) = -1; n = levenshtein_dist(s, t, d, ls, lt, 0, 0); free(d); d = NULL; return n; }