2021-03-31 08:47:18 -04:00
|
|
|
#define _POSIX_C_SOURCE 200809L /* strnlen */
|
|
|
|
#include <string.h> /* memset */
|
2021-03-09 01:39:19 -05:00
|
|
|
#include <stdlib.h> /* malloc */
|
|
|
|
|
|
|
|
#include "levenshtein.h"
|
|
|
|
|
|
|
|
/* <https://rosettacode.org/wiki/Levenshtein_distance#C>
|
|
|
|
* TODO: use this for quote duplicate removal */
|
|
|
|
static int levenshtein_dist(const char *s, const char *t, int *d, int ls, int lt, int i, int j) {
|
|
|
|
int x, y;
|
|
|
|
int *n = d + i * ls + j;
|
|
|
|
|
|
|
|
if (*n >= 0)
|
|
|
|
return *n;
|
|
|
|
|
|
|
|
if (i == ls)
|
|
|
|
x = lt - j;
|
|
|
|
else if (j == lt)
|
|
|
|
x = ls - i;
|
|
|
|
else if (s[i] == t[j])
|
|
|
|
x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1);
|
|
|
|
else {
|
|
|
|
x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1);
|
|
|
|
|
|
|
|
if ((y = levenshtein_dist(s, t, d, ls, lt, i, j + 1)) < x)
|
|
|
|
x = y;
|
|
|
|
if ((y = levenshtein_dist(s, t, d, ls, lt, i + 1, j)) < x)
|
|
|
|
x = y;
|
|
|
|
x++;
|
|
|
|
}
|
|
|
|
return *n = x;
|
|
|
|
}
|
|
|
|
|
|
|
|
int levenshtein(const char *s, const char *t) {
|
|
|
|
int i, j, n;
|
|
|
|
int ls = (int)strnlen(s, 128), lt = (int)strnlen(t, 128);
|
|
|
|
int *d;
|
2021-03-31 08:47:18 -04:00
|
|
|
size_t size;
|
|
|
|
size = sizeof(int) * (unsigned long)((ls + 1) * (lt + 1));
|
|
|
|
d = (int *)malloc(size);
|
|
|
|
memset(d, 0, size);
|
2021-03-09 01:39:19 -05:00
|
|
|
for (i = 0; i <= ls; i++)
|
|
|
|
for (j = 0; j <= lt; j++)
|
|
|
|
*(d + i * ls + j) = -1;
|
|
|
|
n = levenshtein_dist(s, t, d, ls, lt, 0, 0);
|
|
|
|
free(d);
|
|
|
|
d = NULL;
|
|
|
|
return n;
|
|
|
|
}
|