47 lines
1.3 KiB
C
47 lines
1.3 KiB
C
|
#define _POSIX_C_SOURCE 200809L /* strtok_r, strndup */
|
||
|
#include <string.h>
|
||
|
#include <stdlib.h> /* malloc */
|
||
|
|
||
|
#include "levenshtein.h"
|
||
|
|
||
|
/* <https://rosettacode.org/wiki/Levenshtein_distance#C>
|
||
|
* TODO: use this for quote duplicate removal */
|
||
|
static int levenshtein_dist(const char *s, const char *t, int *d, int ls, int lt, int i, int j) {
|
||
|
int x, y;
|
||
|
int *n = d + i * ls + j;
|
||
|
|
||
|
if (*n >= 0)
|
||
|
return *n;
|
||
|
|
||
|
if (i == ls)
|
||
|
x = lt - j;
|
||
|
else if (j == lt)
|
||
|
x = ls - i;
|
||
|
else if (s[i] == t[j])
|
||
|
x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1);
|
||
|
else {
|
||
|
x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1);
|
||
|
|
||
|
if ((y = levenshtein_dist(s, t, d, ls, lt, i, j + 1)) < x)
|
||
|
x = y;
|
||
|
if ((y = levenshtein_dist(s, t, d, ls, lt, i + 1, j)) < x)
|
||
|
x = y;
|
||
|
x++;
|
||
|
}
|
||
|
return *n = x;
|
||
|
}
|
||
|
|
||
|
int levenshtein(const char *s, const char *t) {
|
||
|
int i, j, n;
|
||
|
int ls = (int)strnlen(s, 128), lt = (int)strnlen(t, 128);
|
||
|
int *d;
|
||
|
d = (int*)malloc(sizeof(int) * (unsigned long)((ls + 1) * (lt + 1)));
|
||
|
for (i = 0; i <= ls; i++)
|
||
|
for (j = 0; j <= lt; j++)
|
||
|
*(d + i * ls + j) = -1;
|
||
|
n = levenshtein_dist(s, t, d, ls, lt, 0, 0);
|
||
|
free(d);
|
||
|
d = NULL;
|
||
|
return n;
|
||
|
}
|