quote-bot/levenshtein.c

47 lines
1.3 KiB
C
Raw Permalink Normal View History

2021-03-09 01:39:19 -05:00
#define _POSIX_C_SOURCE 200809L /* strtok_r, strndup */
#include <string.h>
#include <stdlib.h> /* malloc */
#include "levenshtein.h"
/* <https://rosettacode.org/wiki/Levenshtein_distance#C>
* TODO: use this for quote duplicate removal */
static int levenshtein_dist(const char *s, const char *t, int *d, int ls, int lt, int i, int j) {
int x, y;
int *n = d + i * ls + j;
if (*n >= 0)
return *n;
if (i == ls)
x = lt - j;
else if (j == lt)
x = ls - i;
else if (s[i] == t[j])
x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1);
else {
x = levenshtein_dist(s, t, d, ls, lt, i + 1, j + 1);
if ((y = levenshtein_dist(s, t, d, ls, lt, i, j + 1)) < x)
x = y;
if ((y = levenshtein_dist(s, t, d, ls, lt, i + 1, j)) < x)
x = y;
x++;
}
return *n = x;
}
int levenshtein(const char *s, const char *t) {
int i, j, n;
int ls = (int)strnlen(s, 128), lt = (int)strnlen(t, 128);
int *d;
d = (int*)malloc(sizeof(int) * (unsigned long)((ls + 1) * (lt + 1)));
for (i = 0; i <= ls; i++)
for (j = 0; j <= lt; j++)
*(d + i * ls + j) = -1;
n = levenshtein_dist(s, t, d, ls, lt, 0, 0);
free(d);
d = NULL;
return n;
}