^ change me
Levenshtein distance, also sometimes called "edit distance," is a way of measuring the similarity of two strings. Computing the Levenstein distance between two strings results in an integer. A 0 indicates the strings are identical. The higher the number, the more edits (inserts, deletes, swaps) need to take place to convert one string to the other.
A Levenshtein program is being added to the languages comparison repository. All implementations should use a variant of the Wagner-Fischer algorithm, that allows for using only two rows at-a-time with a length based on the shorter string.
#include#include #include // Can either define your own min function // or use a language / standard library function int min(int a, int b, int c) { int min = a; if (b < min) min = b; if (c < min) min = c; return min; } int levenshtein_distance(const char *str1t, const char *str2t) { // Get lengths of both strings int mt = strlen(str1t); int nt = strlen(str2t); // Assign shorter one to str1, longer one to str2 const char* str1 = mt <= nt ? str1t : str2t; const char* str2 = mt <= nt ? str2t : str1t; // store the lengths of shorter in m, longer in n int m = str1 == str1t ? mt : nt; int n = str1 == str1t ? nt : mt; // Create two rows, previous and current int prev[m+1]; int curr[m+1]; // initialize the previous row for (int i = 0; i <= m; i++) { prev[i] = i; } // Iterate and compute distance for (int i = 1; i <= n; i++) { curr[0] = i; for (int j = 1; j <= m; j++) { int cost = (str1[j-1] == str2[i-1]) ? 0 : 1; curr[j] = min( prev[j] + 1, // Deletion curr[j-1] + 1, // Insertion prev[j-1] + cost // Substitution ); } for (int j = 0; j <= m; j++) { prev[j] = curr[j]; } } // Return final distance, stored in prev[m] return prev[m]; } int main(int argc, char *argv[]) { int min_distance = -1; int times = 0; // Iterate through all combinations of command line args for (int i = 1; i < argc; i++) { for (int j = 1; j < argc; j++) { // Don't compare the same string to itself if (i != j) { int distance = levenshtein_distance(argv[i], argv[j]); if (min_distance == -1 || min_distance > distance) { min_distance = distance; } times++; } } } // The only output from the program should be the times (number of comparisons) // and min distance calculated of all comparisons. Two total lines of output, // formatted exactly like this. printf("times: %d\n", times); printf("min_distance: %d\n", min_distance); return 0; }
NOTE: Since being released, the Levenshtein program for the language comparison has been updated to a more performant algorithm.
It now uses only two rows at-a-time instead of building the full matrix.
See the README in the repository for more details, linked above.
Submit a new language or improve an existing implementation via the
GitHub repository.