package main import ( "fmt" "math/rand" "time" ) // from: https://github.com/peter-evans/patience/blob/main/lcs.go func LCS[T comparable](a, b []T, equals func(T, T) bool) [][2]int { // Initialize the LCS table. lcs := make([][]int, len(a)+1) for i := 0; i <= len(a); i++ { lcs[i] = make([]int, len(b)+1) } // Populate the LCS table. for i := 1; i < len(lcs); i++ { for j := 1; j < len(lcs[i]); j++ { if equals(a[i-1], b[j-1]) { lcs[i][j] = lcs[i-1][j-1] + 1 } else { lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1]) } } } // Backtrack to find the LCS. i, j := len(a), len(b) s := make([][2]int, 0, lcs[i][j]) for i > 0 && j > 0 { switch { case equals(a[i-1], b[j-1]): s = append(s, [2]int{i - 1, j - 1}) i-- j-- case lcs[i-1][j] > lcs[i][j-1]: i-- default: j-- } } // Reverse the backtracked LCS. for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { s[i], s[j] = s[j], s[i] } return s } func LCSDiff[T comparable](a, b []T, equals func(T, T) bool, diff func(T), same func(T, T)) { res := LCS(a, b, equals) i2 := 0 isame := 0 for isame < len(res) { // process [i1..isamee1[ and [i2..isame2[ for i2 < res[isame][1] { diff(b[i2]) i2++ } // process same elements -> no diff so normally not same(a[res[isame][0]], b[res[isame][1]]) i2++ isame++ } for i2 < len(b) { diff(b[i2]) i2++ } } const letters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" func randString(n int) string { res := make([]byte, n) for i := range n { res[i] = letters[rand.Int()%len(letters)] } return string(res) } func main() { input1 := []string{"ab", "cd", "myidenticalstring"} input2 := []string{"ab", "ce", "myidenticalstrings", "fg"} t0 := time.Now() res := LCS(input1, input2, func(s1 string, s2 string) bool { res := LCS([]rune(s1), []rune(s2), func(r rune, r2 rune) bool { return r == r2 }) score := min(float64(len(res))/float64(len(s1)), float64(len(res))/float64(len(s2))) return score > 0.90 }) dt := time.Now().Sub(t0).Microseconds() fmt.Printf("time %v us, len %v\n", dt, len(res)) for i, pair := range res { fmt.Printf("%d: %v\n", i, pair) } // [0..i1[ and [0..i2[ already handled // isame: res[0..isame[ have been handled //i1 := 0 i2 := 0 isame := 0 for isame < len(res) { // process [i1..isamee1[ and [i2..isame2[ for i2 < res[isame][1] { fmt.Printf("DIFF|ADD %s\n", input2[i2]) i2++ } // process same elements -> no diff so normally not fmt.Printf("SAME %s %s\n", input1[res[isame][0]], input2[res[isame][1]]) i2++ isame++ } for i2 < len(input2) { fmt.Printf("DIFF|ADD %s\n", input2[i2]) i2++ } LCSDiff(input1, input2, func(s1 string, s2 string) bool { res := LCS([]rune(s1), []rune(s2), func(r rune, r2 rune) bool { return r == r2 }) score := min(float64(len(res))/float64(len(s1)), float64(len(res))/float64(len(s2))) return score > 0.90 }, func(s string) { fmt.Printf("DIFF %v\n", s) }, func(s1 string, s2 string) { fmt.Printf("SAME %v %v\n", s1, s2) }) }