138 lines
3.0 KiB
Go
138 lines
3.0 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"math/rand"
|
|
"time"
|
|
)
|
|
|
|
// LCS computes a longest common subsequence of a and b using the classic
// dynamic-programming table and returns it as a list of index pairs.
//
// Each returned element is [2]int{i, j}, meaning a[i] was matched with b[j].
// Pairs are ordered by strictly increasing i and j. The result is an empty,
// non-nil slice when nothing matches.
//
// equals decides whether two elements match. It is called both while filling
// the table and while backtracking, so it should be deterministic. If equals
// is nil, elements are compared with ==.
//
// Time and memory are both proportional to len(a)*len(b).
//
// Adapted from https://github.com/peter-evans/patience/blob/main/lcs.go
func LCS[T comparable](a, b []T, equals func(T, T) bool) [][2]int {
	if equals == nil {
		// T is comparable, so plain equality is a sensible default.
		equals = func(x, y T) bool { return x == y }
	}

	// table[i][j] is the LCS length of a[:i] and b[:j]. Row 0 and column 0
	// stay zero (empty prefix). All rows share one backing array so the
	// table costs two allocations instead of one per row.
	rows, cols := len(a)+1, len(b)+1
	cells := make([]int, rows*cols)
	table := make([][]int, rows)
	for i := range table {
		table[i] = cells[i*cols : (i+1)*cols]
	}

	// Populate the table.
	for i := 1; i < rows; i++ {
		for j := 1; j < cols; j++ {
			if equals(a[i-1], b[j-1]) {
				table[i][j] = table[i-1][j-1] + 1
			} else {
				table[i][j] = max(table[i-1][j], table[i][j-1])
			}
		}
	}

	// Backtrack from the bottom-right corner, collecting matches last-first.
	i, j := len(a), len(b)
	pairs := make([][2]int, 0, table[i][j])
	for i > 0 && j > 0 {
		switch {
		case equals(a[i-1], b[j-1]):
			pairs = append(pairs, [2]int{i - 1, j - 1})
			i--
			j--
		case table[i-1][j] > table[i][j-1]:
			i--
		default:
			// On ties, prefer stepping back in b.
			j--
		}
	}

	// Matches were collected from the end; put them in forward order.
	for lo, hi := 0, len(pairs)-1; lo < hi; lo, hi = lo+1, hi-1 {
		pairs[lo], pairs[hi] = pairs[hi], pairs[lo]
	}

	return pairs
}
|
|
|
|
func LCSDiff[T comparable](a, b []T, equals func(T, T) bool,
|
|
diff func(T), same func(T, T)) {
|
|
res := LCS(a, b, equals)
|
|
|
|
i2 := 0
|
|
isame := 0
|
|
for isame < len(res) {
|
|
// process [i1..isamee1[ and [i2..isame2[
|
|
for i2 < res[isame][1] {
|
|
diff(b[i2])
|
|
i2++
|
|
}
|
|
// process same elements -> no diff so normally not
|
|
same(a[res[isame][0]], b[res[isame][1]])
|
|
i2++
|
|
isame++
|
|
}
|
|
for i2 < len(b) {
|
|
diff(b[i2])
|
|
i2++
|
|
}
|
|
}
|
|
|
|
// letters is the alphabet randString draws from: digits, then upper-case,
// then lower-case ASCII letters.
const letters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

// randString returns a string of n bytes, each picked uniformly at random
// from letters using the math/rand global source. It returns "" when n is
// zero or negative.
func randString(n int) string {
	if n <= 0 {
		// make would panic on a negative length.
		return ""
	}
	buf := make([]byte, n)
	for i := range buf {
		// Intn is uniform over [0, len(letters)), unlike Int()%len.
		buf[i] = letters[rand.Intn(len(letters))]
	}
	return string(buf)
}
|
|
|
|
func main() {
|
|
input1 := []string{"ab", "cd", "myidenticalstring"}
|
|
input2 := []string{"ab", "ce", "myidenticalstrings", "fg"}
|
|
t0 := time.Now()
|
|
res := LCS(input1, input2, func(s1 string, s2 string) bool {
|
|
res := LCS([]rune(s1), []rune(s2), func(r rune, r2 rune) bool {
|
|
return r == r2
|
|
})
|
|
score := min(float64(len(res))/float64(len(s1)),
|
|
float64(len(res))/float64(len(s2)))
|
|
return score > 0.90
|
|
})
|
|
dt := time.Now().Sub(t0).Microseconds()
|
|
fmt.Printf("time %v us, len %v\n", dt, len(res))
|
|
for i, pair := range res {
|
|
fmt.Printf("%d: %v\n", i, pair)
|
|
}
|
|
|
|
// [0..i1[ and [0..i2[ already handled
|
|
// isame: res[0..isame[ have been handled
|
|
//i1 := 0
|
|
i2 := 0
|
|
isame := 0
|
|
for isame < len(res) {
|
|
// process [i1..isamee1[ and [i2..isame2[
|
|
for i2 < res[isame][1] {
|
|
fmt.Printf("DIFF|ADD %s\n", input2[i2])
|
|
i2++
|
|
}
|
|
// process same elements -> no diff so normally not
|
|
fmt.Printf("SAME %s %s\n", input1[res[isame][0]], input2[res[isame][1]])
|
|
i2++
|
|
isame++
|
|
}
|
|
for i2 < len(input2) {
|
|
fmt.Printf("DIFF|ADD %s\n", input2[i2])
|
|
i2++
|
|
}
|
|
|
|
LCSDiff(input1, input2, func(s1 string, s2 string) bool {
|
|
res := LCS([]rune(s1), []rune(s2), func(r rune, r2 rune) bool {
|
|
return r == r2
|
|
})
|
|
score := min(float64(len(res))/float64(len(s1)),
|
|
float64(len(res))/float64(len(s2)))
|
|
return score > 0.90
|
|
}, func(s string) {
|
|
fmt.Printf("DIFF %v\n", s)
|
|
}, func(s1 string, s2 string) {
|
|
fmt.Printf("SAME %v %v\n", s1, s2)
|
|
})
|
|
}
|