From ebf888d91df8a0286e39a92b73d33ae21d2e17f4 Mon Sep 17 00:00:00 2001 From: Erik Brakkee Date: Sun, 8 Mar 2026 15:39:52 +0100 Subject: [PATCH] added yamltool sort --- cmd/lcstest/main.go | 137 +++++++++++++++++++++++++++++++++++++++ cmd/yamltool/diff.go | 27 ++++++-- cmd/yamltool/merge.go | 11 +++- cmd/yamltool/parse.go | 16 ++--- cmd/yamltool/sort.go | 55 ++++++++++++++++ cmd/yamltool/support.go | 23 +++++++ cmd/yamltool/yamltool.go | 10 +++ 7 files changed, 259 insertions(+), 20 deletions(-) create mode 100644 cmd/lcstest/main.go create mode 100644 cmd/yamltool/sort.go create mode 100644 cmd/yamltool/support.go diff --git a/cmd/lcstest/main.go b/cmd/lcstest/main.go new file mode 100644 index 0000000..2376f81 --- /dev/null +++ b/cmd/lcstest/main.go @@ -0,0 +1,137 @@ +package main + +import ( + "fmt" + "math/rand" + "time" +) + +// from: https://github.com/peter-evans/patience/blob/main/lcs.go +func LCS[T comparable](a, b []T, equals func(T, T) bool) [][2]int { + // Initialize the LCS table. + lcs := make([][]int, len(a)+1) + for i := 0; i <= len(a); i++ { + lcs[i] = make([]int, len(b)+1) + } + + // Populate the LCS table. + for i := 1; i < len(lcs); i++ { + for j := 1; j < len(lcs[i]); j++ { + if equals(a[i-1], b[j-1]) { + lcs[i][j] = lcs[i-1][j-1] + 1 + } else { + lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1]) + } + } + } + + // Backtrack to find the LCS. + i, j := len(a), len(b) + s := make([][2]int, 0, lcs[i][j]) + + for i > 0 && j > 0 { + switch { + case equals(a[i-1], b[j-1]): + s = append(s, [2]int{i - 1, j - 1}) + i-- + j-- + case lcs[i-1][j] > lcs[i][j-1]: + i-- + default: + j-- + } + } + + // Reverse the backtracked LCS. + for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { + s[i], s[j] = s[j], s[i] + } + + return s +} + +func LCSDiff[T comparable](a, b []T, equals func(T, T) bool, + diff func(T), same func(T, T)) { + res := LCS(a, b, equals) + + i2 := 0 + isame := 0 + for isame < len(res) { + // process [i1..isamee1[ and [i2..isame2[ + for i2 < res[isame][1] { + diff(b[i2]) + i2++ + } + // process same elements -> no diff so normally not + same(a[res[isame][0]], b[res[isame][1]]) + i2++ + isame++ + } + for i2 < len(b) { + diff(b[i2]) + i2++ + } +} + +const letters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + +func randString(n int) string { + res := make([]byte, n) + for i := range n { + res[i] = letters[rand.Int()%len(letters)] + } + return string(res) +} + +func main() { + input1 := []string{"ab", "cd", "myidenticalstring"} + input2 := []string{"ab", "ce", "myidenticalstrings", "fg"} + t0 := time.Now() + res := LCS(input1, input2, func(s1 string, s2 string) bool { + res := LCS([]rune(s1), []rune(s2), func(r rune, r2 rune) bool { + return r == r2 + }) + score := min(float64(len(res))/float64(len(s1)), + float64(len(res))/float64(len(s2))) + return score > 0.90 + }) + dt := time.Now().Sub(t0).Microseconds() + fmt.Printf("time %v us, len %v\n", dt, len(res)) + for i, pair := range res { + fmt.Printf("%d: %v\n", i, pair) + } + + // [0..i1[ and [0..i2[ already handled + // isame: res[0..isame[ have been handled + //i1 := 0 + i2 := 0 + isame := 0 + for isame < len(res) { + // process [i1..isamee1[ and [i2..isame2[ + for i2 < res[isame][1] { + fmt.Printf("DIFF|ADD %s\n", input2[i2]) + i2++ + } + // process same elements -> no diff so normally not + fmt.Printf("SAME %s %s\n", input1[res[isame][0]], input2[res[isame][1]]) + i2++ + isame++ + } + for i2 < len(input2) { + fmt.Printf("DIFF|ADD %s\n", input2[i2]) + i2++ + } + + LCSDiff(input1, input2, func(s1 string, s2 string) bool { + res := LCS([]rune(s1), []rune(s2), func(r rune, r2 rune) bool { + return r == r2 + }) + score := min(float64(len(res))/float64(len(s1)), + float64(len(res))/float64(len(s2))) + return score > 0.90 + }, func(s string) { + fmt.Printf("DIFF %v\n", s) + }, func(s1 string, s2 string) { + fmt.Printf("SAME %v %v\n", s1, s2) + }) +} diff --git a/cmd/yamltool/diff.go b/cmd/yamltool/diff.go index 499603e..849e636 100644 --- a/cmd/yamltool/diff.go +++ b/cmd/yamltool/diff.go @@ -2,10 +2,11 @@ package main import ( "fmt" - "github.com/goccy/go-yaml" - "github.com/spf13/cobra" "os" "reflect" + + "github.com/goccy/go-yaml" + "github.com/spf13/cobra" ) // hack to be able to compare slices and dictionires that cannot be put into a map. @@ -45,6 +46,11 @@ func subtract(yaml2 yaml.MapSlice, yaml1 yaml.MapSlice) yaml.MapSlice { v1set[strval(v)] = true } s := make([]any, 0) + // TODO + // convert both slices to lists of strings + // apply LCS to the list of strings with approximate equality + // added elements: -> output fully + // approximately equal elements: -> when identical, skip, otherwise, output diffs (recurse) for _, v2value := range v2.([]any) { k2 := strval(v2value) if v1set[k2] { @@ -78,13 +84,22 @@ func diff(cmd *cobra.Command, args []string) error { file1 := args[0] file2 := args[1] - yaml1, err := parse(read(file1)) + data1, err := read(file1) if err != nil { - panic(fmt.Errorf("%s: %w", file1, err)) + return err } - yaml2, err := parse(read(file2)) + data2, err := read(file2) if err != nil { - panic(fmt.Errorf("%s: %w", file2, err)) + return err + } + + yaml1, err := parse(data1) + if err != nil { + return fmt.Errorf("%s: %w", file1, err) + } + yaml2, err := parse(data2) + if err != nil { + return fmt.Errorf("%s: %w", file2, err) } diff1 := subtract(yaml2, yaml1) diff --git a/cmd/yamltool/merge.go b/cmd/yamltool/merge.go index 5211077..c6ab929 100644 --- a/cmd/yamltool/merge.go +++ b/cmd/yamltool/merge.go @@ -2,10 +2,11 @@ package main import ( "fmt" - "github.com/goccy/go-yaml" - "github.com/spf13/cobra" "os" "reflect" + + "github.com/goccy/go-yaml" + "github.com/spf13/cobra" ) type MyMap yaml.MapSlice @@ -54,7 +55,11 @@ func mergeMap(yaml1 yaml.MapSlice, yaml2 yaml.MapSlice) yaml.MapSlice { func merge(cmd *cobra.Command, args []string) error { res := make(yaml.MapSlice, 0) for _, arg := range args { - config, err := parse(read(arg)) + data, err := read(arg) + if err != nil { + return err + } + config, err := parse(data) if err != nil { return fmt.Errorf("%s: %w", arg, err) } diff --git a/cmd/yamltool/parse.go b/cmd/yamltool/parse.go index 85b34d2..1a958a4 100644 --- a/cmd/yamltool/parse.go +++ b/cmd/yamltool/parse.go @@ -6,18 +6,8 @@ import ( "github.com/goccy/go-yaml" "github.com/spf13/cobra" - - "os" ) -func read(file string) []byte { - data, err := os.ReadFile(file) - if err != nil { - panic(err) - } - return data -} - func parse(data []byte) (yaml.MapSlice, error) { var result yaml.MapSlice decoder := yaml.NewDecoder(bytes.NewReader(data), @@ -31,7 +21,11 @@ func parse(data []byte) (yaml.MapSlice, error) { func parseFiles(cmd *cobra.Command, args []string) error { for _, arg := range args { - _, err := parse(read(arg)) + data, err := read(arg) + if err != nil { + return err + } + _, err = parse(data) if err != nil { fmt.Printf("%s: %v\n", arg, err.Error()) } diff --git a/cmd/yamltool/sort.go b/cmd/yamltool/sort.go new file mode 100644 index 0000000..83c18fe --- /dev/null +++ b/cmd/yamltool/sort.go @@ -0,0 +1,55 @@ +package main + +import ( + "cmp" + "fmt" + "reflect" + "slices" + + "github.com/goccy/go-yaml" + "github.com/spf13/cobra" + + "os" +) + +type MapSlice yaml.MapSlice + +func (s MapSlice) Sort() { + slices.SortFunc(s, func(a, b yaml.MapItem) int { + keya := fmt.Sprintf("%s", a.Key) + keyb := fmt.Sprintf("%s", b.Key) + return cmp.Compare(keya, keyb) + }) + for _, item := range s { + switch { + case reflect.TypeOf(item.Value) == reflect.TypeOf(yaml.MapSlice{}): + ((MapSlice)(item.Value.(yaml.MapSlice))).Sort() + case Type(item.Value) == Slice: + for _, v := range item.Value.([]any) { + ms, ok := v.(yaml.MapSlice) + if ok { + ((MapSlice)(ms)).Sort() + } + } + } + } +} + +func sortYaml(cmd *cobra.Command, args []string) error { + for _, arg := range args { + data, err := read(arg) + if err != nil { + return err + } + doc, err := parse(data) + if err != nil { + return fmt.Errorf("%s: %v\n", arg, err.Error()) + } + ((MapSlice)(doc)).Sort() + if len(args) > 1 { + fmt.Printf("---\n") + } + encode(os.Stdout, doc) + } + return nil +} diff --git a/cmd/yamltool/support.go b/cmd/yamltool/support.go new file mode 100644 index 0000000..3a81911 --- /dev/null +++ b/cmd/yamltool/support.go @@ -0,0 +1,23 @@ +package main + +import ( + "fmt" + "io" + "os" +) + +func read(file string) ([]byte, error) { + + if file == "-" { + data, err := io.ReadAll(os.Stdin) + if err != nil { + return nil, fmt.Errorf("Error reading from stdin") + } + return data, nil + } + data, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("Error reading from '%s'", file) + } + return data, nil +} diff --git a/cmd/yamltool/yamltool.go b/cmd/yamltool/yamltool.go index 692a580..fde30b6 100644 --- a/cmd/yamltool/yamltool.go +++ b/cmd/yamltool/yamltool.go @@ -67,6 +67,16 @@ Shows the additions and modifications in compared to `, } cmd.AddCommand(parse) + sort := &cobra.Command{ + Use: "sort [file1] ... [fileN]", + Short: "Sort the yaml output by sorting based on map key ", + Long: `Sort yaml files, this makes it easier to also use regular diff`, + RunE: func(cmd *cobra.Command, args []string) error { + return sortYaml(cmd, args) + }, + } + cmd.AddCommand(sort) + diff.PersistentFlags().IntVarP(&VERBOSITY, "array-output-level", "v", 3, `Array output level: , 0: no output, only exit status,