diff options
author | Treehugger Robot <treehugger-gerrit@google.com> | 2021-10-19 19:11:35 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2021-10-19 19:11:35 +0000 |
commit | 3bd9c7d36a1a9e6498558ee33df09176eb3237ad (patch) | |
tree | cda44cfe6775e42762fb774b1a377f93a1d88c6b | |
parent | f478b8487bc8de24e700b8febe445cc6ec0a15ae (diff) | |
parent | 26c705f764c26809d145c7cfaacb7bed54aafb6f (diff) | |
download | build-3bd9c7d36a1a9e6498558ee33df09176eb3237ad.tar.gz |
Merge "A tool to facilitate large ninja files comparison."
-rw-r--r-- | tools/canoninja/README.md | 151 | ||||
-rw-r--r-- | tools/canoninja/canoninja.go | 130 | ||||
-rw-r--r-- | tools/canoninja/canoninja_test.go | 47 | ||||
-rw-r--r-- | tools/canoninja/cmd/main.go | 36 | ||||
-rw-r--r-- | tools/canoninja/go.mod | 1 |
5 files changed, 365 insertions, 0 deletions
diff --git a/tools/canoninja/README.md b/tools/canoninja/README.md new file mode 100644 index 0000000000..506acf76d1 --- /dev/null +++ b/tools/canoninja/README.md @@ -0,0 +1,151 @@ +# Ninja File Canonicalizer + +Suppose we have a tool that generates a Ninja file from some other description (think Kati and makefiles), and during +the testing we discovered a regression. Furthermore, suppose that the generated Ninja file is large (think millions of +lines). And, the new Ninja file has build statements and rules in a slightly different order. As the tool generates the +rule names, the real differences in the output of the `diff` command are drowned in noise. Enter Canoninja. + +Canoninja renames each Ninja rule to the hash of its contents. After that, we can just sort the build statements, and a +simple `comm` command immediately reveal the essential difference between the files. + +## Example + +Consider the following makefile + +```makefile +second := +first: foo +foo: + @echo foo +second: bar +bar: + @echo bar +``` + +Depending on Kati version converting it to Ninja file will yield either: + +``` +$ cat /tmp/1.ninja +# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb + +pool local_pool + depth = 72 + +build _kati_always_build_: phony + +build first: phony foo +rule rule0 + description = build $out + command = /bin/sh -c "echo foo" +build foo: rule0 +build second: phony bar +rule rule1 + description = build $out + command = /bin/sh -c "echo bar" +build bar: rule1 + +default first +``` + +or + +``` +$ cat 2.ninja +# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310 + +pool local_pool + depth = 72 + +build _kati_always_build_: phony + +build second: phony bar +rule rule0 + description = build $out + command = /bin/sh -c "echo bar" +build bar: rule0 +build first: phony foo +rule rule1 + description = build $out + command = /bin/sh -c "echo foo" +build foo: rule1 + +default first +``` + +This is a quirk in Kati, see https://github.com/google/kati/issues/238 + +Trying to find out the difference between the targets even after sorting them isn't too helpful: + +``` +diff <(grep '^build' /tmp/1.ninja|sort) <(grep '^build' /tmp/2.ninja | sort) +1c1 +< build bar: rule1 +--- +> build bar: rule0 +3c3 +< build foo: rule0 +--- +> build foo: rule1 +``` + +However, running these files through `canoninja` yields + +``` +$ canoninja /tmp/1.ninja +# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb + +pool local_pool + depth = 72 + +build _kati_always_build_: phony + +build first: phony foo +rule R2f9981d3c152fc255370dc67028244f7bed72a03 + description = build $out + command = /bin/sh -c "echo foo" +build foo: R2f9981d3c152fc255370dc67028244f7bed72a03 +build second: phony bar +rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c + description = build $out + command = /bin/sh -c "echo bar" +build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c + +default first +``` + +and + +``` +~/go/bin/canoninja /tmp/2.ninja +# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310 + +pool local_pool + depth = 72 + +build _kati_always_build_: phony + +build second: phony bar +rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c + description = build $out + command = /bin/sh -c "echo bar" +build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c +build first: phony foo +rule R2f9981d3c152fc255370dc67028244f7bed72a03 + description = build $out + command = /bin/sh -c "echo foo" +build foo: R2f9981d3c152fc255370dc67028244f7bed72a03 + +default first +``` + +and when we extract only build statements and sort them, we see that both Ninja files define the same graph: + +```shell +$ diff <(~/go/bin/canoninja /tmp/1.ninja | grep '^build' | sort) \ + <(~/go/bin/canoninja /tmp/2.ninja | grep '^build' | sort) +``` + +# Todo + +* Optionally output only the build statements, optionally sorted +* Handle continuation lines correctly diff --git a/tools/canoninja/canoninja.go b/tools/canoninja/canoninja.go new file mode 100644 index 0000000000..681a69438c --- /dev/null +++ b/tools/canoninja/canoninja.go @@ -0,0 +1,130 @@ +package canoninja + +import ( + "bytes" + "crypto/sha1" + "encoding/hex" + "fmt" + "io" +) + +var ( + rulePrefix = []byte("rule ") + buildPrefix = []byte("build ") + phonyRule = []byte("phony") +) + +func Generate(path string, buffer []byte, sink io.Writer) error { + // Break file into lines + from := 0 + var lines [][]byte + for from < len(buffer) { + line := getLine(buffer[from:]) + lines = append(lines, line) + from += len(line) + } + + // FOr each rule, calculate and remember its digest + ruleDigest := make(map[string]string) + for i := 0; i < len(lines); { + if bytes.HasPrefix(lines[i], rulePrefix) { + // Find ruleName + rn := ruleName(lines[i]) + if len(rn) == 0 { + return fmt.Errorf("%s:%d: rule name is missing or on the next line", path, i+1) + } + sRuleName := string(rn) + if _, ok := ruleDigest[sRuleName]; ok { + return fmt.Errorf("%s:%d: the rule %s has been already defined", path, i+1, sRuleName) + } + // Calculate rule text digest as a digests of line digests. + var digests []byte + doDigest := func(b []byte) { + h := sha1.New() + h.Write(b) + digests = h.Sum(digests) + + } + // For the first line, digest everything after rule's name + doDigest(lines[i][cap(lines[i])+len(rn)-cap(rn):]) + for i++; i < len(lines) && lines[i][0] == ' '; i++ { + doDigest(lines[i]) + } + h := sha1.New() + h.Write(digests) + ruleDigest[sRuleName] = "R" + hex.EncodeToString(h.Sum(nil)) + + } else { + i++ + } + } + + // Rewrite rule names. + for i, line := range lines { + if bytes.HasPrefix(line, buildPrefix) { + brn := getBuildRuleName(line) + if bytes.Equal(brn, phonyRule) { + sink.Write(line) + continue + } + if len(brn) == 0 { + return fmt.Errorf("%s:%d: build statement lacks rule name", path, i+1) + } + sink.Write(line[0 : cap(line)-cap(brn)]) + if digest, ok := ruleDigest[string(brn)]; ok { + sink.Write([]byte(digest)) + } else { + return fmt.Errorf("%s:%d: no rule for this build target", path, i+1) + } + sink.Write(line[cap(line)+len(brn)-cap(brn):]) + } else if bytes.HasPrefix(line, rulePrefix) { + rn := ruleName(line) + // Write everything before it + sink.Write(line[0 : cap(line)-cap(rn)]) + sink.Write([]byte(ruleDigest[string(rn)])) + sink.Write(line[cap(line)+len(rn)-cap(rn):]) + } else { + //goland:noinspection GoUnhandledErrorResult + sink.Write(line) + } + } + return nil +} + +func getLine(b []byte) []byte { + if n := bytes.IndexByte(b, '\n'); n >= 0 { + return b[:n+1] + } + return b +} + +// Returns build statement's rule name +func getBuildRuleName(line []byte) []byte { + n := bytes.IndexByte(line, ':') + if n <= 0 { + return nil + } + ruleName := line[n+1:] + if ruleName[0] == ' ' { + ruleName = bytes.TrimLeft(ruleName, " ") + } + if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 { + ruleName = ruleName[0:n] + } + return ruleName +} + +// Returns rule statement's rule name +func ruleName(lineAfterRule []byte) []byte { + ruleName := lineAfterRule[len(rulePrefix):] + if len(ruleName) == 0 { + return ruleName + } + if ruleName[0] == ' ' { + ruleName = bytes.TrimLeft(ruleName, " ") + } + if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 { + ruleName = ruleName[0:n] + } + return ruleName +} diff --git a/tools/canoninja/canoninja_test.go b/tools/canoninja/canoninja_test.go new file mode 100644 index 0000000000..3c45f8cf4b --- /dev/null +++ b/tools/canoninja/canoninja_test.go @@ -0,0 +1,47 @@ +package canoninja + +import ( + "bytes" + "testing" +) + +func TestGenerate(t *testing.T) { + tests := []struct { + name string + in []byte + wantSink string + wantErr bool + }{ + { + name: "1", + in: []byte(` +rule rule1 + abcd +rule rule2 + abcd +build x: rule1 +`), + wantSink: ` +rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b + abcd +rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b + abcd +build x: R9c97aba7f61994be6862f5ea9a62d26130c7f48b +`, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sink := &bytes.Buffer{} + err := Generate("<file>", tt.in, sink) + if (err != nil) != tt.wantErr { + t.Errorf("Generate() error = %v, wantErr %v", err, tt.wantErr) + return + } + if gotSink := sink.String(); gotSink != tt.wantSink { + t.Errorf("Generate() gotSink = %v, want %v", gotSink, tt.wantSink) + } + }) + } +} diff --git a/tools/canoninja/cmd/main.go b/tools/canoninja/cmd/main.go new file mode 100644 index 0000000000..71802efa6a --- /dev/null +++ b/tools/canoninja/cmd/main.go @@ -0,0 +1,36 @@ +package main + +/* + Canoninja reads a Ninja file and changes the rule names to be the digest of the rule contents. + Feed it to a filter that extracts only build statements, sort them, and you will have a crude + but effective tool to find small differences between two Ninja files. +*/ + +import ( + "canoninja" + "flag" + "fmt" + "os" +) + +func main() { + flag.Parse() + files := flag.Args() + if len(files) == 0 { + files = []string{"/dev/stdin"} + } + rc := 0 + for _, f := range files { + if buffer, err := os.ReadFile(f); err == nil { + err = canoninja.Generate(f, buffer, os.Stdout) + if err != nil { + fmt.Fprintln(os.Stderr, err) + rc = 1 + } + } else { + fmt.Fprintf(os.Stderr, "%s: %s\n", f, err) + rc = 1 + } + } + os.Exit(rc) +} diff --git a/tools/canoninja/go.mod b/tools/canoninja/go.mod new file mode 100644 index 0000000000..c5a924e7c3 --- /dev/null +++ b/tools/canoninja/go.mod @@ -0,0 +1 @@ +module canoninja |