aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTreehugger Robot <treehugger-gerrit@google.com>2021-10-19 19:11:35 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2021-10-19 19:11:35 +0000
commit3bd9c7d36a1a9e6498558ee33df09176eb3237ad (patch)
treecda44cfe6775e42762fb774b1a377f93a1d88c6b
parentf478b8487bc8de24e700b8febe445cc6ec0a15ae (diff)
parent26c705f764c26809d145c7cfaacb7bed54aafb6f (diff)
downloadbuild-3bd9c7d36a1a9e6498558ee33df09176eb3237ad.tar.gz
Merge "A tool to facilitate large ninja files comparison."
-rw-r--r--tools/canoninja/README.md151
-rw-r--r--tools/canoninja/canoninja.go130
-rw-r--r--tools/canoninja/canoninja_test.go47
-rw-r--r--tools/canoninja/cmd/main.go36
-rw-r--r--tools/canoninja/go.mod1
5 files changed, 365 insertions, 0 deletions
diff --git a/tools/canoninja/README.md b/tools/canoninja/README.md
new file mode 100644
index 0000000000..506acf76d1
--- /dev/null
+++ b/tools/canoninja/README.md
@@ -0,0 +1,151 @@
+# Ninja File Canonicalizer
+
+Suppose we have a tool that generates a Ninja file from some other description (think Kati and makefiles), and during
+the testing we discovered a regression. Furthermore, suppose that the generated Ninja file is large (think millions of
+lines). And, the new Ninja file has build statements and rules in a slightly different order. As the tool generates the
+rule names, the real differences in the output of the `diff` command are drowned in noise. Enter Canoninja.
+
+Canoninja renames each Ninja rule to the hash of its contents. After that, we can just sort the build statements, and a
+simple `comm` command immediately reveal the essential difference between the files.
+
+## Example
+
+Consider the following makefile
+
+```makefile
+second :=
+first: foo
+foo:
+ @echo foo
+second: bar
+bar:
+ @echo bar
+```
+
+Depending on Kati version converting it to Ninja file will yield either:
+
+```
+$ cat /tmp/1.ninja
+# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb
+
+pool local_pool
+ depth = 72
+
+build _kati_always_build_: phony
+
+build first: phony foo
+rule rule0
+ description = build $out
+ command = /bin/sh -c "echo foo"
+build foo: rule0
+build second: phony bar
+rule rule1
+ description = build $out
+ command = /bin/sh -c "echo bar"
+build bar: rule1
+
+default first
+```
+
+or
+
+```
+$ cat 2.ninja
+# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310
+
+pool local_pool
+ depth = 72
+
+build _kati_always_build_: phony
+
+build second: phony bar
+rule rule0
+ description = build $out
+ command = /bin/sh -c "echo bar"
+build bar: rule0
+build first: phony foo
+rule rule1
+ description = build $out
+ command = /bin/sh -c "echo foo"
+build foo: rule1
+
+default first
+```
+
+This is a quirk in Kati, see https://github.com/google/kati/issues/238
+
+Trying to find out the difference between the targets even after sorting them isn't too helpful:
+
+```
+diff <(grep '^build' /tmp/1.ninja|sort) <(grep '^build' /tmp/2.ninja | sort)
+1c1
+< build bar: rule1
+---
+> build bar: rule0
+3c3
+< build foo: rule0
+---
+> build foo: rule1
+```
+
+However, running these files through `canoninja` yields
+
+```
+$ canoninja /tmp/1.ninja
+# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb
+
+pool local_pool
+ depth = 72
+
+build _kati_always_build_: phony
+
+build first: phony foo
+rule R2f9981d3c152fc255370dc67028244f7bed72a03
+ description = build $out
+ command = /bin/sh -c "echo foo"
+build foo: R2f9981d3c152fc255370dc67028244f7bed72a03
+build second: phony bar
+rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
+ description = build $out
+ command = /bin/sh -c "echo bar"
+build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
+
+default first
+```
+
+and
+
+```
+~/go/bin/canoninja /tmp/2.ninja
+# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310
+
+pool local_pool
+ depth = 72
+
+build _kati_always_build_: phony
+
+build second: phony bar
+rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
+ description = build $out
+ command = /bin/sh -c "echo bar"
+build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
+build first: phony foo
+rule R2f9981d3c152fc255370dc67028244f7bed72a03
+ description = build $out
+ command = /bin/sh -c "echo foo"
+build foo: R2f9981d3c152fc255370dc67028244f7bed72a03
+
+default first
+```
+
+and when we extract only build statements and sort them, we see that both Ninja files define the same graph:
+
+```shell
+$ diff <(~/go/bin/canoninja /tmp/1.ninja | grep '^build' | sort) \
+ <(~/go/bin/canoninja /tmp/2.ninja | grep '^build' | sort)
+```
+
+# Todo
+
+* Optionally output only the build statements, optionally sorted
+* Handle continuation lines correctly
diff --git a/tools/canoninja/canoninja.go b/tools/canoninja/canoninja.go
new file mode 100644
index 0000000000..681a69438c
--- /dev/null
+++ b/tools/canoninja/canoninja.go
@@ -0,0 +1,130 @@
+package canoninja
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "encoding/hex"
+ "fmt"
+ "io"
+)
+
+var (
+ rulePrefix = []byte("rule ")
+ buildPrefix = []byte("build ")
+ phonyRule = []byte("phony")
+)
+
+func Generate(path string, buffer []byte, sink io.Writer) error {
+ // Break file into lines
+ from := 0
+ var lines [][]byte
+ for from < len(buffer) {
+ line := getLine(buffer[from:])
+ lines = append(lines, line)
+ from += len(line)
+ }
+
+ // FOr each rule, calculate and remember its digest
+ ruleDigest := make(map[string]string)
+ for i := 0; i < len(lines); {
+ if bytes.HasPrefix(lines[i], rulePrefix) {
+ // Find ruleName
+ rn := ruleName(lines[i])
+ if len(rn) == 0 {
+ return fmt.Errorf("%s:%d: rule name is missing or on the next line", path, i+1)
+ }
+ sRuleName := string(rn)
+ if _, ok := ruleDigest[sRuleName]; ok {
+ return fmt.Errorf("%s:%d: the rule %s has been already defined", path, i+1, sRuleName)
+ }
+ // Calculate rule text digest as a digests of line digests.
+ var digests []byte
+ doDigest := func(b []byte) {
+ h := sha1.New()
+ h.Write(b)
+ digests = h.Sum(digests)
+
+ }
+ // For the first line, digest everything after rule's name
+ doDigest(lines[i][cap(lines[i])+len(rn)-cap(rn):])
+ for i++; i < len(lines) && lines[i][0] == ' '; i++ {
+ doDigest(lines[i])
+ }
+ h := sha1.New()
+ h.Write(digests)
+ ruleDigest[sRuleName] = "R" + hex.EncodeToString(h.Sum(nil))
+
+ } else {
+ i++
+ }
+ }
+
+ // Rewrite rule names.
+ for i, line := range lines {
+ if bytes.HasPrefix(line, buildPrefix) {
+ brn := getBuildRuleName(line)
+ if bytes.Equal(brn, phonyRule) {
+ sink.Write(line)
+ continue
+ }
+ if len(brn) == 0 {
+ return fmt.Errorf("%s:%d: build statement lacks rule name", path, i+1)
+ }
+ sink.Write(line[0 : cap(line)-cap(brn)])
+ if digest, ok := ruleDigest[string(brn)]; ok {
+ sink.Write([]byte(digest))
+ } else {
+ return fmt.Errorf("%s:%d: no rule for this build target", path, i+1)
+ }
+ sink.Write(line[cap(line)+len(brn)-cap(brn):])
+ } else if bytes.HasPrefix(line, rulePrefix) {
+ rn := ruleName(line)
+ // Write everything before it
+ sink.Write(line[0 : cap(line)-cap(rn)])
+ sink.Write([]byte(ruleDigest[string(rn)]))
+ sink.Write(line[cap(line)+len(rn)-cap(rn):])
+ } else {
+ //goland:noinspection GoUnhandledErrorResult
+ sink.Write(line)
+ }
+ }
+ return nil
+}
+
+func getLine(b []byte) []byte {
+ if n := bytes.IndexByte(b, '\n'); n >= 0 {
+ return b[:n+1]
+ }
+ return b
+}
+
+// Returns build statement's rule name
+func getBuildRuleName(line []byte) []byte {
+ n := bytes.IndexByte(line, ':')
+ if n <= 0 {
+ return nil
+ }
+ ruleName := line[n+1:]
+ if ruleName[0] == ' ' {
+ ruleName = bytes.TrimLeft(ruleName, " ")
+ }
+ if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
+ ruleName = ruleName[0:n]
+ }
+ return ruleName
+}
+
+// Returns rule statement's rule name
+func ruleName(lineAfterRule []byte) []byte {
+ ruleName := lineAfterRule[len(rulePrefix):]
+ if len(ruleName) == 0 {
+ return ruleName
+ }
+ if ruleName[0] == ' ' {
+ ruleName = bytes.TrimLeft(ruleName, " ")
+ }
+ if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
+ ruleName = ruleName[0:n]
+ }
+ return ruleName
+}
diff --git a/tools/canoninja/canoninja_test.go b/tools/canoninja/canoninja_test.go
new file mode 100644
index 0000000000..3c45f8cf4b
--- /dev/null
+++ b/tools/canoninja/canoninja_test.go
@@ -0,0 +1,47 @@
+package canoninja
+
+import (
+ "bytes"
+ "testing"
+)
+
+func TestGenerate(t *testing.T) {
+ tests := []struct {
+ name string
+ in []byte
+ wantSink string
+ wantErr bool
+ }{
+ {
+ name: "1",
+ in: []byte(`
+rule rule1
+ abcd
+rule rule2
+ abcd
+build x: rule1
+`),
+ wantSink: `
+rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
+ abcd
+rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
+ abcd
+build x: R9c97aba7f61994be6862f5ea9a62d26130c7f48b
+`,
+ wantErr: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ sink := &bytes.Buffer{}
+ err := Generate("<file>", tt.in, sink)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("Generate() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+ if gotSink := sink.String(); gotSink != tt.wantSink {
+ t.Errorf("Generate() gotSink = %v, want %v", gotSink, tt.wantSink)
+ }
+ })
+ }
+}
diff --git a/tools/canoninja/cmd/main.go b/tools/canoninja/cmd/main.go
new file mode 100644
index 0000000000..71802efa6a
--- /dev/null
+++ b/tools/canoninja/cmd/main.go
@@ -0,0 +1,36 @@
+package main
+
+/*
+ Canoninja reads a Ninja file and changes the rule names to be the digest of the rule contents.
+ Feed it to a filter that extracts only build statements, sort them, and you will have a crude
+ but effective tool to find small differences between two Ninja files.
+*/
+
+import (
+ "canoninja"
+ "flag"
+ "fmt"
+ "os"
+)
+
+func main() {
+ flag.Parse()
+ files := flag.Args()
+ if len(files) == 0 {
+ files = []string{"/dev/stdin"}
+ }
+ rc := 0
+ for _, f := range files {
+ if buffer, err := os.ReadFile(f); err == nil {
+ err = canoninja.Generate(f, buffer, os.Stdout)
+ if err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ rc = 1
+ }
+ } else {
+ fmt.Fprintf(os.Stderr, "%s: %s\n", f, err)
+ rc = 1
+ }
+ }
+ os.Exit(rc)
+}
diff --git a/tools/canoninja/go.mod b/tools/canoninja/go.mod
new file mode 100644
index 0000000000..c5a924e7c3
--- /dev/null
+++ b/tools/canoninja/go.mod
@@ -0,0 +1 @@
+module canoninja