diff options
author | Inna Palant <ipalant@google.com> | 2024-03-28 05:41:27 +0000 |
---|---|---|
committer | Inna Palant <ipalant@google.com> | 2024-03-28 05:41:27 +0000 |
commit | 5e6f103c616029220267ff7523aad38344a88411 (patch) | |
tree | 0b9a4453a315d92909fa1fb2812ddc3566efc11d | |
parent | f184abd6d45c08d7dae97d7c64d7b8c05365ed5d (diff) | |
parent | 110d191d73e8b42aec329a358d332cd6862cbc78 (diff) | |
download | n2-5e6f103c616029220267ff7523aad38344a88411.tar.gz |
Merge remote-tracking branch 'origin/upstream'
Import b/328273370
67 files changed, 8489 insertions, 0 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..654296c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + +env: + CARGO_TERM_COLOR: always + +jobs: + test: + strategy: + matrix: + os: [ubuntu-latest, windows-latest] + runs-on: ${{ matrix.os }} + env: + RUST_BACKTRACE: 1 + + steps: + - uses: actions/checkout@v2 + - uses: dtolnay/rust-toolchain@1.70.0 + with: + components: rustfmt + - name: Check formatting + run: cargo fmt -- --check + - name: Cargo cache + uses: actions/cache@v2 + with: + key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }} + path: ~/.cargo/registry + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..94b2d8f --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,48 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug executable 'n2'", + "cargo": { + "args": [ + "build", + "--bin=n2", + "--package=n2" + ], + "filter": { + "name": "n2", + "kind": "bin" + } + }, + "args": [ + "-C", + "../projects/ninja" + ], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug unit tests in executable 'n2'", + "cargo": { + "args": [ + "test", + "--no-run", + "--bin=n2", + "--package=n2" + ], + "filter": { + "name": "n2", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + } + ] +}
\ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..08e675c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "editor.rulers": [ + 80 + ], + "editor.detectIndentation": false, +}
\ No newline at end of file diff --git a/Android.bp b/Android.bp new file mode 100644 index 0000000..a1b1b4b --- /dev/null +++ b/Android.bp @@ -0,0 +1,33 @@ + +// TODO: Add a module for the tests. The tests require libfiletime, which is not in android. + +rust_library_host { + name: "libn2", + crate_name: "n2", + srcs: ["src/lib.rs"], + edition: "2018", + cargo_env_compat: true, + // n2 prints the value of CARGO_PKG_VERSION when using the --version argument, we need to set + // this property to define the environment variable, but don't actually care about the value. + cargo_pkg_version: "android", + rustlibs: [ + "libanyhow", + "libargh", + "liblibc", + "librustc_hash", + ], +} + +rust_binary_host { + name: "n2", + crate_name: "n2", + srcs: ["src/main.rs"], + edition: "2018", + rustlibs: [ + "libn2", + "libanyhow", + "libargh", + "liblibc", + "librustc_hash", + ], +} diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..19c25c1 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,901 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "anyhow" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b26702f315f53b6071259e15dd9d64528213b44d61de1ec926eca7715d62203" + +[[package]] +name = "argh" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab257697eb9496bf75526f0217b5ed64636a9cfafa78b8365c71bd283fcef93e" +dependencies = [ + "argh_derive", + "argh_shared", +] + +[[package]] +name = "argh_derive" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b382dbd3288e053331f03399e1db106c9fb0d8562ad62cb04859ae926f324fa6" +dependencies = [ + "argh_shared", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "argh_shared" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64cb94155d965e3d37ffbbe7cc5b82c3dd79dd33bd48e536f73d2cfb8d85506f" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = 
"bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" + +[[package]] +name = "ciborium-ll" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.4.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "either" +version = "1.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys 0.52.0", +] + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi", + "rustix 0.38.3", + "windows-sys 0.48.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "jemalloc-sys" +version = "0.5.3+5.3.0-patched" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9bd5d616ea7ed58b571b2e209a65759664d7fb021a0819d7a790afc67e47ca1" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "jemallocator" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6" +dependencies = [ + "jemalloc-sys", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.146" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" + +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + 
+[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "n2" +version = "0.1.0" +dependencies = [ + "anyhow", + "argh", + "criterion", + "filetime", + "jemallocator", + "libc", + "rustc-hash", + "tempfile", + "windows-sys 0.48.0", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "proc-macro2" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = 
"0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.37.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustix" +version = "0.38.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4" +dependencies = [ + "bitflags 2.4.1", + "errno", + "libc", + "linux-raw-sys 0.4.12", + "windows-sys 0.48.0", +] + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.164" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.164" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.20", +] + +[[package]] +name = "serde_json" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb8d4cebc40aa517dfb69618fa647a346562e67228e2236ae0042ee6ac14775" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" +dependencies = [ + "autocfg", + "cfg-if", + "fastrand", + "redox_syscall 0.3.5", + "rustix 0.37.20", + "windows-sys 0.48.0", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + 
"winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.20", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.20", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" + +[[package]] +name = "web-sys" +version = "0.3.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = 
"winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + 
"windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = 
"windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..b91b120 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,55 @@ +[package] +name = "n2" +version = "0.1.0" +categories = ["development-tools", "development-tools::build-utils"] +edition = "2018" +exclude = [".github/*", ".vscode/*"] +homepage = "https://github.com/evmar/n2" +keywords = ["ninja", "build"] +license = "Apache-2.0" +readme = "README.md" +repository = "https://github.com/evmar/n2" +# https://github.com/evmar/n2/issues/74 +# Note: if we bump this, may need to bump .github/workflows/ci.yml version too. 
+rust-version = "1.70.0" +description = "a ninja compatible build system" + +[dependencies] +anyhow = "1.0" +argh = "0.1.10" +libc = "0.2" +rustc-hash = "1.1.0" + +[target.'cfg(windows)'.dependencies.windows-sys] +version = "0.48" +features = [ + "Win32_Foundation", + "Win32_Security", + "Win32_System_Console", + "Win32_System_Diagnostics_Debug", + "Win32_System_Pipes", + "Win32_System_Threading", +] + +[target.'cfg(not(any(windows, target_arch = "wasm32")))'.dependencies] +jemallocator = "0.5.0" + +[dev-dependencies] +tempfile = "3.6.0" +criterion = { version = "0.5.1", features = ["html_reports"] } +filetime = "0.2" + +[profile.release] +debug = true +lto = true + +[[bench]] +name = "parse" +harness = false + +# For Criterion reasons, we need bench=false for our lib/bin. +[lib] +bench = false +[[bin]] +name = "n2" +bench = false @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/METADATA b/METADATA new file mode 100644 index 0000000..d86949a --- /dev/null +++ b/METADATA @@ -0,0 +1,14 @@ +name: "n2" +description: "an alternative ninja implementation" + +third_party { + homepage: "https://github.com/evmar/n2" + identifier { + type: "Git" + value: "https://github.com/evmar/n2" + primary_source: true + } + version: "8881a661ae63e1124eb47b45f08d9fa1225a5f00" + license_type: NOTICE + last_upgrade_date { year: 2024 month: 3 day: 21 } +} diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2 new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/MODULE_LICENSE_APACHE2 @@ -0,0 +1,2 @@ +include platform/build/soong:main:/OWNERS +include platform/system/core:main:/janitors/OWNERS diff --git a/README.md b/README.md new file mode 100644 index 0000000..96de9b5 --- /dev/null +++ b/README.md @@ -0,0 +1,79 @@ +# n2, an alternative ninja implementation + +![CI status](https://github.com/evmar/n2/actions/workflows/ci.yml/badge.svg) + +n2 (pronounced "into") implements enough of [Ninja](https://ninja-build.org/) to +successfully build some projects that build with Ninja. Compared to Ninja, n2 +missing some features but is faster to build and has a better UI; see +[a more detailed comparison](doc/comparison.md). + +> [Here's a small demo](https://asciinema.org/a/F2E7a6nX4feoSSWVI4oFAm21T) of n2 +> building some of Clang. 
+
+## Install
+
+```
+$ cargo install --git https://github.com/evmar/n2
+# (installs into ~/.cargo/bin/)
+
+$ n2 -C some/build/dir some-target
+```
+
+### Using with CMake
+
+When CMake generates Ninja files it attempts to run a program named `ninja` with
+some particular Ninja behaviors. In particular, it attempts to inform Ninja/n2
+that its generated build files are up to date so that the build system doesn't
+attempt to rebuild them.
+
+n2 can emulate the expected CMake behavior when invoked as `ninja`. To do this
+you create a symlink named `ninja` somewhere in your `$PATH`, such that CMake
+can discover it.
+
+- UNIX: `ln -s path/to/n2 ninja`
+- Windows(cmd): `mklink ninja.exe path\to\n2`
+- Windows(PowerShell): `New-Item -Type Symlink ninja.exe -Target path\to\n2`
+
+> **Warning**\
+> If you don't have Ninja installed at all, you must install such a symlink
+> because CMake attempts to invoke `ninja` itself!
+
+## The console output
+
+While building, n2 displays build progress like this:
+
+```
+[=========================--------- ] 2772/4459 done, 8/930 running
+Building foo/bar (2s)
+Building foo/baz
+```
+
+The progress bar always covers all build steps needed for the targets,
+regardless of whether they need to be executed or not.
+
+The bar shows three categories of state:
+
+- **Done:** The `=` signs show the build steps that are already up to date.
+- **In progress:** The `-` signs show steps that are in-progress; if you had
+  enough CPUs they would all be executing. The `8/930 running` after shows that
+  n2 is currently executing 8 of the 930 available steps.
+- **Unknown:** The remaining empty space indicates steps whose status is yet to
+  be known, as they depend on the in progress steps. For example, if an
+  intermediate step doesn't write its outputs n2 may not need to execute the
+  dependent steps.
+ +The lines below the progress bar show some build steps that are currrently +running, along with how long they've been running if it has been a while. Their +text is controlled by the input `build.ninja` file. + +## More reading + +I wrote n2 to +[explore some alternative ideas](http://neugierig.org/software/blog/2022/03/n2.html) +I had around how to structure a build system. In a very real sense the +exploration is more important than the actual software itself, so you can view +the design notes as one of the primary artifacts of this. + +- [Design notes](doc/design_notes.md). +- [Development tips](doc/development.md). +- [Comparison with Ninja / missing features](doc/comparison.md). diff --git a/benches/parse.rs b/benches/parse.rs new file mode 100644 index 0000000..bdd7323 --- /dev/null +++ b/benches/parse.rs @@ -0,0 +1,92 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use std::{io::Write, path::PathBuf, str::FromStr}; + +pub fn bench_canon(c: &mut Criterion) { + // TODO switch to canon_path_fast + c.bench_function("canon plain", |b| { + b.iter(|| { + let path = "examples/OrcV2Examples/OrcV2CBindingsVeryLazy/\ + CMakeFiles/OrcV2CBindingsVeryLazy.dir/OrcV2CBindingsVeryLazy.c.o"; + n2::canon::canon_path(path); + }) + }); + + c.bench_function("canon with parents", |b| { + b.iter(|| { + let path = "examples/OrcV2Examples/OrcV2CBindingsVeryLazy/\ + ../../../\ + CMakeFiles/OrcV2CBindingsVeryLazy.dir/OrcV2CBindingsVeryLazy.c.o"; + n2::canon::canon_path(path); + }) + }); +} + +fn generate_build_ninja() -> Vec<u8> { + let mut buf: Vec<u8> = Vec::new(); + write!(buf, "rule cc\n command = touch $out",).unwrap(); + for i in 0..1000 { + write!( + buf, + "build $out/foo/bar{}.o: cc $src/long/file/name{}.cc + depfile = $out/foo/bar{}.o.d\n", + i, i, i + ) + .unwrap(); + } + buf +} + +fn bench_parse_synthetic(c: &mut Criterion) { + let mut input = generate_build_ninja(); + input.push(0); + c.bench_function("parse synthetic build.ninja", |b| { + b.iter(|| { 
+ let mut parser = n2::parse::Parser::new(&input); + loop { + if parser.read().unwrap().is_none() { + break; + } + } + }) + }); +} + +fn bench_parse_file(c: &mut Criterion, input: &[u8]) { + c.bench_function("parse benches/build.ninja", |b| { + b.iter(|| { + let mut parser = n2::parse::Parser::new(&input); + loop { + if parser.read().unwrap().is_none() { + break; + } + } + }) + }); +} + +pub fn bench_parse(c: &mut Criterion) { + match n2::scanner::read_file_with_nul("benches/build.ninja".as_ref()) { + Ok(input) => bench_parse_file(c, &input), + Err(err) => { + eprintln!("failed to read benches/build.ninja: {}", err); + eprintln!("using synthetic build.ninja"); + bench_parse_synthetic(c) + } + }; +} + +fn bench_load_synthetic(c: &mut Criterion) { + let mut input = generate_build_ninja(); + input.push(0); + c.bench_function("load synthetic build.ninja", |b| { + b.iter(|| { + let mut loader = n2::load::Loader::new(); + loader + .parse(PathBuf::from_str("build.ninja").unwrap(), &input) + .unwrap(); + }) + }); +} + +criterion_group!(benches, bench_canon, bench_parse, bench_load_synthetic); +criterion_main!(benches); diff --git a/doc/build1.png b/doc/build1.png Binary files differnew file mode 100644 index 0000000..ac4f333 --- /dev/null +++ b/doc/build1.png diff --git a/doc/build2.png b/doc/build2.png Binary files differnew file mode 100644 index 0000000..07f6cc3 --- /dev/null +++ b/doc/build2.png diff --git a/doc/build3.png b/doc/build3.png Binary files differnew file mode 100644 index 0000000..6330f59 --- /dev/null +++ b/doc/build3.png diff --git a/doc/build4.png b/doc/build4.png Binary files differnew file mode 100644 index 0000000..8764afa --- /dev/null +++ b/doc/build4.png diff --git a/doc/build5.png b/doc/build5.png Binary files differnew file mode 100644 index 0000000..2f55388 --- /dev/null +++ b/doc/build5.png diff --git a/doc/comparison.md b/doc/comparison.md new file mode 100644 index 0000000..9d362b9 --- /dev/null +++ b/doc/comparison.md @@ -0,0 +1,39 @@ +# 
Feature comparison with Ninja + +n2 is intended to be able to build any project that Ninja can load. Here is a +comparison of things n2 does worse and better than Ninja. + +## Improvements + +Here are some things n2 improves over Ninja: + +- Builds are more incremental: n2 starts running tasks as soon as an out of date + one is found, rather than gathering all the out of date tasks before executing + as Ninja does. +- Fancier status output, modeled after Bazel. + [Here's a small demo](https://asciinema.org/a/F2E7a6nX4feoSSWVI4oFAm21T). +- `-d trace` generates a performance trace that can be visualized by Chrome's + `about:tracing` or alternatives (speedscope, perfetto). + +## Missing + +- Windows is incomplete. + - Ninja has special handling of backslashed paths that + [n2 doesn't yet follow](https://github.com/evmar/n2/issues/42). +- Dynamic dependencies. +- `console` pool. n2 currently just treats `console` as an ordinary pool of + depth 1, and only shows console output after the task completes. In practice + this means commands that print progress when run currently show nothing until + they're complete. +- `subninja` is only partially implemented. + +### Missing flags + +- `-l`, load average throttling +- `-n`, dry run + +#### Missing subcommands + +Most of `-d` (debugging), `-t` (tools). + +No `-w` (warnings). diff --git a/doc/design_notes.md b/doc/design_notes.md new file mode 100644 index 0000000..afe5dad --- /dev/null +++ b/doc/design_notes.md @@ -0,0 +1,313 @@ +# Design notes + +(I wrote some high-level design observations in +[a blog post](https://neugierig.org/software/blog/2022/03/n2.html), whose +contents maybe ought to migrate here.) + +## Build states + +While building, we have a bunch of `Build` objects that represent individual +build steps that go through a series of states. Here I give a picture about how +those states work. + +Imagine a hypothetical build, represented here by a series of boxes with arrows +representing outputs. 
n2 models both builds and files in its graph because build +steps may produce more than one output, so it's more complex than this, but it +will do for this discussion. Builds start in the `Unknown` state, just a default +state shown here in gray. + +<img src='build1.png' width='267'> + +The user requests bringing some build up to date which we mark by the `Want` +state, and traverse the dependencies backwards to mark all inputs also as +`Want`. + +<img src='build2.png' width='267'> + +Any build that doesn't depend on another build is marked `Ready`. + +<img src='build3.png' width='267'> + +The main loop pops a `Ready` build and examines its inputs/output to judge +whether it needs to be brought up to date. Here, we examined the upper left +build and determined it was already up to date and marked it `Done`. + +For each downstream output of that build, we check whether all the inputs to +that output are `Done` and if so, we mark it `Ready`. This makes the build to +the right `Ready`. Note that `Ready` means "ready to be checked for +up-to-date-ness", and is purely a function of which builds we've checked off and +not any on-disk file state. + +<img src='build4.png' width='267'> + +In the next iteration of the loop we pop the lower left `Ready` and determine it +is out of date. It then moves to state `Queued` and added to the relevant pool. + +<img src='build5.png' width='267'> + +Concurrently with visiting `Ready` builds, if we have available execution +parallelism, we examine all the pools that have spare depth for any `Queued` +builds and start executing those tasks, moving the build to the `Running` state. +For example, this build might be in the `console` pool, which has `depth=1` +meaning that pool can only run one build step at a time, which means it might +remain `Queued` if we're already running a build in that pool. 
+
+And similarly, if any running builds complete, we move them to the `Done` state
+and repeat the same logic done above to mark downstream builds `Ready`.
+
+There is more to this. For example there's a `Failed` state, which is used in
+builds with the `-k` flag, that lets us keep running even when some build steps
+fail. But the above is most of the picture.
+
+## Missing files
+
+What happens if a file referenced in a build rule isn't present?
+
+For the purpose of ordering: a build is "ready" when all dependent builds have
+been brought up to date, and that is independent of whether the files are
+present or not.
+
+Ninja was pretty lax about missing files. If one step generated a file and a
+later step consumed it, nothing verified whether that file was actually
+produced; Ninja just ran the steps in order. So to follow Ninja, n2 also allows
+any file required by a build step to be absent as long as it is declared to be
+generated by another build step.
+
+In particular, here are some use cases:
+
+- A missing output, after a build runs, is allowed. This is used in build files
+  as a marker for build rules that want to always run.
+- Build steps that only have order-only dependencies on another step don't need
+  any files on disk for n2 to manage their relative order.
+- Discovered inputs may disappear without breaking builds. Imagine a C file that
+  includes a header. After building, we note that changes to the header should
+  prompt a rebuild. But if you remove the include and the header file at the
+  same time, we should still allow you to build despite the historical
+  dependency.
+
+But if any of those files are missing we call the step dirty without bothering
+to compute a hash. In this manner we never compute a hash involving any missing
+files.
+
+Further, CMake generates Ninja files that claim a build step generates a
+[depfile](https://ninja-build.org/manual.html#_depfile) when it doesn't. Ninja
+treats this as an empty depfile, not an error.
(See +[#80](https://github.com/evmar/n2/issues/80).) + +## Parsing + +Parsing .ninja files is part of the critical path for n2, because it must be +complete before any other work can be done. Some properties of the n2 parser +that break abstraction to this end: + +- There is no separate lexer. Ninja syntax is not really lexer friendly in the + first place. +- When possible, `$variable` expansions happen as they're encountered, so that + we don't need to build up a parsed representation of strings and carry around + variable environments. +- Parsed entities that deal with paths are generic over a `Path` type, and path + strings are converted to Paths as they are parsed. (In practice the `Path` + type is `graph::FileId`, but the parsing code isn't aware of this type + directly.) This (and the previous bullet) allows the parser to reuse a single + `String` buffer when parsing paths, which is the bulk of what the parser does. + +## Unicode + +Ninja +[was intentionally "encoding agnostic"](https://ninja-build.org/manual.html#ref_lexer), +which is to say it treated input build files as any byte stream that is ASCII +compatible. In other words, any string of bytes found in a `build.ninja` is +passed verbatim through printing to stdout and to the OS for path operations, +which meant Ninja was compatible with both UTF-8 and other encodings. The intent +is that those other encodings occur on Linuxes, especially in East Asia, and +also it means Ninja doesn't need any specific handling of even UTF-8. + +It looks like since my time, +[Ninja on Windows changed its input files to require UTF-8](https://github.com/ninja-build/ninja/pull/1915). +As you can see from the comments on that bug, this area is unfortunately pretty +subtle. 
+ +Windows is particularly fiddly not only because its native path representation +is UTF-16 -- which is incompatible with the original byte stream assumption made +by Ninja and which requires conversions -- but further because Ninja needs to +parse the `/showIncludes` output from the MSVC compiler, which is localized. See +the `msvc_deps_prefix` variable in +[the Ninja docs on deps handling](https://ninja-build.org/manual.html#_deps); +there have been lots of bug reports over the years from people with Chinese +output that is failing to parse right due to Windows code page mess. + +In any case, n2 doesn't support any of this for now, and instead just follows +Ninja in treating paths as bytes. (n2 doesn't support `/showIncludes` or MSVC at +all yet.) + +It's possibly a better design to require input files to always be UTF-8, though +I think I'd want to better understand the `/showIncludes` situation. (The above +Ninja bug observed this was a breaking change in Ninja, too.) Possibly we could +mandate "input files are UTF-8, and if you need something other than UTF-8 in +your `msvc_deps_prefix` it's on you to escape the exact byte sequence you +desire". However, I'm not clear on whether all possible Windows paths are +representable as UTF-8 or if we'd need to go down the WTF-8 route for full +compatibility. + +## Tracking build state + +While building, we have a bunch of `Build` objects that represent individual +build steps that go through a series of states. To represent these well I went +through a few patterns and eventually came up with a design I'm pretty happy +with. + +First, for each `Build` we store its current state. This lets us quickly answer +questions like "is the build id X ready or not?" (You could imagine storing this +directly in the `Build` or in a side HashMap from id to state, but that's an +implementation detail.) 
 We use this for things like tracking whether we've
+already visited a given `Build` when doing a traversal of the graph while
+loading. This also has the benefit of ensuring a given `Build` is always in
+exactly one known state.
+
+Second, we store data structures on the side for states where we care about
+having quicker views onto this state. The idea here is that depending on the
+particular needs of a given state we can model those needs specially. For
+example, we need to be able to grab the next `Ready` build to work on it, so
+there's a `VecDeque` holding those, while builds that go into the `Queued` state
+queue into separate run pools, and builds that are `Running` are just tracked
+with an integer counter on the run pool.
+
+## Spawning subprocesses
+
+Ninja (and n2) use `posix_spawn` to spawn subprocesses (on non-Windows). I saw a
+blog post recently that called `posix_spawn` something like a bloated wrapper
+for simpler primitives, but another view on it is that `posix_spawn` has the
+platform-local libc's author's best knowledge about how to safely run processes.
+
+In particular, I was surprised to learn that Rust's built-in process spawning
+library [leaks file descriptors](https://github.com/evmar/n2/issues/14). (See
+also [the upstream Rust bug](https://github.com/rust-lang/rust/issues/95584),
+which says Cargo also ran into this.)
+
+## Subprocess command lines
+
+Ninja (and n2) model the commands they execute as strings, in that the build
+file language has you construct a `command = ...` string. However, on Unixes
+commands are fundamentally arrays of arguments. To convert a string to an array
+there must be some sort of parsing, and for this purpose we use `/bin/sh` to
+execute subcommands.
+
+`/bin/sh` is well-specified by POSIX and has the semantics everyone expects for
+how commands work, from quoting to `$FOO`-style environment expansion to
+`a && b` job control, pipelines, and so on.
And it's also used everywhere on +Unixes so it is fast. + +However it has some downsides, particularly in that it means the person +generating the `.ninja` file has to know about these rules as well. For example, +consider a build rule like this: + +``` +build foo$ bar baz: ... + command = pummel $out +``` + +Per the Ninja syntax rules (which are _not_ the shell rules), that build has two +outputs, the files `foo bar` and `baz`. The Ninja variable `$out` then expands +to the string `foo bar baz`, and once the shell parses the command line the +`pummel` command receives three arguments in argv: `foo`, `bar`, and `baz`, +which is not what you wanted. + +For these kinds of problems there are further shell tricks around handling +spaces like how the string `"$@"` is expanded. In Ninja's case, we have the +escape hatch that the `.ninja` file author can just provide the extra text +explicitly, like in +[this bug report and workaround](https://github.com/ninja-build/ninja/issues/1312). +It's pretty unsatisfying though. + +In principle, it might be a better design for the Ninja language to instead have +an array type that would allow us to construct an argv array directly. This +would avoid shell-related quoting issues and save us a `/bin/sh` invocation on +each process spawn. However, this is a pretty large departure from how Ninja +currently works. + +Additionally, on Windows the platform behavior is reversed. On Windows command +lines are fundamentally strings, and it's up to each executable to interpret +those strings as they want. (Does this seem like a recipe for bugs? Yes it does. +See [this blog post](https://neugierig.org/software/blog/2011/10/quoting.html)'s +"Command lines" section for more on this.) So even if Ninja did work with arrays +we'd then need some sort of stringification for Windows. + +On the subject of Windows: Ninja (and n2) spawn processes directly without an +intervening shell. 
This has the consequence that commands like +`my-compiler && echo hello` do not what you'd expect; the opposite of the Ninja +behavior on Unixes. The `build.ninja` author instead has to themselves write out +`cmd /c my-command` if they want the shell involved. We did this only because +(in my recollection) the overhead of spawning `cmd` on Windows was noticeable. +And further, `cmd` randomly limits its argument to 8kb. + +PS: in writing this section I noticed that cmd has +[terrifying quoting rules](https://stackoverflow.com/questions/355988/how-do-i-deal-with-quote-characters-when-using-cmd-exe). + +## Variable scope + +Ninja syntactic structures (`build`, `rule`) are at some level just lists of +key-value bindings that ultimately combine to set properties on individual build +steps, such as the `command = ...` command line. + +Additionally, bindings can be referenced by other bindings via the `$foo` or +`${foo}` syntax. This means variable lookup traverses through a hierarchy of +scopes. The intent was this was simple enough that the behavior is +straightforward, which in retrospect's insight really just means +"underspecified". (Forgive me! I hacked Ninja together in a few weekends and +made the all too easy mistake of "this is so simple I don't really need to think +it through".) + +### Basics + +First, here's a high level description that conveys the idea but omits details. + +Consider a build file like the following: + +``` +var = $A + +rule r + command = $B + +build output-$C: r + var2 = $D +``` + +The `build` block stamps out a build step using the rule `r`, and the properties +of that build step are found by looking up attributes like `command`. + +When evaluating the expressions marked `$A`/`$B`/`$C`/`$D` above, these are the +scopes to consider, in order of nesting: + +1. The toplevel scope, which defines `var` above. +1. The build variable scope, which defines `var2` above. +1. The build file list scope, which defines the implicit `$in` etc. variables. 
+1. The rule scope, which defines `command` above. + +References found in each may refer to the scopes above in the list. For example, +at the time the `command = ...` is evaluated, the in-scope variables include +`$in`, `$var2`, and `$var`. + +### Details + +Unfortunately, [Hyrum's Law](https://www.hyrumslaw.com/) means that Ninja files +in the wild depend on whatever the Ninja implementation allows, which is more +complex than the above. I don't think it's really worth fleshing out all the +idiosyncracies of Ninja's implementation as long as existing builds work, but +some details do matter. + +In particular, Ninja has particular behaviors around variable references found +within the same scope. Consider: + +``` +var = 1 +var = ${var}2 +rule ... + command = echo $depfile + depfile = abc +``` + +In Ninja, the value of `var` is `12`: the assignment proceeds from top down. But +within a `rule` block, the variable lookup of `$depfile` instead refers forward +to `abc`, which means there is a possibility of circular references, along with +logic that attempts to detect and warn in those cases(!). diff --git a/doc/development.md b/doc/development.md new file mode 100644 index 0000000..eaf7655 --- /dev/null +++ b/doc/development.md @@ -0,0 +1,94 @@ +## Git hook + +On a new checkout, run this to install the formatting check hook: + +``` +$ ln -s ../../git-pre-commit .git/hooks/pre-commit +``` + +## Path handling and Unicode safety + +See the longer discussion of Unicode in general in the +[design notes](design_notes.md). + +Concretely, we currently use Rust `String` for all paths and file contents, but +internally interpret them as as bytes (not UTF-8) including using `unsafe` +sometimes to convert. + +Based on my superficial understanding of how safety relates to UTF-8 in Rust +strings, it's probably harmless given that we never treat strings as Unicode, +but it's also possible some code outside of our control relies on this. 
But it +does mean there's a bunch of kind of needless `unsafe`s in the code, and some of +them are possibly actually doing something bad. + +We could fix this by switching to using a bag of bytes type, like +https://crates.io/crates/bstr. But it is pretty invasive. We would need to use +that not only for paths but also console output, error messages, etc. And it's +not clear (again, see above design notes discussion) that using bags of bytes is +the desired end state, so it's probably not worth doing. + +## Profiling + +### gperftools + +I played with a few profilers, but I think the gperftools profiler turned out to +be significantly better than the others. To install: + +``` +$ apt install libgoogle-perftools-dev +$ go install github.com/google/pprof@latest +``` + +To use: + +``` +[possibly modify main.rs to make the app do more work than normal] +$ LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libprofiler.so CPUPROFILE=p ./target/release/n2 ... +$ pprof -http=:8080 ./target/release/n2 p +``` + +The web server it brings up shows an interactive graph, top functions, annotated +code, disassembly... + +### Mac + +``` +$ cargo instruments --release --template time --bin n2 -- -C ~/projects/llvm-project-16.0.0.src/build clang-format +``` + +TODO: notes on this vs `cargo flamegraph`. + +### Windows + +It appears `perf` profiling of Rust under WSL2 is not a thing(?). + +## Benchmarking + +### Simple + +This benchmarks end-to-end n2 load time, by asking to build a nonexistent target: + +1. `cargo install hyperfine` +2. `$ hyperfine -i -- './target/release/n2 -C ~/llvm-project/llvm/utils/gn/out/ xxx'` + +### Micro + +There are microbenchmarks in the `benches/` directory. Run them with: + +``` +$ cargo bench +``` + +If there is a `build.ninja` in the `benches/` directory, the parsing benchmark +will load it. For example, you can copy the `build.ninja` generated by the +LLVM CMake build system (66mb on my system!). 
+ +To run just the parsing benchmark: + +``` +$ cargo bench --bench parse -- parse +``` + +When iterating on benchmarks, it can help build time to disable `lto` in release +mode by commenting out the `lto =` line in `Cargo.toml`. (On my system, `lto` +is worth ~13% of parsing performance.) diff --git a/dprint.json b/dprint.json new file mode 100644 index 0000000..9dd9b16 --- /dev/null +++ b/dprint.json @@ -0,0 +1,14 @@ +{ + "markdown": { + "textWrap": "always" + }, + "includes": [ + "**/*.{md}", + "**/*.{toml}" + ], + "excludes": [], + "plugins": [ + "https://plugins.dprint.dev/markdown-0.15.3.wasm", + "https://plugins.dprint.dev/toml-0.5.4.wasm" + ] +}
\ No newline at end of file diff --git a/git-pre-commit b/git-pre-commit new file mode 100755 index 0000000..4abbcc2 --- /dev/null +++ b/git-pre-commit @@ -0,0 +1,9 @@ +#!/bin/bash + +diff=$(cargo fmt -- --check) +result=$? + +if [[ ${result} -ne 0 ]] ; then + echo 'run `cargo fmt`' + exit 1 +fi diff --git a/src/canon.rs b/src/canon.rs new file mode 100644 index 0000000..c0b9c99 --- /dev/null +++ b/src/canon.rs @@ -0,0 +1,197 @@ +//! Path canonicalization. + +use std::mem::MaybeUninit; + +/// An on-stack stack of values. +/// Used for tracking locations of parent components within a path. +struct StackStack<T> { + n: usize, + vals: [MaybeUninit<T>; 60], +} + +impl<T: Copy> StackStack<T> { + fn new() -> Self { + StackStack { + n: 0, + // Safety: we only access vals[i] after setting it. + vals: unsafe { MaybeUninit::uninit().assume_init() }, + } + } + + fn push(&mut self, val: T) { + if self.n >= self.vals.len() { + panic!("too many path components"); + } + self.vals[self.n].write(val); + self.n += 1; + } + + fn pop(&mut self) -> Option<T> { + if self.n > 0 { + self.n -= 1; + // Safety: we only access vals[i] after setting it. + Some(unsafe { self.vals[self.n].assume_init() }) + } else { + None + } + } +} + +/// Lexically canonicalize a path, removing redundant components. +/// Does not access the disk, but only simplifies things like +/// "foo/./bar" => "foo/bar". +/// These paths can show up due to variable expansion in particular. +/// Returns the new length of the path, guaranteed <= the original length. +#[must_use] +pub fn canon_path_fast(path: &mut str) -> usize { + assert!(!path.is_empty()); + // Safety: this traverses the path buffer to move data around. + // We maintain the invariant that *dst always points to a point within + // the buffer, and that src is always checked against end before reading. 
+ unsafe { + let mut components = StackStack::<*mut u8>::new(); + let mut dst = path.as_mut_ptr(); + let mut src = path.as_ptr(); + let start = path.as_mut_ptr(); + let end = src.add(path.len()); + + if src == end { + return 0; + } + if *src == b'/' || *src == b'\\' { + src = src.add(1); + dst = dst.add(1); + } + + // Outer loop: one iteration per path component. + while src < end { + // Peek ahead for special path components: "/", ".", and "..". + match *src { + b'/' | b'\\' => { + src = src.add(1); + continue; + } + b'.' => { + let mut peek = src.add(1); + if peek == end { + break; // Trailing '.', trim. + } + match *peek { + b'/' | b'\\' => { + // "./", skip. + src = src.add(2); + continue; + } + b'.' => { + // ".." + peek = peek.add(1); + if !(peek == end || *peek == b'/' || *peek == b'\\') { + // Componet that happens to start with "..". + // Handle as an ordinary component. + break; + } + // ".." component, try to back up. + if let Some(ofs) = components.pop() { + dst = ofs; + } else { + *dst = b'.'; + dst = dst.add(1); + *dst = b'.'; + dst = dst.add(1); + if peek != end { + *dst = *peek; + dst = dst.add(1); + } + } + src = src.add(3); + continue; + } + _ => {} + } + } + _ => {} + } + + // Mark this point as a possible target to pop to. + components.push(dst); + + // Inner loop: copy one path component, including trailing '/'. 
+ while src < end { + *dst = *src; + src = src.add(1); + dst = dst.add(1); + if *src.offset(-1) == b'/' || *src.offset(-1) == b'\\' { + break; + } + } + } + + if dst == start { + *start = b'.'; + 1 + } else { + dst.offset_from(start) as usize + } + } +} + +pub fn canon_path<T: Into<String>>(path: T) -> String { + let mut path = path.into(); + let len = canon_path_fast(&mut path); + path.truncate(len); + path +} + +#[cfg(test)] +mod tests { + use super::*; + + // Assert that canon path equals expected path with different path separators + fn assert_canon_path_eq(left: &str, right: &str) { + assert_eq!(canon_path(left), right); + assert_eq!( + canon_path(left.replace('/', "\\")), + right.replace('/', "\\") + ); + } + + #[test] + fn noop() { + assert_canon_path_eq("foo", "foo"); + + assert_canon_path_eq("foo/bar", "foo/bar"); + } + + #[test] + fn dot() { + assert_canon_path_eq("./foo", "foo"); + assert_canon_path_eq("foo/.", "foo/"); + assert_canon_path_eq("foo/./bar", "foo/bar"); + assert_canon_path_eq("./", "."); + assert_canon_path_eq("./.", "."); + assert_canon_path_eq("././", "."); + assert_canon_path_eq("././.", "."); + assert_canon_path_eq(".", "."); + } + + #[test] + fn slash() { + assert_canon_path_eq("/foo", "/foo"); + assert_canon_path_eq("foo//bar", "foo/bar"); + } + + #[test] + fn parent() { + assert_canon_path_eq("foo/../bar", "bar"); + + assert_canon_path_eq("/foo/../bar", "/bar"); + assert_canon_path_eq("../foo", "../foo"); + assert_canon_path_eq("../foo/../bar", "../bar"); + assert_canon_path_eq("../../bar", "../../bar"); + assert_canon_path_eq("./../foo", "../foo"); + assert_canon_path_eq("foo/..", "."); + assert_canon_path_eq("foo/../", "."); + assert_canon_path_eq("foo/../../", "../"); + assert_canon_path_eq("foo/../../bar", "../bar"); + } +} diff --git a/src/db.rs b/src/db.rs new file mode 100644 index 0000000..d6481ad --- /dev/null +++ b/src/db.rs @@ -0,0 +1,322 @@ +//! The n2 database stores information about previous builds for determining +//! 
which files are up to date. + +use crate::{ + densemap, densemap::DenseMap, graph::BuildId, graph::FileId, graph::Graph, graph::Hashes, + hash::BuildHash, +}; +use anyhow::{anyhow, bail}; +use std::collections::HashMap; +use std::fs::File; +use std::io::BufReader; +use std::io::Read; +use std::io::Write; +use std::path::Path; + +const VERSION: u32 = 1; + +/// Files are identified by integers that are stable across n2 executions. +#[derive(Debug, Clone, Copy)] +pub struct Id(u32); +impl densemap::Index for Id { + fn index(&self) -> usize { + self.0 as usize + } +} +impl From<usize> for Id { + fn from(u: usize) -> Id { + Id(u as u32) + } +} + +/// The loaded state of a database, as needed to make updates to the stored +/// state. Other state is directly loaded into the build graph. +#[derive(Default)] +pub struct IdMap { + /// Maps db::Id to FileId. + fileids: DenseMap<Id, FileId>, + /// Maps FileId to db::Id. + db_ids: HashMap<FileId, Id>, +} + +/// RecordWriter buffers writes into a Vec<u8>. +/// We attempt to write a full record per underlying finish() to lessen the chance of writing partial records. +#[derive(Default)] +struct RecordWriter(Vec<u8>); + +impl RecordWriter { + fn write(&mut self, buf: &[u8]) { + self.0.extend_from_slice(buf); + } + + fn write_u16(&mut self, n: u16) { + self.write(&n.to_le_bytes()); + } + + fn write_u24(&mut self, n: u32) { + self.write(&n.to_le_bytes()[..3]); + } + + fn write_u64(&mut self, n: u64) { + self.write(&n.to_le_bytes()); + } + + fn write_str(&mut self, s: &str) { + self.write_u16(s.len() as u16); + self.write(s.as_bytes()); + } + + fn write_id(&mut self, id: Id) { + if id.0 > (1 << 24) { + panic!("too many fileids"); + } + self.write_u24(id.0); + } + + fn finish(&self, w: &mut impl Write) -> std::io::Result<()> { + w.write_all(&self.0) + } +} + +/// An opened database, ready for writes. 
+pub struct Writer { + ids: IdMap, + w: File, +} + +impl Writer { + fn create(path: &Path) -> std::io::Result<Self> { + let f = std::fs::File::create(path)?; + let mut w = Self::from_opened(IdMap::default(), f); + w.write_signature()?; + Ok(w) + } + + fn from_opened(ids: IdMap, w: File) -> Self { + Writer { ids, w } + } + + fn write_signature(&mut self) -> std::io::Result<()> { + self.w.write_all("n2db".as_bytes())?; + self.w.write_all(&u32::to_le_bytes(VERSION)) + } + + fn write_path(&mut self, name: &str) -> std::io::Result<()> { + if name.len() >= 0b1000_0000_0000_0000 { + panic!("filename too long"); + } + let mut w = RecordWriter::default(); + w.write_str(&name); + w.finish(&mut self.w) + } + + fn ensure_id(&mut self, graph: &Graph, fileid: FileId) -> std::io::Result<Id> { + let id = match self.ids.db_ids.get(&fileid) { + Some(&id) => id, + None => { + let id = self.ids.fileids.push(fileid); + self.ids.db_ids.insert(fileid, id); + self.write_path(&graph.file(fileid).name)?; + id + } + }; + Ok(id) + } + + pub fn write_build( + &mut self, + graph: &Graph, + id: BuildId, + hash: BuildHash, + ) -> std::io::Result<()> { + let build = &graph.builds[id]; + let mut w = RecordWriter::default(); + let outs = build.outs(); + let mark = (outs.len() as u16) | 0b1000_0000_0000_0000; + w.write_u16(mark); + for &out in outs { + let id = self.ensure_id(graph, out)?; + w.write_id(id); + } + + let deps = build.discovered_ins(); + w.write_u16(deps.len() as u16); + for &dep in deps { + let id = self.ensure_id(graph, dep)?; + w.write_id(id); + } + + w.write_u64(hash.0); + w.finish(&mut self.w) + } +} + +struct Reader<'a> { + r: BufReader<&'a mut File>, + ids: IdMap, + graph: &'a mut Graph, + hashes: &'a mut Hashes, +} + +impl<'a> Reader<'a> { + fn read_u16(&mut self) -> std::io::Result<u16> { + let mut buf: [u8; 2] = [0; 2]; + self.r.read_exact(&mut buf[..])?; + Ok(u16::from_le_bytes(buf)) + } + + fn read_u24(&mut self) -> std::io::Result<u32> { + let mut buf: [u8; 4] = [0; 4]; + 
self.r.read_exact(&mut buf[..3])?; + Ok(u32::from_le_bytes(buf)) + } + + fn read_u64(&mut self) -> std::io::Result<u64> { + let mut buf: [u8; 8] = [0; 8]; + self.r.read_exact(&mut buf)?; + Ok(u64::from_le_bytes(buf)) + } + + fn read_id(&mut self) -> std::io::Result<Id> { + self.read_u24().map(Id) + } + + fn read_str(&mut self, len: usize) -> std::io::Result<String> { + let mut buf = vec![0; len]; + self.r.read_exact(buf.as_mut_slice())?; + Ok(unsafe { String::from_utf8_unchecked(buf) }) + } + + fn read_path(&mut self, len: usize) -> std::io::Result<()> { + let name = self.read_str(len)?; + // No canonicalization needed, paths were written canonicalized. + let fileid = self.graph.files.id_from_canonical(name); + let dbid = self.ids.fileids.push(fileid); + self.ids.db_ids.insert(fileid, dbid); + Ok(()) + } + + fn read_build(&mut self, len: usize) -> std::io::Result<()> { + // This record logs a build. We expect all the outputs to be + // outputs of the same build id; if not, that means the graph has + // changed since this log, in which case we just ignore it. + // + // It's possible we log a build that generates files A B, then + // change the build file such that it only generates file A; this + // logic will still attach the old dependencies to A, but it + // shouldn't matter because the changed command line will cause us + // to rebuild A regardless, and these dependencies are only used + // to affect dirty checking, not build order. + + let mut unique_bid = None; + let mut obsolete = false; + for _ in 0..len { + let fileid = self.read_id()?; + if obsolete { + // Even though we know we don't want this record, we must + // keep reading to parse through it. + continue; + } + match self.graph.file(self.ids.fileids[fileid]).input { + None => { + obsolete = true; + } + Some(bid) => { + match unique_bid { + None => unique_bid = Some(bid), + Some(unique_bid) if unique_bid == bid => { + // Ok, matches the existing id. + } + Some(_) => { + // Mismatch. 
+ unique_bid = None; + obsolete = true; + } + } + } + } + } + + let len = self.read_u16()?; + let mut deps = Vec::new(); + for _ in 0..len { + let id = self.read_id()?; + deps.push(self.ids.fileids[id]); + } + + let hash = BuildHash(self.read_u64()?); + + // unique_bid is set here if this record is valid. + if let Some(id) = unique_bid { + // Common case: only one associated build. + self.graph.builds[id].set_discovered_ins(deps); + self.hashes.set(id, hash); + } + Ok(()) + } + + fn read_signature(&mut self) -> anyhow::Result<()> { + let mut buf: [u8; 4] = [0; 4]; + self.r.read_exact(&mut buf[..])?; + if buf.as_slice() != "n2db".as_bytes() { + bail!("invalid db signature"); + } + self.r.read_exact(&mut buf[..])?; + let version = u32::from_le_bytes(buf); + if version != VERSION { + bail!("db version mismatch: got {version}, expected {VERSION}; TODO: db upgrades etc"); + } + Ok(()) + } + + fn read_file(&mut self) -> anyhow::Result<()> { + self.read_signature()?; + loop { + let mut len = match self.read_u16() { + Ok(r) => r, + Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => break, + Err(err) => bail!(err), + }; + let mask = 0b1000_0000_0000_0000; + if len & mask == 0 { + self.read_path(len as usize)?; + } else { + len &= !mask; + self.read_build(len as usize)?; + } + } + Ok(()) + } + + /// Reads an on-disk database, loading its state into the provided Graph/Hashes. + fn read(f: &mut File, graph: &mut Graph, hashes: &mut Hashes) -> anyhow::Result<IdMap> { + let mut r = Reader { + r: std::io::BufReader::new(f), + ids: IdMap::default(), + graph, + hashes, + }; + r.read_file()?; + + Ok(r.ids) + } +} + +/// Opens or creates an on-disk database, loading its state into the provided Graph. 
+pub fn open(path: &Path, graph: &mut Graph, hashes: &mut Hashes) -> anyhow::Result<Writer> { + match std::fs::OpenOptions::new() + .read(true) + .append(true) + .open(path) + { + Ok(mut f) => { + let ids = Reader::read(&mut f, graph, hashes)?; + Ok(Writer::from_opened(ids, f)) + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + let w = Writer::create(path)?; + Ok(w) + } + Err(err) => Err(anyhow!(err)), + } +} diff --git a/src/densemap.rs b/src/densemap.rs new file mode 100644 index 0000000..2d9878e --- /dev/null +++ b/src/densemap.rs @@ -0,0 +1,68 @@ +//! A map of dense integer key to value. + +use std::marker::PhantomData; + +pub trait Index: From<usize> { + fn index(&self) -> usize; +} + +/// A map of a dense integer key to value, implemented as a vector. +/// Effectively wraps Vec<V> to provided typed keys. +pub struct DenseMap<K, V> { + vec: Vec<V>, + key_type: std::marker::PhantomData<K>, +} + +impl<K, V> Default for DenseMap<K, V> { + fn default() -> Self { + DenseMap { + vec: Vec::default(), + key_type: PhantomData, + } + } +} + +impl<K: Index, V> std::ops::Index<K> for DenseMap<K, V> { + type Output = V; + + fn index(&self, k: K) -> &Self::Output { + &self.vec[k.index()] + } +} + +impl<K: Index, V> std::ops::IndexMut<K> for DenseMap<K, V> { + fn index_mut(&mut self, k: K) -> &mut Self::Output { + &mut self.vec[k.index()] + } +} + +impl<K: Index, V> DenseMap<K, V> { + pub fn lookup(&self, k: K) -> Option<&V> { + self.vec.get(k.index()) + } + + pub fn next_id(&self) -> K { + K::from(self.vec.len()) + } + + pub fn push(&mut self, val: V) -> K { + let id = self.next_id(); + self.vec.push(val); + id + } +} + +impl<K: Index, V: Clone> DenseMap<K, V> { + pub fn new_sized(n: K, default: V) -> Self { + let mut m = Self::default(); + m.vec.resize(n.index(), default); + m + } + + pub fn set_grow(&mut self, k: K, v: V, default: V) { + if k.index() >= self.vec.len() { + self.vec.resize(k.index() + 1, default); + } + self.vec[k.index()] = v + } +} diff 
--git a/src/depfile.rs b/src/depfile.rs new file mode 100644 index 0000000..b241cb6 --- /dev/null +++ b/src/depfile.rs @@ -0,0 +1,221 @@ +//! Parsing of Makefile syntax as found in `.d` files emitted by C compilers. + +use crate::{ + scanner::{ParseResult, Scanner}, + smallmap::SmallMap, +}; + +/// Skip spaces and backslashed newlines. +fn skip_spaces(scanner: &mut Scanner) -> ParseResult<()> { + loop { + match scanner.read() { + ' ' => {} + '\\' => match scanner.read() { + '\r' => scanner.expect('\n')?, + '\n' => {} + _ => return scanner.parse_error("invalid backslash escape"), + }, + _ => { + scanner.back(); + break; + } + } + } + Ok(()) +} + +/// Read one path from the input scanner. +/// Note: treats colon as a valid character in a path because of Windows-style +/// paths, but this means that the inital `output: ...` path will include the +/// trailing colon. +fn read_path<'a>(scanner: &mut Scanner<'a>) -> ParseResult<Option<&'a str>> { + skip_spaces(scanner)?; + let start = scanner.ofs; + loop { + match scanner.read() { + '\0' | ' ' | '\r' | '\n' => { + scanner.back(); + break; + } + '\\' => { + if scanner.peek_newline() { + scanner.back(); + break; + } + } + _ => {} + } + } + let end = scanner.ofs; + if end == start { + return Ok(None); + } + Ok(Some(scanner.slice(start, end))) +} + +/// Parse a `.d` file into `Deps`. +pub fn parse<'a>(scanner: &mut Scanner<'a>) -> ParseResult<SmallMap<&'a str, Vec<&'a str>>> { + let mut result = SmallMap::default(); + loop { + while scanner.peek() == ' ' || scanner.peek_newline() { + scanner.next(); + } + let target = match read_path(scanner)? { + None => break, + Some(o) => o, + }; + scanner.skip_spaces(); + let target = match target.strip_suffix(':') { + None => { + scanner.expect(':')?; + target + } + Some(target) => target, + }; + let mut deps = Vec::new(); + while let Some(p) = read_path(scanner)? 
{ + deps.push(p); + } + result.insert(target, deps); + } + scanner.expect('\0')?; + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::Path; + + fn try_parse(buf: &mut Vec<u8>) -> Result<SmallMap<&str, Vec<&str>>, String> { + buf.push(0); + let mut scanner = Scanner::new(buf); + parse(&mut scanner).map_err(|err| scanner.format_parse_error(Path::new("test"), err)) + } + + fn must_parse(buf: &mut Vec<u8>) -> SmallMap<&str, Vec<&str>> { + match try_parse(buf) { + Err(err) => { + println!("{}", err); + panic!("failed parse"); + } + Ok(d) => d, + } + } + + fn test_for_crlf(input: &str, test: fn(String)) { + let crlf = input.replace('\n', "\r\n"); + for test_case in [String::from(input), crlf] { + test(test_case); + } + } + + #[test] + fn test_parse() { + test_for_crlf( + "build/browse.o: src/browse.cc src/browse.h build/browse_py.h\n", + |text| { + let mut file = text.into_bytes(); + let deps = must_parse(&mut file); + assert_eq!( + deps, + SmallMap::from([( + "build/browse.o", + vec!["src/browse.cc", "src/browse.h", "build/browse_py.h",] + )]) + ); + }, + ); + } + + #[test] + fn test_parse_space_suffix() { + test_for_crlf("build/browse.o: src/browse.cc \n", |text| { + let mut file = text.into_bytes(); + let deps = must_parse(&mut file); + assert_eq!( + deps, + SmallMap::from([("build/browse.o", vec!["src/browse.cc",])]) + ); + }); + } + + #[test] + fn test_parse_multiline() { + test_for_crlf( + "build/browse.o: src/browse.cc\\\n build/browse_py.h", + |text| { + let mut file = text.into_bytes(); + let deps = must_parse(&mut file); + assert_eq!( + deps, + SmallMap::from([( + "build/browse.o", + vec!["src/browse.cc", "build/browse_py.h",] + )]) + ); + }, + ); + } + + #[test] + fn test_parse_without_final_newline() { + let mut file = b"build/browse.o: src/browse.cc".to_vec(); + let deps = must_parse(&mut file); + assert_eq!( + deps, + SmallMap::from([("build/browse.o", vec!["src/browse.cc",])]) + ); + } + + #[test] + fn 
test_parse_spaces_before_colon() { + let mut file = b"build/browse.o : src/browse.cc".to_vec(); + let deps = must_parse(&mut file); + assert_eq!( + deps, + SmallMap::from([("build/browse.o", vec!["src/browse.cc",])]) + ); + } + + #[test] + fn test_parse_windows_dep_path() { + let mut file = b"odd/path.o: C:/odd\\path.c".to_vec(); + let deps = must_parse(&mut file); + assert_eq!( + deps, + SmallMap::from([("odd/path.o", vec!["C:/odd\\path.c",])]) + ); + } + + #[test] + fn test_parse_multiple_targets() { + let mut file = b" +out/a.o: src/a.c \\ + src/b.c + +out/b.o : +" + .to_vec(); + let deps = must_parse(&mut file); + assert_eq!( + deps, + SmallMap::from([ + ("out/a.o", vec!["src/a.c", "src/b.c",]), + ("out/b.o", vec![]) + ]) + ); + } + + #[test] + fn test_parse_missing_colon() { + let mut file = b"foo bar".to_vec(); + let err = try_parse(&mut file).unwrap_err(); + assert!( + err.starts_with("parse error: expected ':'"), + "expected parse error, got {:?}", + err + ); + } +} diff --git a/src/eval.rs b/src/eval.rs new file mode 100644 index 0000000..d737bfe --- /dev/null +++ b/src/eval.rs @@ -0,0 +1,161 @@ +//! Represents parsed Ninja strings with embedded variable references, e.g. +//! `c++ $in -o $out`, and mechanisms for expanding those into plain strings. + +use rustc_hash::FxHashMap; + +use crate::smallmap::SmallMap; +use std::borrow::Borrow; +use std::borrow::Cow; + +/// An environment providing a mapping of variable name to variable value. +/// This represents one "frame" of evaluation context, a given EvalString may +/// need multiple environments in order to be fully expanded. +pub trait Env { + fn get_var(&self, var: &str) -> Option<EvalString<Cow<str>>>; +} + +/// One token within an EvalString, either literal text or a variable reference. +#[derive(Debug, Clone, PartialEq)] +pub enum EvalPart<T: AsRef<str>> { + Literal(T), + VarRef(T), +} + +/// A parsed but unexpanded variable-reference string, e.g. "cc $in -o $out". 
+/// This is generic to support EvalString<&str>, which is used for immediately- +/// expanded evals, like top-level bindings, and EvalString<String>, which is +/// used for delayed evals like in `rule` blocks. +#[derive(Debug, PartialEq)] +pub struct EvalString<T: AsRef<str>>(Vec<EvalPart<T>>); +impl<T: AsRef<str>> EvalString<T> { + pub fn new(parts: Vec<EvalPart<T>>) -> Self { + EvalString(parts) + } + + fn evaluate_inner(&self, result: &mut String, envs: &[&dyn Env]) { + for part in &self.0 { + match part { + EvalPart::Literal(s) => result.push_str(s.as_ref()), + EvalPart::VarRef(v) => { + for (i, env) in envs.iter().enumerate() { + if let Some(v) = env.get_var(v.as_ref()) { + v.evaluate_inner(result, &envs[i + 1..]); + break; + } + } + } + } + } + } + + fn calc_evaluated_length(&self, envs: &[&dyn Env]) -> usize { + self.0 + .iter() + .map(|part| match part { + EvalPart::Literal(s) => s.as_ref().len(), + EvalPart::VarRef(v) => { + for (i, env) in envs.iter().enumerate() { + if let Some(v) = env.get_var(v.as_ref()) { + return v.calc_evaluated_length(&envs[i + 1..]); + } + } + 0 + } + }) + .sum() + } + + /// evalulate turns the EvalString into a regular String, looking up the + /// values of variable references in the provided Envs. It will look up + /// its variables in the earliest Env that has them, and then those lookups + /// will be recursively expanded starting from the env after the one that + /// had the first successful lookup. 
+ pub fn evaluate(&self, envs: &[&dyn Env]) -> String { + let mut result = String::new(); + result.reserve(self.calc_evaluated_length(envs)); + self.evaluate_inner(&mut result, envs); + result + } +} + +impl EvalString<&str> { + pub fn into_owned(self) -> EvalString<String> { + EvalString( + self.0 + .into_iter() + .map(|part| match part { + EvalPart::Literal(s) => EvalPart::Literal(s.to_owned()), + EvalPart::VarRef(s) => EvalPart::VarRef(s.to_owned()), + }) + .collect(), + ) + } +} + +impl EvalString<String> { + pub fn as_cow(&self) -> EvalString<Cow<str>> { + EvalString( + self.0 + .iter() + .map(|part| match part { + EvalPart::Literal(s) => EvalPart::Literal(Cow::Borrowed(s.as_ref())), + EvalPart::VarRef(s) => EvalPart::VarRef(Cow::Borrowed(s.as_ref())), + }) + .collect(), + ) + } +} + +impl EvalString<&str> { + pub fn as_cow(&self) -> EvalString<Cow<str>> { + EvalString( + self.0 + .iter() + .map(|part| match part { + EvalPart::Literal(s) => EvalPart::Literal(Cow::Borrowed(*s)), + EvalPart::VarRef(s) => EvalPart::VarRef(Cow::Borrowed(*s)), + }) + .collect(), + ) + } +} + +/// A single scope's worth of variable definitions. 
+#[derive(Debug, Default)] +pub struct Vars<'text>(FxHashMap<&'text str, String>); + +impl<'text> Vars<'text> { + pub fn insert(&mut self, key: &'text str, val: String) { + self.0.insert(key, val); + } + pub fn get(&self, key: &str) -> Option<&String> { + self.0.get(key) + } +} +impl<'a> Env for Vars<'a> { + fn get_var(&self, var: &str) -> Option<EvalString<Cow<str>>> { + Some(EvalString::new(vec![EvalPart::Literal( + std::borrow::Cow::Borrowed(self.get(var)?), + )])) + } +} + +impl<K: Borrow<str> + PartialEq> Env for SmallMap<K, EvalString<String>> { + fn get_var(&self, var: &str) -> Option<EvalString<Cow<str>>> { + Some(self.get(var)?.as_cow()) + } +} + +impl<K: Borrow<str> + PartialEq> Env for SmallMap<K, EvalString<&str>> { + fn get_var(&self, var: &str) -> Option<EvalString<Cow<str>>> { + Some(self.get(var)?.as_cow()) + } +} + +impl Env for SmallMap<&str, String> { + fn get_var(&self, var: &str) -> Option<EvalString<Cow<str>>> { + Some(EvalString::new(vec![EvalPart::Literal( + std::borrow::Cow::Borrowed(self.get(var)?), + )])) + } +} diff --git a/src/graph.rs b/src/graph.rs new file mode 100644 index 0000000..d6b1a38 --- /dev/null +++ b/src/graph.rs @@ -0,0 +1,427 @@ +//! The build graph, a graph between files and commands. + +use rustc_hash::FxHashMap; + +use crate::{ + densemap::{self, DenseMap}, + hash::BuildHash, +}; +use std::collections::{hash_map::Entry, HashMap}; +use std::path::{Path, PathBuf}; +use std::time::SystemTime; + +/// Id for File nodes in the Graph. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct FileId(u32); +impl densemap::Index for FileId { + fn index(&self) -> usize { + self.0 as usize + } +} +impl From<usize> for FileId { + fn from(u: usize) -> FileId { + FileId(u as u32) + } +} + +/// Id for Build nodes in the Graph. 
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct BuildId(u32); +impl densemap::Index for BuildId { + fn index(&self) -> usize { + self.0 as usize + } +} +impl From<usize> for BuildId { + fn from(u: usize) -> BuildId { + BuildId(u as u32) + } +} + +/// A single file referenced as part of a build. +#[derive(Debug)] +pub struct File { + /// Canonical path to the file. + pub name: String, + /// The Build that generates this file, if any. + pub input: Option<BuildId>, + /// The Builds that depend on this file as an input. + pub dependents: Vec<BuildId>, +} + +impl File { + pub fn path(&self) -> &Path { + Path::new(&self.name) + } +} + +/// A textual location within a build.ninja file, used in error messages. +#[derive(Debug)] +pub struct FileLoc { + pub filename: std::rc::Rc<PathBuf>, + pub line: usize, +} +impl std::fmt::Display for FileLoc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(f, "{}:{}", self.filename.display(), self.line) + } +} + +#[derive(Debug, Clone, Hash)] +pub struct RspFile { + pub path: std::path::PathBuf, + pub content: String, +} + +/// Input files to a Build. +pub struct BuildIns { + /// Internally we stuff explicit/implicit/order-only ins all into one Vec. + /// This is mostly to simplify some of the iteration and is a little more + /// memory efficient than three separate Vecs, but it is kept internal to + /// Build and only exposed via methods on Build. + pub ids: Vec<FileId>, + pub explicit: usize, + pub implicit: usize, + pub order_only: usize, + // validation count implied by other counts. + // pub validation: usize, +} + +/// Output files from a Build. +pub struct BuildOuts { + /// Similar to ins, we keep both explicit and implicit outs in one Vec. + pub ids: Vec<FileId>, + pub explicit: usize, +} + +impl BuildOuts { + /// CMake seems to generate build files with the same output mentioned + /// multiple times in the outputs list. 
Given that Ninja accepts these, + /// this function removes duplicates from the output list. + pub fn remove_duplicates(&mut self) { + let mut ids = Vec::new(); + for (i, &id) in self.ids.iter().enumerate() { + if self.ids[0..i].iter().any(|&prev| prev == id) { + // Skip over duplicate. + if i < self.explicit { + self.explicit -= 1; + } + continue; + } + ids.push(id); + } + self.ids = ids; + } +} + +#[cfg(test)] +mod tests { + fn fileids(ids: Vec<usize>) -> Vec<FileId> { + ids.into_iter().map(FileId::from).collect() + } + + use super::*; + #[test] + fn remove_dups_explicit() { + let mut outs = BuildOuts { + ids: fileids(vec![1, 1, 2]), + explicit: 2, + }; + outs.remove_duplicates(); + assert_eq!(outs.ids, fileids(vec![1, 2])); + assert_eq!(outs.explicit, 1); + } + + #[test] + fn remove_dups_implicit() { + let mut outs = BuildOuts { + ids: fileids(vec![1, 2, 1]), + explicit: 2, + }; + outs.remove_duplicates(); + assert_eq!(outs.ids, fileids(vec![1, 2])); + assert_eq!(outs.explicit, 2); + } +} + +/// A single build action, generating File outputs from File inputs with a command. +pub struct Build { + /// Source location this Build was declared. + pub location: FileLoc, + + /// User-provided description of the build step. + pub desc: Option<String>, + + /// Command line to run. Absent for phony builds. + pub cmdline: Option<String>, + + /// Path to generated `.d` file, if any. + pub depfile: Option<String>, + + /// If true, extract "/showIncludes" lines from output. + pub parse_showincludes: bool, + + // Struct that contains the path to the rsp file and its contents, if any. + pub rspfile: Option<RspFile>, + + /// Pool to execute this build in, if any. + pub pool: Option<String>, + + pub ins: BuildIns, + + /// Additional inputs discovered from a previous build. + discovered_ins: Vec<FileId>, + + /// Output files. 
+ pub outs: BuildOuts, +} +impl Build { + pub fn new(loc: FileLoc, ins: BuildIns, outs: BuildOuts) -> Self { + Build { + location: loc, + desc: None, + cmdline: None, + depfile: None, + parse_showincludes: false, + rspfile: None, + pool: None, + ins, + discovered_ins: Vec::new(), + outs, + } + } + + /// Input paths that appear in `$in`. + pub fn explicit_ins(&self) -> &[FileId] { + &self.ins.ids[0..self.ins.explicit] + } + + /// Input paths that, if changed, invalidate the output. + /// Note this omits discovered_ins, which also invalidate the output. + pub fn dirtying_ins(&self) -> &[FileId] { + &self.ins.ids[0..(self.ins.explicit + self.ins.implicit)] + } + + /// Inputs that are needed before building. + /// Distinct from dirtying_ins in that it includes order-only dependencies. + /// Note that we don't order on discovered_ins, because they're not allowed to + /// affect build order. + pub fn ordering_ins(&self) -> &[FileId] { + &self.ins.ids[0..(self.ins.order_only + self.ins.explicit + self.ins.implicit)] + } + + /// Inputs that are needed before validating information. + /// Validation inputs will be built whenever this Build is built, but this Build will not + /// wait for them to complete before running. The validation inputs can fail to build, which + /// will cause the overall build to fail. + pub fn validation_ins(&self) -> &[FileId] { + &self.ins.ids[(self.ins.order_only + self.ins.explicit + self.ins.implicit)..] + } + + /// Potentially update discovered_ins with a new set of deps, returning true if they changed. + pub fn update_discovered(&mut self, deps: Vec<FileId>) -> bool { + if deps == self.discovered_ins { + false + } else { + self.set_discovered_ins(deps); + true + } + } + + pub fn set_discovered_ins(&mut self, deps: Vec<FileId>) { + self.discovered_ins = deps; + } + + /// Input paths that were discovered after building, for use in the next build. 
+ pub fn discovered_ins(&self) -> &[FileId] { + &self.discovered_ins + } + + /// Output paths that appear in `$out`. + pub fn explicit_outs(&self) -> &[FileId] { + &self.outs.ids[0..self.outs.explicit] + } + + /// Output paths that are updated when the build runs. + pub fn outs(&self) -> &[FileId] { + &self.outs.ids + } +} + +/// The build graph: owns Files/Builds and maps FileIds/BuildIds to them. +#[derive(Default)] +pub struct Graph { + pub builds: DenseMap<BuildId, Build>, + pub files: GraphFiles, +} + +/// Files identified by FileId, as well as mapping string filenames to them. +/// Split from Graph for lifetime reasons. +#[derive(Default)] +pub struct GraphFiles { + pub by_id: DenseMap<FileId, File>, + by_name: FxHashMap<String, FileId>, +} + +impl Graph { + /// Look up a file by its FileId. + pub fn file(&self, id: FileId) -> &File { + &self.files.by_id[id] + } + + /// Add a new Build, generating a BuildId for it. + pub fn add_build(&mut self, mut build: Build) -> anyhow::Result<()> { + let new_id = self.builds.next_id(); + for &id in &build.ins.ids { + self.files.by_id[id].dependents.push(new_id); + } + let mut fixup_dups = false; + for &id in &build.outs.ids { + let f = &mut self.files.by_id[id]; + match f.input { + Some(prev) if prev == new_id => { + fixup_dups = true; + println!( + "n2: warn: {}: {:?} is repeated in output list", + build.location, f.name, + ); + } + Some(prev) => { + anyhow::bail!( + "{}: {:?} is already an output at {}", + build.location, + f.name, + self.builds[prev].location + ); + } + None => f.input = Some(new_id), + } + } + if fixup_dups { + build.outs.remove_duplicates(); + } + self.builds.push(build); + Ok(()) + } +} + +impl GraphFiles { + /// Look up a file by its name. Name must have been canonicalized already. + pub fn lookup(&self, file: &str) -> Option<FileId> { + self.by_name.get(file).copied() + } + + /// Look up a file by its name, adding it if not already present. + /// Name must have been canonicalized already. 
Only accepting an owned + /// string allows us to avoid a string copy and a hashmap lookup when we + /// need to create a new id, but would also be possible to create a version + /// of this function that accepts string references that is more optimized + /// for the case where the entry already exists. But so far, all of our + /// usages of this function have an owned string easily accessible anyways. + pub fn id_from_canonical(&mut self, file: String) -> FileId { + // TODO: so many string copies :< + match self.by_name.entry(file) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let id = self.by_id.push(File { + name: v.key().clone(), + input: None, + dependents: Vec::new(), + }); + v.insert(id); + id + } + } + } + + pub fn all_ids(&self) -> impl Iterator<Item = FileId> { + (0..self.by_id.next_id().0).map(|id| FileId(id)) + } +} + +/// MTime info gathered for a file. This also models "file is absent". +/// It's not using an Option<> just because it makes the code using it easier +/// to follow. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum MTime { + Missing, + Stamp(SystemTime), +} + +/// stat() an on-disk path, producing its MTime. +pub fn stat(path: &Path) -> std::io::Result<MTime> { + // TODO: On Windows, use FindFirstFileEx()/FindNextFile() to get timestamps per + // directory, for better stat perf. + Ok(match std::fs::metadata(path) { + Ok(meta) => MTime::Stamp(meta.modified().unwrap()), + Err(err) => { + if err.kind() == std::io::ErrorKind::NotFound { + MTime::Missing + } else { + return Err(err); + } + } + }) +} + +/// Gathered state of on-disk files. +/// Due to discovered deps this map may grow after graph initialization. 
+pub struct FileState(DenseMap<FileId, Option<MTime>>); + +impl FileState { + pub fn new(graph: &Graph) -> Self { + FileState(DenseMap::new_sized(graph.files.by_id.next_id(), None)) + } + + pub fn get(&self, id: FileId) -> Option<MTime> { + self.0.lookup(id).copied().unwrap_or(None) + } + + pub fn stat(&mut self, id: FileId, path: &Path) -> anyhow::Result<MTime> { + let mtime = stat(path).map_err(|err| anyhow::anyhow!("stat {:?}: {}", path, err))?; + self.0.set_grow(id, Some(mtime), None); + Ok(mtime) + } +} + +#[derive(Default)] +pub struct Hashes(HashMap<BuildId, BuildHash>); + +impl Hashes { + pub fn set(&mut self, id: BuildId, hash: BuildHash) { + self.0.insert(id, hash); + } + + pub fn get(&self, id: BuildId) -> Option<BuildHash> { + self.0.get(&id).copied() + } +} + +#[test] +fn stat_mtime_resolution() { + use std::time::Duration; + + let temp_dir = tempfile::tempdir().unwrap(); + let filename = temp_dir.path().join("dummy"); + + // Write once and stat. + std::fs::write(&filename, "foo").unwrap(); + let mtime1 = match stat(&filename).unwrap() { + MTime::Stamp(mtime) => mtime, + _ => panic!("File not found: {}", filename.display()), + }; + + // Sleep for a short interval. + std::thread::sleep(std::time::Duration::from_millis(10)); + + // Write twice and stat. + std::fs::write(&filename, "foo").unwrap(); + let mtime2 = match stat(&filename).unwrap() { + MTime::Stamp(mtime) => mtime, + _ => panic!("File not found: {}", filename.display()), + }; + + let diff = mtime2.duration_since(mtime1).unwrap(); + assert!(diff > Duration::ZERO); + assert!(diff < Duration::from_millis(100)); +} diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000..23ea8d5 --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,166 @@ +//! A single hash over input attributes is recorded and used to determine when +//! those inputs change. +//! +//! See "Manifests instead of mtime order" in +//! 
https://neugierig.org/software/blog/2022/03/n2.html + +use crate::graph::{Build, FileId, FileState, GraphFiles, MTime, RspFile}; +use std::{ + collections::hash_map::DefaultHasher, + fmt::Write, + hash::{Hash, Hasher}, + time::SystemTime, +}; + +/// Hash value used to identify a given instance of a Build's execution; +/// compared to verify whether a Build is up to date. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct BuildHash(pub u64); + +/// A trait for computing a build's manifest. Indirected as a trait so we can +/// implement it a second time for "-d explain" debug purposes. +trait Manifest { + /// Write a list of files+mtimes. desc is used only for "-d explain" output. + fn write_files( + &mut self, + desc: &str, + files: &GraphFiles, + file_state: &FileState, + ids: &[FileId], + ); + fn write_rsp(&mut self, rspfile: &RspFile); + fn write_cmdline(&mut self, cmdline: &str); +} + +fn get_fileid_status<'a>( + files: &'a GraphFiles, + file_state: &FileState, + id: FileId, +) -> (&'a str, SystemTime) { + let name = &files.by_id[id].name; + let mtime = file_state + .get(id) + .unwrap_or_else(|| panic!("no state for {:?}", name)); + let mtime = match mtime { + MTime::Stamp(mtime) => mtime, + MTime::Missing => panic!("missing file: {:?}", name), + }; + (name.as_str(), mtime) +} + +/// The BuildHasher used during normal builds, designed to not serialize too much. 
+#[derive(Default)] +struct TerseHash(DefaultHasher); + +const UNIT_SEPARATOR: u8 = 0x1F; + +impl TerseHash { + fn write_string(&mut self, string: &str) { + string.hash(&mut self.0); + } + + fn write_separator(&mut self) { + self.0.write_u8(UNIT_SEPARATOR); + } + + fn finish(&mut self) -> BuildHash { + BuildHash(self.0.finish()) + } +} + +impl Manifest for TerseHash { + fn write_files<'a>( + &mut self, + _desc: &str, + files: &GraphFiles, + file_state: &FileState, + ids: &[FileId], + ) { + for &id in ids { + let (name, mtime) = get_fileid_status(files, file_state, id); + self.write_string(name); + mtime.hash(&mut self.0); + } + self.write_separator(); + } + + fn write_cmdline(&mut self, cmdline: &str) { + self.write_string(cmdline); + self.write_separator(); + } + + fn write_rsp(&mut self, rspfile: &RspFile) { + rspfile.hash(&mut self.0); + } +} + +fn build_manifest<M: Manifest>( + manifest: &mut M, + files: &GraphFiles, + file_state: &FileState, + build: &Build, +) { + manifest.write_files("in", files, file_state, build.dirtying_ins()); + manifest.write_files("discovered", files, file_state, build.discovered_ins()); + manifest.write_cmdline(build.cmdline.as_deref().unwrap_or("")); + if let Some(rspfile) = &build.rspfile { + manifest.write_rsp(rspfile); + } + manifest.write_files("out", files, file_state, build.outs()); +} + +// Hashes the inputs of a build to compute a signature. +// Prerequisite: all referenced files have already been stat()ed and are present. +// (It doesn't make sense to hash a build with missing files, because it's out +// of date regardless of the state of the other files.) +pub fn hash_build(files: &GraphFiles, file_state: &FileState, build: &Build) -> BuildHash { + let mut hasher = TerseHash::default(); + build_manifest(&mut hasher, files, file_state, build); + hasher.finish() +} + +/// A BuildHasher that records human-readable text for "-d explain" debugging. 
+#[derive(Default)] +struct ExplainHash { + text: String, +} + +impl Manifest for ExplainHash { + fn write_files<'a>( + &mut self, + desc: &str, + files: &GraphFiles, + file_state: &FileState, + ids: &[FileId], + ) { + writeln!(&mut self.text, "{desc}:").unwrap(); + for &id in ids { + let (name, mtime) = get_fileid_status(files, file_state, id); + let millis = mtime + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis(); + writeln!(&mut self.text, " {millis} {name}").unwrap(); + } + } + + fn write_rsp(&mut self, rspfile: &RspFile) { + writeln!(&mut self.text, "rspfile path: {}", rspfile.path.display()).unwrap(); + + let mut h = DefaultHasher::new(); + h.write(rspfile.content.as_bytes()); + writeln!(&mut self.text, "rspfile hash: {:x}", h.finish()).unwrap(); + } + + fn write_cmdline(&mut self, cmdline: &str) { + writeln!(&mut self.text, "cmdline: {}", cmdline).unwrap(); + } +} + +/// Logs human-readable state of all the inputs used for hashing a given build. +/// Used for "-d explain" debugging output. +pub fn explain_hash_build(files: &GraphFiles, file_state: &FileState, build: &Build) -> String { + let mut explainer = ExplainHash::default(); + build_manifest(&mut explainer, files, file_state, build); + explainer.text +} diff --git a/src/intern.rs b/src/intern.rs new file mode 100644 index 0000000..1273bc1 --- /dev/null +++ b/src/intern.rs @@ -0,0 +1,88 @@ +//! String interning. +//! Not currently used in n2. 
+ +use hashbrown::raw::RawTable; +use std::hash::Hasher; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct Symbol(usize); + +struct EndTab { + data: Vec<u8>, + ends: Vec<usize>, +} + +impl EndTab { + fn add(&mut self, s: &[u8]) -> Symbol { + self.data.extend_from_slice(s); + let sym = Symbol(self.ends.len()); + self.ends.push(self.data.len()); + sym + } + fn get(&self, sym: Symbol) -> &[u8] { + let start = if sym.0 > 0 { self.ends[sym.0 - 1] } else { 0 }; + let end = self.ends[sym.0]; + &self.data[start..end] + } +} + +pub struct Intern { + lookup: RawTable<Symbol>, + endtab: EndTab, +} + +fn hash_bytes(s: &[u8]) -> u64 { + let mut hasher = ahash::AHasher::default(); + hasher.write(s); + hasher.finish() +} + +impl Intern { + pub fn new() -> Intern { + Intern { + lookup: RawTable::new(), + endtab: EndTab { + data: Vec::new(), + ends: Vec::new(), + }, + } + } + + pub fn add<'a>(&mut self, s: &'a [u8]) -> Symbol { + let hash = hash_bytes(s); + if let Some(sym) = self + .lookup + .get(hash, |sym: &Symbol| s == self.endtab.get(*sym)) + { + return *sym; + } + let sym = self.endtab.add(s); + + let endtab = &self.endtab; + self.lookup + .insert(hash, sym, |sym: &Symbol| hash_bytes(endtab.get(*sym))); + sym + } + + pub fn get(&self, sym: Symbol) -> &[u8] { + self.endtab.get(sym) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn user() { + let mut i = Intern::new(); + let hi = i.add("hi".as_bytes()); + let yo = i.add("yo".as_bytes()); + let hi2 = i.add("hi".as_bytes()); + assert_eq!(hi, hi2); + assert_ne!(hi, yo); + + assert_eq!(i.get(hi), "hi".as_bytes()); + assert_eq!(i.get(yo), "yo".as_bytes()); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..0989e56 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,31 @@ +pub mod canon; +mod db; +mod densemap; +mod depfile; +mod eval; +mod graph; +mod hash; +pub mod load; +pub mod parse; +mod process; +#[cfg(unix)] +mod process_posix; +#[cfg(windows)] +mod process_win; +mod progress; 
+pub mod run; +pub mod scanner; +mod signal; +mod smallmap; +mod task; +mod terminal; +mod trace; +mod work; + +// TODO: Import Jemalloc into android and re-enable it here. +// #[cfg(not(any(windows, target_arch = "wasm32")))] +// use jemallocator::Jemalloc; + +// #[cfg(not(any(windows, target_arch = "wasm32")))] +// #[global_allocator] +// static GLOBAL: Jemalloc = Jemalloc; diff --git a/src/load.rs b/src/load.rs new file mode 100644 index 0000000..edcf49d --- /dev/null +++ b/src/load.rs @@ -0,0 +1,285 @@ +//! Graph loading: runs .ninja parsing and constructs the build graph from it. + +use crate::{ + canon::{canon_path, canon_path_fast}, + eval::{EvalPart, EvalString}, + graph::{FileId, RspFile}, + parse::Statement, + scanner, + smallmap::SmallMap, + {db, eval, graph, parse, trace}, +}; +use anyhow::{anyhow, bail}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::{borrow::Cow, path::Path}; + +/// A variable lookup environment for magic $in/$out variables. +struct BuildImplicitVars<'a> { + graph: &'a graph::Graph, + build: &'a graph::Build, +} +impl<'a> BuildImplicitVars<'a> { + fn file_list(&self, ids: &[FileId], sep: char) -> String { + let mut out = String::new(); + for &id in ids { + if !out.is_empty() { + out.push(sep); + } + out.push_str(&self.graph.file(id).name); + } + out + } +} +impl<'a> eval::Env for BuildImplicitVars<'a> { + fn get_var(&self, var: &str) -> Option<EvalString<Cow<str>>> { + let string_to_evalstring = + |s: String| Some(EvalString::new(vec![EvalPart::Literal(Cow::Owned(s))])); + match var { + "in" => string_to_evalstring(self.file_list(self.build.explicit_ins(), ' ')), + "in_newline" => string_to_evalstring(self.file_list(self.build.explicit_ins(), '\n')), + "out" => string_to_evalstring(self.file_list(self.build.explicit_outs(), ' ')), + "out_newline" => string_to_evalstring(self.file_list(self.build.explicit_outs(), '\n')), + _ => None, + } + } +} + +/// Internal state used while loading. 
+#[derive(Default)] +pub struct Loader { + graph: graph::Graph, + default: Vec<FileId>, + /// rule name -> list of (key, val) + rules: HashMap<String, SmallMap<String, eval::EvalString<String>>>, + pools: SmallMap<String, usize>, + builddir: Option<String>, +} + +impl Loader { + pub fn new() -> Self { + let mut loader = Loader::default(); + + loader.rules.insert("phony".to_owned(), SmallMap::default()); + + loader + } + + /// Convert a path string to a FileId. For performance reasons + /// this requires an owned 'path' param. + fn path(&mut self, mut path: String) -> FileId { + // Perf: this is called while parsing build.ninja files. We go to + // some effort to avoid allocating in the common case of a path that + // refers to a file that is already known. + let len = canon_path_fast(&mut path); + path.truncate(len); + self.graph.files.id_from_canonical(path) + } + + fn evaluate_path(&mut self, path: EvalString<&str>, envs: &[&dyn eval::Env]) -> FileId { + self.path(path.evaluate(envs)) + } + + fn evaluate_paths( + &mut self, + paths: Vec<EvalString<&str>>, + envs: &[&dyn eval::Env], + ) -> Vec<FileId> { + paths + .into_iter() + .map(|path| self.evaluate_path(path, envs)) + .collect() + } + + fn add_build( + &mut self, + filename: std::rc::Rc<PathBuf>, + env: &eval::Vars, + b: parse::Build, + ) -> anyhow::Result<()> { + let ins = graph::BuildIns { + ids: self.evaluate_paths(b.ins, &[&b.vars, env]), + explicit: b.explicit_ins, + implicit: b.implicit_ins, + order_only: b.order_only_ins, + // validation is implied by the other counts + }; + let outs = graph::BuildOuts { + ids: self.evaluate_paths(b.outs, &[&b.vars, env]), + explicit: b.explicit_outs, + }; + let mut build = graph::Build::new( + graph::FileLoc { + filename, + line: b.line, + }, + ins, + outs, + ); + + let rule = match self.rules.get(b.rule) { + Some(r) => r, + None => bail!("unknown rule {:?}", b.rule), + }; + + let implicit_vars = BuildImplicitVars { + graph: &self.graph, + build: &build, + }; + + // 
temp variable in order to not move all of b into the closure + let build_vars = &b.vars; + let lookup = |key: &str| -> Option<String> { + // Look up `key = ...` binding in build and rule block. + Some(match rule.get(key) { + Some(val) => val.evaluate(&[&implicit_vars, build_vars, env]), + None => build_vars.get(key)?.evaluate(&[env]), + }) + }; + + let cmdline = lookup("command"); + let desc = lookup("description"); + let depfile = lookup("depfile"); + let parse_showincludes = match lookup("deps").as_deref() { + None => false, + Some("gcc") => false, + Some("msvc") => true, + Some(other) => bail!("invalid deps attribute {:?}", other), + }; + let pool = lookup("pool"); + + let rspfile_path = lookup("rspfile"); + let rspfile_content = lookup("rspfile_content"); + let rspfile = match (rspfile_path, rspfile_content) { + (None, None) => None, + (Some(path), Some(content)) => Some(RspFile { + path: std::path::PathBuf::from(path), + content, + }), + _ => bail!("rspfile and rspfile_content need to be both specified"), + }; + + build.cmdline = cmdline; + build.desc = desc; + build.depfile = depfile; + build.parse_showincludes = parse_showincludes; + build.rspfile = rspfile; + build.pool = pool; + + self.graph.add_build(build) + } + + fn read_file(&mut self, id: FileId) -> anyhow::Result<()> { + let path = self.graph.file(id).path().to_path_buf(); + let bytes = match trace::scope("read file", || scanner::read_file_with_nul(&path)) { + Ok(b) => b, + Err(e) => bail!("read {}: {}", path.display(), e), + }; + self.parse(path, &bytes) + } + + fn evaluate_and_read_file( + &mut self, + file: EvalString<&str>, + envs: &[&dyn eval::Env], + ) -> anyhow::Result<()> { + let evaluated = self.evaluate_path(file, envs); + self.read_file(evaluated) + } + + pub fn parse(&mut self, path: PathBuf, bytes: &[u8]) -> anyhow::Result<()> { + let filename = std::rc::Rc::new(path); + + let mut parser = parse::Parser::new(&bytes); + + loop { + let stmt = match parser + .read() + .map_err(|err| 
anyhow!(parser.format_parse_error(&filename, err)))? + { + None => break, + Some(s) => s, + }; + match stmt { + Statement::Include(id) => trace::scope("include", || { + self.evaluate_and_read_file(id, &[&parser.vars]) + })?, + // TODO: implement scoping for subninja + Statement::Subninja(id) => trace::scope("subninja", || { + self.evaluate_and_read_file(id, &[&parser.vars]) + })?, + Statement::Default(defaults) => { + let evaluated = self.evaluate_paths(defaults, &[&parser.vars]); + self.default.extend(evaluated); + } + Statement::Rule(rule) => { + let mut vars: SmallMap<String, eval::EvalString<String>> = SmallMap::default(); + for (name, val) in rule.vars.into_iter() { + // TODO: We should not need to call .into_owned() here + // if we keep the contents of all included files in + // memory. + vars.insert(name.to_owned(), val.into_owned()); + } + self.rules.insert(rule.name.to_owned(), vars); + } + Statement::Build(build) => self.add_build(filename.clone(), &parser.vars, build)?, + Statement::Pool(pool) => { + self.pools.insert(pool.name.to_string(), pool.depth); + } + }; + } + self.builddir = parser.vars.get("builddir").cloned(); + Ok(()) + } +} + +/// State loaded by read(). +pub struct State { + pub graph: graph::Graph, + pub db: db::Writer, + pub hashes: graph::Hashes, + pub default: Vec<FileId>, + pub pools: SmallMap<String, usize>, +} + +/// Load build.ninja/.n2_db and return the loaded build graph and state. 
+pub fn read(build_filename: &str) -> anyhow::Result<State> { + let mut loader = Loader::new(); + trace::scope("loader.read_file", || { + let id = loader + .graph + .files + .id_from_canonical(canon_path(build_filename)); + loader.read_file(id) + })?; + let mut hashes = graph::Hashes::default(); + let db = trace::scope("db::open", || { + let mut db_path = PathBuf::from(".n2_db"); + if let Some(builddir) = &loader.builddir { + db_path = Path::new(&builddir).join(db_path); + if let Some(parent) = db_path.parent() { + std::fs::create_dir_all(parent)?; + } + }; + db::open(&db_path, &mut loader.graph, &mut hashes) + }) + .map_err(|err| anyhow!("load .n2_db: {}", err))?; + Ok(State { + graph: loader.graph, + db, + hashes, + default: loader.default, + pools: loader.pools, + }) +} + +/// Parse a single file's content. +#[cfg(test)] +pub fn parse(name: &str, mut content: Vec<u8>) -> anyhow::Result<graph::Graph> { + content.push(0); + let mut loader = Loader::new(); + trace::scope("loader.read_file", || { + loader.parse(PathBuf::from(name), &content) + })?; + Ok(loader.graph) +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..bec0a2d --- /dev/null +++ b/src/main.rs @@ -0,0 +1,12 @@ +fn main() { + let exit_code = match n2::run::run() { + Ok(code) => code, + Err(err) => { + println!("n2: error: {}", err); + 1 + } + }; + if exit_code != 0 { + std::process::exit(exit_code); + } +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..04a8397 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,514 @@ +//! Parser for .ninja files. +//! +//! See design notes on parsing in doc/design_notes.md. +//! +//! To avoid allocations parsing frequently uses references into the input +//! text, marked with the lifetime `'text`. 
+ +use crate::{ + eval::{EvalPart, EvalString, Vars}, + scanner::{ParseError, ParseResult, Scanner}, + smallmap::SmallMap, +}; +use std::path::Path; + +/// A list of variable bindings, as expressed with syntax like: +/// key = $val +pub type VarList<'text> = SmallMap<&'text str, EvalString<&'text str>>; + +pub struct Rule<'text> { + pub name: &'text str, + pub vars: VarList<'text>, +} + +pub struct Build<'text> { + pub rule: &'text str, + pub line: usize, + pub outs: Vec<EvalString<&'text str>>, + pub explicit_outs: usize, + pub ins: Vec<EvalString<&'text str>>, + pub explicit_ins: usize, + pub implicit_ins: usize, + pub order_only_ins: usize, + pub validation_ins: usize, + pub vars: VarList<'text>, +} + +#[derive(Debug)] +pub struct Pool<'text> { + pub name: &'text str, + pub depth: usize, +} + +pub enum Statement<'text> { + Rule(Rule<'text>), + Build(Build<'text>), + Default(Vec<EvalString<&'text str>>), + Include(EvalString<&'text str>), + Subninja(EvalString<&'text str>), + Pool(Pool<'text>), +} + +pub struct Parser<'text> { + scanner: Scanner<'text>, + pub vars: Vars<'text>, + /// Reading EvalStrings is very hot when parsing, so we always read into + /// this buffer and then clone it afterwards. 
+ eval_buf: Vec<EvalPart<&'text str>>, +} + +impl<'text> Parser<'text> { + pub fn new(buf: &'text [u8]) -> Parser<'text> { + Parser { + scanner: Scanner::new(buf), + vars: Vars::default(), + eval_buf: Vec::with_capacity(16), + } + } + + pub fn format_parse_error(&self, filename: &Path, err: ParseError) -> String { + self.scanner.format_parse_error(filename, err) + } + + pub fn read(&mut self) -> ParseResult<Option<Statement<'text>>> { + loop { + match self.scanner.peek() { + '\0' => return Ok(None), + '\n' | '\r' => self.scanner.next(), + '#' => self.skip_comment()?, + ' ' | '\t' => return self.scanner.parse_error("unexpected whitespace"), + _ => { + let ident = self.read_ident()?; + self.skip_spaces(); + match ident { + "rule" => return Ok(Some(Statement::Rule(self.read_rule()?))), + "build" => return Ok(Some(Statement::Build(self.read_build()?))), + "default" => return Ok(Some(Statement::Default(self.read_default()?))), + "include" => { + return Ok(Some(Statement::Include(self.read_eval(false)?))); + } + "subninja" => { + return Ok(Some(Statement::Subninja(self.read_eval(false)?))); + } + "pool" => return Ok(Some(Statement::Pool(self.read_pool()?))), + ident => { + // TODO: The evaluation of global variables should + // be moved out of the parser, so that we can run + // multiple parsers in parallel and then evaluate + // all the variables in series at the end. + let val = self.read_vardef()?.evaluate(&[&self.vars]); + self.vars.insert(ident, val); + } + } + } + } + } + } + + /// Read the `= ...` part of a variable definition. 
+ fn read_vardef(&mut self) -> ParseResult<EvalString<&'text str>> { + self.skip_spaces(); + self.scanner.expect('=')?; + self.skip_spaces(); + // read_eval will error out if there's nothing to read + if self.scanner.peek_newline() { + self.scanner.skip('\r'); + self.scanner.expect('\n')?; + return Ok(EvalString::new(Vec::new())); + } + let result = self.read_eval(false); + self.scanner.skip('\r'); + self.scanner.expect('\n')?; + result + } + + /// Read a collection of ` foo = bar` variables, with leading indent. + fn read_scoped_vars( + &mut self, + variable_name_validator: fn(var: &str) -> bool, + ) -> ParseResult<VarList<'text>> { + let mut vars = VarList::default(); + while self.scanner.peek() == ' ' { + self.scanner.skip_spaces(); + let name = self.read_ident()?; + if !variable_name_validator(name) { + self.scanner + .parse_error(format!("unexpected variable {:?}", name))?; + } + self.skip_spaces(); + let val = self.read_vardef()?; + vars.insert(name, val); + } + Ok(vars) + } + + fn read_rule(&mut self) -> ParseResult<Rule<'text>> { + let name = self.read_ident()?; + self.scanner.skip('\r'); + self.scanner.expect('\n')?; + let vars = self.read_scoped_vars(|var| { + matches!( + var, + "command" + | "depfile" + | "dyndep" + | "description" + | "deps" + | "generator" + | "pool" + | "restat" + | "rspfile" + | "rspfile_content" + | "msvc_deps_prefix" + ) + })?; + Ok(Rule { name, vars }) + } + + fn read_pool(&mut self) -> ParseResult<Pool<'text>> { + let name = self.read_ident()?; + self.scanner.skip('\r'); + self.scanner.expect('\n')?; + let vars = self.read_scoped_vars(|var| matches!(var, "depth"))?; + let mut depth = 0; + if let Some((_, val)) = vars.into_iter().next() { + let val = val.evaluate(&[]); + depth = match val.parse::<usize>() { + Ok(d) => d, + Err(err) => return self.scanner.parse_error(format!("pool depth: {}", err)), + } + } + Ok(Pool { name, depth }) + } + + fn read_unevaluated_paths_to( + &mut self, + v: &mut Vec<EvalString<&'text str>>, + ) -> 
ParseResult<()> { + self.skip_spaces(); + while self.scanner.peek() != ':' + && self.scanner.peek() != '|' + && !self.scanner.peek_newline() + { + v.push(self.read_eval(true)?); + self.skip_spaces(); + } + Ok(()) + } + + fn read_build(&mut self) -> ParseResult<Build<'text>> { + let line = self.scanner.line; + let mut outs = Vec::new(); + self.read_unevaluated_paths_to(&mut outs)?; + let explicit_outs = outs.len(); + + if self.scanner.peek() == '|' { + self.scanner.next(); + self.read_unevaluated_paths_to(&mut outs)?; + } + + self.scanner.expect(':')?; + self.skip_spaces(); + let rule = self.read_ident()?; + + let mut ins = Vec::new(); + self.read_unevaluated_paths_to(&mut ins)?; + let explicit_ins = ins.len(); + + if self.scanner.peek() == '|' { + self.scanner.next(); + let peek = self.scanner.peek(); + if peek == '|' || peek == '@' { + self.scanner.back(); + } else { + self.read_unevaluated_paths_to(&mut ins)?; + } + } + let implicit_ins = ins.len() - explicit_ins; + + if self.scanner.peek() == '|' { + self.scanner.next(); + if self.scanner.peek() == '@' { + self.scanner.back(); + } else { + self.scanner.expect('|')?; + self.read_unevaluated_paths_to(&mut ins)?; + } + } + let order_only_ins = ins.len() - implicit_ins - explicit_ins; + + if self.scanner.peek() == '|' { + self.scanner.next(); + self.scanner.expect('@')?; + self.read_unevaluated_paths_to(&mut ins)?; + } + let validation_ins = ins.len() - order_only_ins - implicit_ins - explicit_ins; + + self.scanner.skip('\r'); + self.scanner.expect('\n')?; + let vars = self.read_scoped_vars(|_| true)?; + Ok(Build { + rule, + line, + outs, + explicit_outs, + ins, + explicit_ins, + implicit_ins, + order_only_ins, + validation_ins, + vars, + }) + } + + fn read_default(&mut self) -> ParseResult<Vec<EvalString<&'text str>>> { + let mut defaults = Vec::new(); + self.read_unevaluated_paths_to(&mut defaults)?; + if defaults.is_empty() { + return self.scanner.parse_error("expected path"); + } + self.scanner.skip('\r'); + 
self.scanner.expect('\n')?; + Ok(defaults) + } + + fn skip_comment(&mut self) -> ParseResult<()> { + loop { + match self.scanner.read() { + '\0' => { + self.scanner.back(); + return Ok(()); + } + '\n' => return Ok(()), + _ => {} + } + } + } + + /// Read an identifier -- rule name, pool name, variable name, etc. + fn read_ident(&mut self) -> ParseResult<&'text str> { + let start = self.scanner.ofs; + while matches!( + self.scanner.read(), + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.' + ) {} + self.scanner.back(); + let end = self.scanner.ofs; + if end == start { + return self.scanner.parse_error("failed to scan ident"); + } + Ok(self.scanner.slice(start, end)) + } + + /// Reads an EvalString. Stops at either a newline, or ' ', ':', '|' if + /// stop_at_path_separators is set, without consuming the character that + /// caused it to stop. + fn read_eval(&mut self, stop_at_path_separators: bool) -> ParseResult<EvalString<&'text str>> { + self.eval_buf.clear(); + let mut ofs = self.scanner.ofs; + // This match block is copied twice, with the only difference being the check for + // spaces, colons, and pipes in the stop_at_path_separators version. We could remove the + // duplication by adding a match branch like `' ' | ':' | '|' if stop_at_path_separators =>` + // or even moving the `if stop_at_path_separators` inside of the match body, but both of + // those options are ~10% slower on a benchmark test of running the loader on llvm-cmake + // ninja files. 
+ let end = if stop_at_path_separators { + loop { + match self.scanner.read() { + '\0' => return self.scanner.parse_error("unexpected EOF"), + ' ' | ':' | '|' | '\n' => { + self.scanner.back(); + break self.scanner.ofs; + } + '\r' if self.scanner.peek() == '\n' => { + self.scanner.back(); + break self.scanner.ofs; + } + '$' => { + let end = self.scanner.ofs - 1; + if end > ofs { + self.eval_buf + .push(EvalPart::Literal(self.scanner.slice(ofs, end))); + } + let escape = self.read_escape()?; + self.eval_buf.push(escape); + ofs = self.scanner.ofs; + } + _ => {} + } + } + } else { + loop { + match self.scanner.read() { + '\0' => return self.scanner.parse_error("unexpected EOF"), + '\n' => { + self.scanner.back(); + break self.scanner.ofs; + } + '\r' if self.scanner.peek() == '\n' => { + self.scanner.back(); + break self.scanner.ofs; + } + '$' => { + let end = self.scanner.ofs - 1; + if end > ofs { + self.eval_buf + .push(EvalPart::Literal(self.scanner.slice(ofs, end))); + } + let escape = self.read_escape()?; + self.eval_buf.push(escape); + ofs = self.scanner.ofs; + } + _ => {} + } + } + }; + if end > ofs { + self.eval_buf + .push(EvalPart::Literal(self.scanner.slice(ofs, end))); + } + if self.eval_buf.is_empty() { + return self.scanner.parse_error(format!("Expected a string")); + } + Ok(EvalString::new(self.eval_buf.clone())) + } + + /// Read a variable name as found after a '$' in an eval. + /// Ninja calls this a "simple" varname and it is the same as read_ident without + /// period allowed(!), I guess because we expect things like + /// foo = $bar.d + /// to parse as a reference to $bar. 
+ fn read_simple_varname(&mut self) -> ParseResult<&'text str> { + let start = self.scanner.ofs; + while matches!(self.scanner.read(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-') {} + self.scanner.back(); + let end = self.scanner.ofs; + if end == start { + return self.scanner.parse_error("failed to scan variable name"); + } + Ok(self.scanner.slice(start, end)) + } + + /// Read and interpret the text following a '$' escape character. + fn read_escape(&mut self) -> ParseResult<EvalPart<&'text str>> { + Ok(match self.scanner.read() { + '\n' | '\r' => { + self.scanner.skip_spaces(); + EvalPart::Literal(self.scanner.slice(0, 0)) + } + ' ' | '$' | ':' => { + EvalPart::Literal(self.scanner.slice(self.scanner.ofs - 1, self.scanner.ofs)) + } + '{' => { + let start = self.scanner.ofs; + loop { + match self.scanner.read() { + '\0' => return self.scanner.parse_error("unexpected EOF"), + '}' => break, + _ => {} + } + } + let end = self.scanner.ofs - 1; + EvalPart::VarRef(self.scanner.slice(start, end)) + } + _ => { + // '$' followed by some other text. 
+ self.scanner.back(); + let var = self.read_simple_varname()?; + EvalPart::VarRef(var) + } + }) + } + + fn skip_spaces(&mut self) { + loop { + match self.scanner.read() { + ' ' => {} + '$' => { + if self.scanner.peek() != '\n' { + self.scanner.back(); + return; + } + self.scanner.next(); + } + _ => { + self.scanner.back(); + return; + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_case_buffer(test_case: &str) -> Vec<u8> { + let mut buf = test_case.as_bytes().to_vec(); + buf.push(0); + buf + } + + fn test_for_line_endings(input: &[&str], test: fn(&str)) { + let test_case_lf = input.join("\n"); + let test_case_crlf = input.join("\r\n"); + for test_case in [test_case_lf, test_case_crlf] { + test(&test_case); + } + } + + #[test] + fn parse_defaults() { + test_for_line_endings(&["var = 3", "default a b$var c", ""], |test_case| { + let mut buf = test_case_buffer(test_case); + let mut parser = Parser::new(&mut buf); + let default = match parser.read().unwrap().unwrap() { + Statement::Default(d) => d, + _ => panic!("expected default"), + }; + assert_eq!( + default, + vec![ + EvalString::new(vec![EvalPart::Literal("a")]), + EvalString::new(vec![EvalPart::Literal("b"), EvalPart::VarRef("var")]), + EvalString::new(vec![EvalPart::Literal("c")]), + ] + ); + }); + } + + #[test] + fn parse_dot_in_eval() { + let mut buf = test_case_buffer("x = $y.z\n"); + let mut parser = Parser::new(&mut buf); + parser.read().unwrap(); + let x = parser.vars.get("x").unwrap(); + assert_eq!(x, ".z"); + } + + #[test] + fn parse_dot_in_rule() { + let mut buf = test_case_buffer("rule x.y\n command = x\n"); + let mut parser = Parser::new(&mut buf); + let stmt = parser.read().unwrap().unwrap(); + assert!(matches!( + stmt, + Statement::Rule(Rule { + name: "x.y", + vars: _ + }) + )); + } + + #[test] + fn parse_trailing_newline() { + let mut buf = test_case_buffer("build$\n foo$\n : $\n touch $\n\n"); + let mut parser = Parser::new(&mut buf); + let stmt = 
parser.read().unwrap().unwrap(); + assert!(matches!( + stmt, + Statement::Build(Build { rule: "touch", .. }) + )); + } +} diff --git a/src/process.rs b/src/process.rs new file mode 100644 index 0000000..ae67bc3 --- /dev/null +++ b/src/process.rs @@ -0,0 +1,21 @@ +//! Exposes process::run_command, a wrapper around platform-native process execution. + +#[cfg(unix)] +pub use crate::process_posix::run_command; +#[cfg(windows)] +pub use crate::process_win::run_command; + +#[cfg(target_arch = "wasm32")] +fn run_command( + cmdline: &str, + mut output_cb: impl FnMut(&[u8]), +) -> anyhow::Result<(Termination, Vec<u8>)> { + anyhow::bail!("wasm cannot run commands"); +} + +#[derive(Debug, PartialEq)] +pub enum Termination { + Success, + Interrupted, + Failure, +} diff --git a/src/process_posix.rs b/src/process_posix.rs new file mode 100644 index 0000000..d6047d9 --- /dev/null +++ b/src/process_posix.rs @@ -0,0 +1,245 @@ +//! Implements run_command on posix using posix_spawn. +//! See run_command comments for why. + +use crate::process::Termination; +use std::io::{Error, Read}; +use std::os::fd::FromRawFd; +use std::os::unix::process::ExitStatusExt; + +// https://github.com/rust-lang/libc/issues/2520 +// libc crate doesn't expose the 'environ' pointer. +extern "C" { + static environ: *const *mut libc::c_char; +} + +fn check_posix_spawn(func: &str, ret: libc::c_int) -> anyhow::Result<()> { + if ret != 0 { + let err_str = unsafe { std::ffi::CStr::from_ptr(libc::strerror(ret)) }; + anyhow::bail!("{}: {}", func, err_str.to_str().unwrap()); + } + Ok(()) +} + +fn check_ret_errno(func: &str, ret: libc::c_int) -> anyhow::Result<()> { + if ret < 0 { + let errno = Error::last_os_error().raw_os_error().unwrap(); + let err_str = unsafe { std::ffi::CStr::from_ptr(libc::strerror(errno)) }; + anyhow::bail!("{}: {}", func, err_str.to_str().unwrap()); + } + Ok(()) +} + +/// Wraps libc::posix_spawnattr_t, in particular to implement Drop. 
+struct PosixSpawnAttr(libc::posix_spawnattr_t); + +impl PosixSpawnAttr { + fn new() -> anyhow::Result<Self> { + unsafe { + let mut attr: libc::posix_spawnattr_t = std::mem::zeroed(); + check_posix_spawn( + "posix_spawnattr_init", + libc::posix_spawnattr_init(&mut attr), + )?; + Ok(Self(attr)) + } + } + + fn as_ptr(&mut self) -> *mut libc::posix_spawnattr_t { + &mut self.0 + } + + fn setflags(&mut self, flags: libc::c_short) -> anyhow::Result<()> { + unsafe { + check_posix_spawn( + "posix_spawnattr_setflags", + libc::posix_spawnattr_setflags(self.as_ptr(), flags), + ) + } + } +} + +impl Drop for PosixSpawnAttr { + fn drop(&mut self) { + unsafe { + libc::posix_spawnattr_destroy(self.as_ptr()); + } + } +} + +/// Wraps libc::posix_spawn_file_actions_t, in particular to implement Drop. +struct PosixSpawnFileActions(libc::posix_spawn_file_actions_t); + +impl PosixSpawnFileActions { + fn new() -> anyhow::Result<Self> { + unsafe { + let mut actions: libc::posix_spawn_file_actions_t = std::mem::zeroed(); + check_posix_spawn( + "posix_spawn_file_actions_init", + libc::posix_spawn_file_actions_init(&mut actions), + )?; + Ok(Self(actions)) + } + } + + fn as_ptr(&mut self) -> *mut libc::posix_spawn_file_actions_t { + &mut self.0 + } + + fn addopen( + &mut self, + fd: i32, + path: &std::ffi::CStr, + oflag: i32, + mode: libc::mode_t, + ) -> anyhow::Result<()> { + unsafe { + check_posix_spawn( + "posix_spawn_file_actions_addopen", + libc::posix_spawn_file_actions_addopen( + self.as_ptr(), + fd, + path.as_ptr(), + oflag, + mode, + ), + ) + } + } + + fn adddup2(&mut self, fd: i32, newfd: i32) -> anyhow::Result<()> { + unsafe { + check_posix_spawn( + "posix_spawn_file_actions_adddup2", + libc::posix_spawn_file_actions_adddup2(self.as_ptr(), fd, newfd), + ) + } + } + + fn addclose(&mut self, fd: i32) -> anyhow::Result<()> { + unsafe { + check_posix_spawn( + "posix_spawn_file_actions_addclose", + libc::posix_spawn_file_actions_addclose(self.as_ptr(), fd), + ) + } + } +} + +impl Drop 
for PosixSpawnFileActions { + fn drop(&mut self) { + unsafe { libc::posix_spawn_file_actions_destroy(&mut self.0) }; + } +} + +/// Create an anonymous pipe as in libc::pipe(), but using pipe2() when available +/// to set CLOEXEC flag. +fn pipe2() -> anyhow::Result<[libc::c_int; 2]> { + // Compare to: https://doc.rust-lang.org/src/std/sys/unix/pipe.rs.html + unsafe { + let mut pipe: [libc::c_int; 2] = std::mem::zeroed(); + + // Mac: specially handled below with POSIX_SPAWN_CLOEXEC_DEFAULT + #[cfg(target_os = "macos")] + check_ret_errno("pipe", libc::pipe(pipe.as_mut_ptr()))?; + + // Assume all non-Mac have pipe2; we can refine this on user feedback. + #[cfg(all(unix, not(target_os = "macos")))] + check_ret_errno("pipe", libc::pipe2(pipe.as_mut_ptr(), libc::O_CLOEXEC))?; + + Ok(pipe) + } +} + +pub fn run_command(cmdline: &str, mut output_cb: impl FnMut(&[u8])) -> anyhow::Result<Termination> { + // Spawn the subprocess using posix_spawn with output redirected to the pipe. + // We don't use Rust's process spawning because of issue #14 and because + // we want to feed both stdout and stderr into the same pipe, which cannot + // be done with the existing std::process API. + let (pid, mut pipe) = unsafe { + let pipe = pipe2()?; + + let mut attr = PosixSpawnAttr::new()?; + + // Apple-specific extension: close any open fds. 
+ #[cfg(target_os = "macos")] + attr.setflags(libc::POSIX_SPAWN_CLOEXEC_DEFAULT as _)?; + + let mut actions = PosixSpawnFileActions::new()?; + // open /dev/null over stdin + actions.addopen( + 0, + std::ffi::CStr::from_bytes_with_nul_unchecked(b"/dev/null\0"), + libc::O_RDONLY, + 0, + )?; + // stdout/stderr => pipe + actions.adddup2(pipe[1], 1)?; + actions.adddup2(pipe[1], 2)?; + // close pipe in child + actions.addclose(pipe[0])?; + actions.addclose(pipe[1])?; + + let mut pid: libc::pid_t = 0; + let path = std::ffi::CStr::from_bytes_with_nul_unchecked(b"/bin/sh\0"); + let cmdline_nul = std::ffi::CString::new(cmdline).unwrap(); + let argv: [*const libc::c_char; 4] = [ + path.as_ptr(), + b"-c\0".as_ptr() as *const _, + cmdline_nul.as_ptr(), + std::ptr::null(), + ]; + + check_posix_spawn( + "posix_spawn", + libc::posix_spawn( + &mut pid, + path.as_ptr(), + actions.as_ptr(), + attr.as_ptr(), + // posix_spawn wants mutable argv: + // https://stackoverflow.com/questions/50596439/can-string-literals-be-passed-in-posix-spawns-argv + argv.as_ptr() as *const *mut _, + environ, + ), + )?; + + check_ret_errno("close", libc::close(pipe[1]))?; + + (pid, std::fs::File::from_raw_fd(pipe[0])) + }; + + let mut buf: [u8; 4 << 10] = [0; 4 << 10]; + loop { + let n = pipe.read(&mut buf)?; + if n == 0 { + break; + } + output_cb(&buf[0..n]); + } + drop(pipe); + + let status = unsafe { + let mut status: i32 = 0; + check_ret_errno("waitpid", libc::waitpid(pid, &mut status, 0))?; + std::process::ExitStatus::from_raw(status) + }; + + let termination = if status.success() { + Termination::Success + } else if let Some(sig) = status.signal() { + match sig { + libc::SIGINT => { + output_cb("interrupted".as_bytes()); + Termination::Interrupted + } + _ => { + output_cb(format!("signal {}", sig).as_bytes()); + Termination::Failure + } + } + } else { + Termination::Failure + }; + + Ok(termination) +} diff --git a/src/process_win.rs b/src/process_win.rs new file mode 100644 index 0000000..6b8aa17 --- 
/dev/null +++ b/src/process_win.rs @@ -0,0 +1,302 @@ +//! Implements run_command on Windows using native Windows calls. +//! See run_command comments for why. + +use crate::process::Termination; +use std::ffi::c_void; +use std::io::Read; +use std::os::windows::io::{FromRawHandle, OwnedHandle}; +use std::os::windows::prelude::AsRawHandle; +use std::pin::{pin, Pin}; +use windows_sys::Win32::{ + Foundation::*, + Security::SECURITY_ATTRIBUTES, + System::{Console::*, Diagnostics::Debug::*, Pipes::CreatePipe, Threading::*}, +}; + +fn get_error_string(err: u32) -> String { + let mut buf: [u8; 1024] = [0; 1024]; + let len = unsafe { + FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + std::ptr::null(), + err, + 0x0000_0400, // MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT) + buf.as_mut_ptr(), + buf.len() as u32, + std::ptr::null(), + ) + }; + if len == 0 { + panic!("FormatMessageA on error failed: {}", err); + } + std::str::from_utf8(&buf[..len as usize]) + .unwrap() + .trim_end() + .to_owned() +} + +/// Construct an error from GetLastError(). +fn windows_error(func: &str) -> anyhow::Error { + let err = unsafe { GetLastError() }; + anyhow::anyhow!("{}: {}", func, get_error_string(err)) +} +/// Return an Err from the current function with GetLastError info in it. +macro_rules! win_bail { + ($func:ident) => { + return Err(windows_error(stringify!($func))); + }; +} + +/// Wrapper for PROCESS_INFORMATION that cleans up on Drop. 
+struct ProcessInformation(PROCESS_INFORMATION); + +impl ProcessInformation { + fn new() -> Self { + Self(unsafe { std::mem::zeroed() }) + } + fn as_mut_ptr(&mut self) -> *mut PROCESS_INFORMATION { + &mut self.0 + } +} + +impl std::ops::Deref for ProcessInformation { + type Target = PROCESS_INFORMATION; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} +impl std::ops::DerefMut for ProcessInformation { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} +impl Drop for ProcessInformation { + fn drop(&mut self) { + unsafe { + if self.hProcess != 0 { + CloseHandle(self.hProcess); + } + if self.hThread != 0 { + CloseHandle(self.hThread); + } + } + } +} + +/// Wrapper for PROC_THREAD_ATTRIBUTE_LIST. +/// Per MSDN: attribute values "must persist until the attribute list is +/// destroyed using the DeleteProcThreadAttributeList function", which is +/// captured by the 'a lifetime. +struct ProcThreadAttributeList<'a> { + /// The PROC_THREAD_ATTRIBUTE_LIST; this is a type whose size we discover at runtime. + raw: Box<[u8]>, + /// The inherit_handles pointer. + _marker: std::marker::PhantomData<&'a [HANDLE]>, +} +impl<'a> ProcThreadAttributeList<'a> { + fn new(count: usize) -> anyhow::Result<Self> { + unsafe { + let mut size = 0; + if InitializeProcThreadAttributeList(std::ptr::null_mut(), count as u32, 0, &mut size) + == 0 + { + if GetLastError() != ERROR_INSUFFICIENT_BUFFER { + win_bail!(InitializeProcThreadAttributeList); + } + } + + let mut buf = vec![0u8; size].into_boxed_slice(); + if InitializeProcThreadAttributeList( + buf.as_mut_ptr() as LPPROC_THREAD_ATTRIBUTE_LIST, + count as u32, + 0, + &mut size, + ) == 0 + { + win_bail!(InitializeProcThreadAttributeList); + } + Ok(Self { + raw: buf, + _marker: std::marker::PhantomData, + }) + } + } + + /// Mark some handles as to be inherited. 
+ fn inherit_handles(&mut self, handles: Pin<&'a [HANDLE]>) -> anyhow::Result<()> { + unsafe { + if UpdateProcThreadAttribute( + self.as_mut_ptr(), + 0, + PROC_THREAD_ATTRIBUTE_HANDLE_LIST as usize, + handles.as_ptr() as *const c_void, + handles.len() * std::mem::size_of::<HANDLE>(), + std::ptr::null_mut(), + std::ptr::null_mut(), + ) == 0 + { + win_bail!(UpdateProcThreadAttribute); + } + } + Ok(()) + } + + fn as_mut_ptr(&mut self) -> LPPROC_THREAD_ATTRIBUTE_LIST { + self.raw.as_mut_ptr() as LPPROC_THREAD_ATTRIBUTE_LIST + } +} + +impl<'a> Drop for ProcThreadAttributeList<'a> { + fn drop(&mut self) { + unsafe { DeleteProcThreadAttributeList(self.as_mut_ptr()) }; + } +} + +pub fn run_command(cmdline: &str, mut output_cb: impl FnMut(&[u8])) -> anyhow::Result<Termination> { + // Don't want to run `cmd /c` since that limits cmd line length to 8192 bytes. + // std::process::Command can't take a string and pass it through to CreateProcess unchanged, + // so call that ourselves. + // https://github.com/rust-lang/rust/issues/38227 + + let (pipe_read, pipe_write) = unsafe { + let mut pipe_read: HANDLE = 0; + let mut pipe_write: HANDLE = 0; + let mut attrs = std::mem::zeroed::<SECURITY_ATTRIBUTES>(); + attrs.nLength = std::mem::size_of::<SECURITY_ATTRIBUTES>() as u32; + attrs.bInheritHandle = TRUE; + if CreatePipe( + &mut pipe_read, + &mut pipe_write, + &mut attrs, + /* use default buffer size */ 0, + ) == 0 + { + win_bail!(CreatePipe); + } + ( + OwnedHandle::from_raw_handle(pipe_read as *mut c_void), + OwnedHandle::from_raw_handle(pipe_write as *mut c_void), + ) + }; + + let process_info = unsafe { + // TODO: Set this to just 0 for console pool jobs. 
+ let process_flags = CREATE_NEW_PROCESS_GROUP | EXTENDED_STARTUPINFO_PRESENT; + + let mut startup_info = std::mem::zeroed::<STARTUPINFOEXA>(); + startup_info.StartupInfo.cb = std::mem::size_of::<STARTUPINFOEXA>() as u32; + startup_info.StartupInfo.dwFlags = STARTF_USESTDHANDLES; + startup_info.StartupInfo.hStdInput = GetStdHandle(STD_INPUT_HANDLE); + let raw_pipe_write = pipe_write.as_raw_handle() as isize; + startup_info.StartupInfo.hStdOutput = raw_pipe_write; + startup_info.StartupInfo.hStdError = raw_pipe_write; + + // Safely inherit in/out handles. + // https://devblogs.microsoft.com/oldnewthing/20111216-00/?p=8873 + let handles = pin!([startup_info.StartupInfo.hStdInput, raw_pipe_write]); + let mut attrs = ProcThreadAttributeList::new(1)?; + attrs.inherit_handles(handles)?; + startup_info.lpAttributeList = attrs.as_mut_ptr(); + + let mut process_info = ProcessInformation::new(); + + let mut cmdline_nul: Vec<u8> = String::from(cmdline).into_bytes(); + cmdline_nul.push(0); + + if CreateProcessA( + std::ptr::null_mut(), + cmdline_nul.as_mut_ptr(), + std::ptr::null_mut(), + std::ptr::null_mut(), + /*inherit handles = */ TRUE, + process_flags, + std::ptr::null_mut(), + std::ptr::null_mut(), + &mut startup_info.StartupInfo, + process_info.as_mut_ptr(), + ) == 0 + { + let err = GetLastError(); + if err == ERROR_INVALID_PARAMETER { + if cmdline.is_empty() { + anyhow::bail!("CreateProcess failed: command is empty"); + } + if let Some(first_char) = cmdline.bytes().nth(0) { + if first_char == b' ' || first_char == b'\t' { + anyhow::bail!("CreateProcess failed: command has leading whitespace"); + } + } + } + win_bail!(CreateProcessA); + } + drop(pipe_write); + + process_info + }; + + let mut pipe = std::fs::File::from(pipe_read); + let mut buf: [u8; 4 << 10] = [0; 4 << 10]; + loop { + let n = pipe.read(&mut buf)?; + if n == 0 { + break; + } + output_cb(&buf[0..n]); + } + + let exit_code = unsafe { + if WaitForSingleObject(process_info.hProcess, INFINITE) != 0 { + 
win_bail!(WaitForSingleObject); + } + + let mut exit_code: u32 = 0; + if GetExitCodeProcess(process_info.hProcess, &mut exit_code) == 0 { + win_bail!(GetExitCodeProcess); + } + + exit_code + }; + + let termination = match exit_code { + 0 => Termination::Success, + 0xC000013A => Termination::Interrupted, + _ => Termination::Failure, + }; + + Ok(termination) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Simple command that is expected to succeed. + #[test] + fn run_echo() -> anyhow::Result<()> { + let mut output = Vec::new(); + run_command("cmd /c echo hello", |buf| output.extend_from_slice(buf))?; + assert_eq!(output, b"hello\r\n"); + Ok(()) + } + + /// Expect empty command to be specially handled in errors. + #[test] + fn empty_command() -> anyhow::Result<()> { + let mut output = Vec::new(); + let err = + run_command("", |buf| output.extend_from_slice(buf)).expect_err("expected failure"); + assert!(err.to_string().contains("command is empty")); + Ok(()) + } + + /// Expect leading whitespace to be specially handled in errors. + #[test] + fn initial_space() -> anyhow::Result<()> { + let mut output = Vec::new(); + let err = run_command(" cmd /c echo hello", |buf| output.extend_from_slice(buf)) + .expect_err("expected failure"); + assert!(err.to_string().contains("command has leading whitespace")); + Ok(()) + } +} diff --git a/src/progress.rs b/src/progress.rs new file mode 100644 index 0000000..6ba1418 --- /dev/null +++ b/src/progress.rs @@ -0,0 +1,453 @@ +//! Build progress tracking and reporting, for the purpose of display to the +//! user. + +use crate::{ + graph::Build, graph::BuildId, process::Termination, task::TaskResult, terminal, + work::BuildState, work::StateCounts, +}; +use std::collections::VecDeque; +use std::io::Write; +use std::sync::Arc; +use std::sync::Condvar; +use std::sync::Mutex; +use std::time::Duration; +use std::time::Instant; + +/// Compute the message to display on the console for a given build. 
+pub fn build_message(build: &Build) -> &str { + build + .desc + .as_ref() + .filter(|desc| !desc.is_empty()) + .unwrap_or_else(|| build.cmdline.as_ref().unwrap()) +} + +/// Trait for build progress notifications. +pub trait Progress { + /// Called as individual build tasks progress through build states. + fn update(&mut self, counts: &StateCounts); + + /// Called when a task starts. + fn task_started(&mut self, id: BuildId, build: &Build); + + /// Called when a task's last line of output changes. + fn task_output(&mut self, id: BuildId, line: Vec<u8>); + + /// Called when a task completes. + fn task_finished(&mut self, id: BuildId, build: &Build, result: &TaskResult); + + /// Log a line of output without corrupting the progress display. + /// This line is persisted beyond further progress updates. For example, + /// used when a task fails; we want the final output to show that failed + /// task's output even if we do more work after it fails. + fn log(&mut self, msg: &str); +} + +/// Currently running build task, as tracked for progress updates. +struct Task { + id: BuildId, + /// When the task started running. + start: Instant, + /// Build status message for the task. + message: String, + /// Last line of output from the task. + last_line: Option<String>, +} + +/// Progress implementation for "dumb" console, without any overprinting. +#[derive(Default)] +pub struct DumbConsoleProgress { + /// Whether to print command lines of started programs. + verbose: bool, + + /// The id of the last command printed, used to avoid printing it twice + /// when we have two updates from the same command in a row. 
+ last_started: Option<BuildId>, +} + +impl DumbConsoleProgress { + pub fn new(verbose: bool) -> Self { + Self { + verbose, + last_started: None, + } + } +} + +impl Progress for DumbConsoleProgress { + fn update(&mut self, _counts: &StateCounts) { + // ignore + } + + fn task_started(&mut self, id: BuildId, build: &Build) { + self.log(if self.verbose { + build.cmdline.as_ref().unwrap() + } else { + build_message(build) + }); + self.last_started = Some(id); + } + + fn task_output(&mut self, _id: BuildId, _line: Vec<u8>) { + // ignore + } + + fn task_finished(&mut self, id: BuildId, build: &Build, result: &TaskResult) { + match result.termination { + Termination::Success => { + if result.output.is_empty() || self.last_started == Some(id) { + // Output is empty, or we just printed the command, don't print it again. + } else { + self.log(build_message(build)) + } + } + Termination::Interrupted => self.log(&format!("interrupted: {}", build_message(build))), + Termination::Failure => self.log(&format!("failed: {}", build_message(build))), + }; + if !result.output.is_empty() { + std::io::stdout().write_all(&result.output).unwrap(); + } + } + + fn log(&mut self, msg: &str) { + println!("{}", msg); + } +} + +/// Progress implementation for "fancy" console, with progress bar etc. +/// Each time it prints, it clears from the cursor to the end of the console, +/// prints the status text, and then moves moves the cursor back up to the +/// start position. This means on errors etc. we can clear any status by +/// clearing the console too. +pub struct FancyConsoleProgress { + state: Arc<Mutex<FancyState>>, +} + +/// Screen updates happen after this duration passes, to reduce the amount +/// of printing in the case of rapid updates. This helps with terminal flicker. 
+const UPDATE_DELAY: Duration = std::time::Duration::from_millis(50); + +impl FancyConsoleProgress { + pub fn new(verbose: bool) -> Self { + let dirty_cond = Arc::new(Condvar::new()); + let state = Arc::new(Mutex::new(FancyState { + done: false, + dirty: false, + dirty_cond: dirty_cond.clone(), + counts: StateCounts::default(), + tasks: VecDeque::new(), + verbose, + })); + + // Thread to debounce status updates -- waits a bit, then prints after + // any dirty state. + std::thread::spawn({ + let state = state.clone(); + move || loop { + // Wait to be notified of a display update, or timeout at 500ms. + // The timeout is for the case where there are lengthy build + // steps and the progress will show how long they've been + // running. + { + let (state, _) = dirty_cond + .wait_timeout_while( + state.lock().unwrap(), + Duration::from_millis(500), + |state| !state.dirty, + ) + .unwrap(); + if state.done { + break; + } + } + + // Delay a little bit in case more display updates come in. + std::thread::sleep(UPDATE_DELAY); + + // Update regardless of whether we timed out or not. 
+ state.lock().unwrap().print_progress(); + } + }); + + FancyConsoleProgress { state } + } +} + +impl Progress for FancyConsoleProgress { + fn update(&mut self, counts: &StateCounts) { + self.state.lock().unwrap().update(counts); + } + + fn task_started(&mut self, id: BuildId, build: &Build) { + self.state.lock().unwrap().task_started(id, build); + } + + fn task_output(&mut self, id: BuildId, line: Vec<u8>) { + self.state.lock().unwrap().task_output(id, line); + } + + fn task_finished(&mut self, id: BuildId, build: &Build, result: &TaskResult) { + self.state.lock().unwrap().task_finished(id, build, result); + } + + fn log(&mut self, msg: &str) { + self.state.lock().unwrap().log(msg); + } +} + +impl Drop for FancyConsoleProgress { + fn drop(&mut self) { + self.state.lock().unwrap().cleanup(); + } +} + +struct FancyState { + done: bool, + dirty: bool, + dirty_cond: Arc<Condvar>, + + /// Counts of tasks in each state. TODO: pass this as function args? + counts: StateCounts, + /// Build tasks that are currently executing. + /// Pushed to as tasks are started, so it's always in order of age. + tasks: VecDeque<Task>, + /// Whether to print command lines of started programs. 
+ verbose: bool, +} + +impl FancyState { + fn dirty(&mut self) { + self.dirty = true; + self.dirty_cond.notify_one(); + } + + fn update(&mut self, counts: &StateCounts) { + self.counts = counts.clone(); + self.dirty(); + } + + fn task_started(&mut self, id: BuildId, build: &Build) { + if self.verbose { + self.log(build.cmdline.as_ref().unwrap()); + } + let message = build_message(build); + self.tasks.push_back(Task { + id, + start: Instant::now(), + message: message.to_string(), + last_line: None, + }); + self.dirty(); + } + + fn task_output(&mut self, id: BuildId, line: Vec<u8>) { + let task = self.tasks.iter_mut().find(|t| t.id == id).unwrap(); + task.last_line = Some(String::from_utf8_lossy(&line).into_owned()); + self.dirty(); + } + + fn task_finished(&mut self, id: BuildId, build: &Build, result: &TaskResult) { + self.tasks + .remove(self.tasks.iter().position(|t| t.id == id).unwrap()); + match result.termination { + Termination::Success => { + if result.output.is_empty() { + // Common case: don't show anything. + } else { + self.log(build_message(build)) + } + } + Termination::Interrupted => self.log(&format!("interrupted: {}", build_message(build))), + Termination::Failure => self.log(&format!("failed: {}", build_message(build))), + }; + if !result.output.is_empty() { + std::io::stdout().write_all(&result.output).unwrap(); + } + self.dirty(); + } + + fn log(&mut self, msg: &str) { + self.clear_progress(); + println!("{}", msg); + self.dirty(); + } + + fn cleanup(&mut self) { + self.clear_progress(); + self.done = true; + self.dirty(); // let thread quit + } + + fn clear_progress(&self) { + // If the user hit ctl-c, it may have printed something on the line. + // So \r to go to first column first, then clear anything below. 
+ std::io::stdout().write_all(b"\r\x1b[J").unwrap(); + } + + fn print_progress(&mut self) { + self.clear_progress(); + let failed = self.counts.get(BuildState::Failed); + let mut progress_line = format!( + "[{}] {}/{} done, ", + progress_bar(&self.counts, 40), + self.counts.get(BuildState::Done) + failed, + self.counts.total() + ); + if failed > 0 { + progress_line.push_str(&format!("{} failed, ", failed)); + } + progress_line.push_str(&format!( + "{}/{} running", + self.tasks.len(), + self.counts.get(BuildState::Queued) + + self.counts.get(BuildState::Running) + + self.counts.get(BuildState::Ready), + )); + println!("{}", progress_line); + let mut lines = 1; + + let max_cols = terminal::get_cols().unwrap_or(80); + let max_tasks = 8; + let now = Instant::now(); + for task in self.tasks.iter().take(max_tasks) { + let delta = now.duration_since(task.start).as_secs() as usize; + println!("{}", task_message(&task.message, delta, max_cols)); + lines += 1; + if let Some(line) = &task.last_line { + let max_len = max_cols - 2; + println!(" {}", truncate(line, max_len)); + lines += 1; + } + } + + if self.tasks.len() > max_tasks { + let remaining = self.tasks.len() - max_tasks; + println!("...and {} more", remaining); + lines += 1; + } + + // Move cursor up to the first printed line, for overprinting. + print!("\x1b[{}A", lines); + self.dirty = false; + } +} + +/// Format a task's status message to optionally include how long it has been running +/// and also to fit within a maximum number of terminal columns. 
+fn task_message(message: &str, seconds: usize, max_cols: usize) -> String { + let time_note = if seconds > 2 { + format!(" ({}s)", seconds) + } else { + "".into() + }; + let mut out = message.to_owned(); + if out.len() + time_note.len() >= max_cols { + out.truncate(max_cols - time_note.len() - 3); + out.push_str("..."); + } + out.push_str(&time_note); + out +} + +fn truncate(s: &str, mut max: usize) -> &str { + if max >= s.len() { + return s; + } + while !s.is_char_boundary(max) { + max -= 1; + } + &s[..max] +} + +/// Render a StateCounts as an ASCII progress bar. +fn progress_bar(counts: &StateCounts, bar_size: usize) -> String { + let mut bar = String::with_capacity(bar_size); + let mut sum: usize = 0; + let total = counts.total(); + if total == 0 { + return " ".repeat(bar_size); + } + for (count, ch) in [ + ( + counts.get(BuildState::Done) + counts.get(BuildState::Failed), + '=', + ), + ( + counts.get(BuildState::Queued) + + counts.get(BuildState::Running) + + counts.get(BuildState::Ready), + '-', + ), + (counts.get(BuildState::Want), ' '), + ] { + sum += count; + let mut target_size = sum * bar_size / total; + if count > 0 && target_size == bar.len() && target_size < bar_size { + // Special case: for non-zero count, ensure we always get at least + // one tick. + target_size += 1; + } + while bar.len() < target_size { + bar.push(ch); + } + } + bar +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn progress_bar_rendering() { + let mut counts = StateCounts::default(); + + // Don't crash if we show progress before having any tasks. + assert_eq!(progress_bar(&counts, 10), " "); + + counts.add(BuildState::Want, 100); + assert_eq!(progress_bar(&counts, 10), " "); + + // Half want -> ready. + counts.add(BuildState::Want, -50); + counts.add(BuildState::Ready, 50); + assert_eq!(progress_bar(&counts, 10), "----- "); + + // One ready -> done. 
+ counts.add(BuildState::Ready, -1); + counts.add(BuildState::Done, 1); + assert_eq!(progress_bar(&counts, 10), "=---- "); + + // All but one want -> ready. + counts.add(BuildState::Want, -49); + counts.add(BuildState::Ready, 49); + assert_eq!(progress_bar(&counts, 10), "=-------- "); + + // All want -> ready. + counts.add(BuildState::Want, -1); + counts.add(BuildState::Ready, 1); + assert_eq!(progress_bar(&counts, 10), "=---------"); + } + + #[test] + fn task_rendering() { + assert_eq!(task_message("building foo.o", 0, 80), "building foo.o"); + assert_eq!(task_message("building foo.o", 0, 10), "buildin..."); + assert_eq!(task_message("building foo.o", 0, 5), "bu..."); + } + + #[test] + fn task_rendering_with_time() { + assert_eq!(task_message("building foo.o", 5, 80), "building foo.o (5s)"); + assert_eq!(task_message("building foo.o", 5, 10), "bu... (5s)"); + } + + #[test] + fn truncate_utf8() { + let text = "utf8 progress bar: ━━━━━━━━━━━━"; + for len in 10..text.len() { + // test passes if this doesn't panic + truncate(text, len); + } + } +} diff --git a/src/run.rs b/src/run.rs new file mode 100644 index 0000000..e84ec26 --- /dev/null +++ b/src/run.rs @@ -0,0 +1,241 @@ +use crate::{ + load, + progress::{DumbConsoleProgress, FancyConsoleProgress, Progress}, + terminal, trace, work, +}; +use anyhow::anyhow; +use std::path::Path; + +fn build( + options: work::Options, + build_filename: String, + targets: Vec<String>, + verbose: bool, +) -> anyhow::Result<Option<usize>> { + let (mut dumb_console, mut fancy_console); + let progress: &mut dyn Progress = if terminal::use_fancy() { + fancy_console = FancyConsoleProgress::new(verbose); + &mut fancy_console + } else { + dumb_console = DumbConsoleProgress::new(verbose); + &mut dumb_console + }; + + let mut state = trace::scope("load::read", || load::read(&build_filename))?; + let mut work = work::Work::new( + state.graph, + state.hashes, + state.db, + &options, + progress, + state.pools, + ); + + let mut tasks_finished = 
0; + + // Attempt to rebuild build.ninja. + let build_file_target = work.lookup(&build_filename); + if let Some(target) = build_file_target { + work.want_file(target)?; + match trace::scope("work.run", || work.run())? { + None => return Ok(None), + Some(0) => { + // build.ninja already up to date. + // TODO: this logic is not right in the case where a build has + // a step that doesn't touch build.ninja. We should instead + // verify the specific FileId was updated. + } + Some(n) => { + // Regenerated build.ninja; start over. + tasks_finished = n; + state = trace::scope("load::read", || load::read(&build_filename))?; + work = work::Work::new( + state.graph, + state.hashes, + state.db, + &options, + progress, + state.pools, + ); + } + } + } + + if !targets.is_empty() { + for name in &targets { + let target = work + .lookup(name) + .ok_or_else(|| anyhow::anyhow!("unknown path requested: {:?}", name))?; + if Some(target) == build_file_target { + // Already built above. + continue; + } + work.want_file(target)?; + } + } else if !state.default.is_empty() { + for target in state.default { + work.want_file(target)?; + } + } else { + work.want_every_file(build_file_target)?; + } + + let tasks = trace::scope("work.run", || work.run())?; + // Include any tasks from initial build in final count of steps. + Ok(tasks.map(|n| n + tasks_finished)) +} + +fn default_parallelism() -> anyhow::Result<usize> { + // Ninja uses available processors + a constant, but I don't think the + // difference matters too much. 
+ let par = std::thread::available_parallelism()?; + Ok(usize::from(par)) +} + +#[derive(argh::FromArgs)] // this struct generates the flags and --help output +/// n2, a ninja compatible build system +struct Args { + /// chdir before running + #[argh(option, short = 'C')] + chdir: Option<String>, + + /// input build file [default=build.ninja] + #[argh(option, short = 'f', default = "(\"build.ninja\".into())")] + build_file: String, + + /// debugging tools + #[argh(option, short = 'd')] + debug: Option<String>, + + /// subcommands + #[argh(option, short = 't')] + tool: Option<String>, + + /// parallelism [default uses system thread count] + #[argh(option, short = 'j')] // tododefault_parallelism()")] + parallelism: Option<usize>, + + /// keep going until at least N failures (0 means infinity) [default=1] + #[argh(option, short = 'k', default = "1")] + keep_going: usize, + + /// print version (required by cmake) + #[argh(switch, hidden_help)] + version: bool, + + /// compdb flag (required by meson) + #[allow(dead_code)] + #[argh(switch, short = 'x', hidden_help)] + expand_rspfile: bool, + + /// print executed command lines + #[argh(switch, short = 'v')] + verbose: bool, + + /// targets to build + #[argh(positional)] + targets: Vec<String>, +} + +fn run_impl() -> anyhow::Result<i32> { + let mut fake_ninja_compat = Path::new(&std::env::args().next().unwrap()) + .file_name() + .unwrap() + == std::ffi::OsStr::new(&format!("ninja{}", std::env::consts::EXE_SUFFIX)); + + let args: Args = argh::from_env(); + + let mut options = work::Options { + parallelism: match args.parallelism { + Some(p) => p, + None => default_parallelism()?, + }, + failures_left: Some(args.keep_going).filter(|&n| n > 0), + explain: false, + adopt: false, + }; + + if let Some(dir) = args.chdir { + let dir = Path::new(&dir); + std::env::set_current_dir(dir).map_err(|err| anyhow!("chdir {:?}: {}", dir, err))?; + } + + if let Some(debug) = args.debug { + match debug.as_str() { + "ninja_compat" => 
fake_ninja_compat = true, + "explain" => options.explain = true, + "list" => { + println!("debug tools:"); + println!(" explain print why each target is considered out of date"); + println!(" trace generate json performance trace"); + return Ok(1); + } + "trace" => trace::open("trace.json")?, + _ => anyhow::bail!("unknown -d {:?}, use -d list to list", debug), + } + } + + if args.version { + if fake_ninja_compat { + // CMake requires a particular Ninja version. + println!("1.10.2"); + return Ok(0); + } else { + println!("{}", env!("CARGO_PKG_VERSION")); + } + return Ok(0); + } + + if let Some(tool) = args.tool { + match tool.as_str() { + "list" => { + println!("subcommands:"); + println!(" (none yet, but see README if you're looking here trying to get CMake to work)"); + return Ok(1); + } + "compdb" if fake_ninja_compat => { + // meson wants to invoke this tool. + return Ok(0); // do nothing; TODO + } + "recompact" if fake_ninja_compat => { + // CMake unconditionally invokes this tool, yuck. + return Ok(0); // do nothing + } + "restat" if fake_ninja_compat => { + // CMake invokes this after generating build files; mark build + // targets as up to date by running the build with "adopt" flag + // on. + options.adopt = true; + } + _ => { + anyhow::bail!("unknown -t {:?}, use -t list to list", tool); + } + } + } + + match build(options, args.build_file, args.targets, args.verbose)? { + None => { + // Don't print any summary, the failing task is enough info. + return Ok(1); + } + Some(0) => { + // Special case: don't print numbers when no work done. + println!("n2: no work to do"); + } + Some(n) => { + println!( + "n2: ran {} task{}, now up to date", + n, + if n == 1 { "" } else { "s" } + ); + } + } + + Ok(0) +} + +pub fn run() -> anyhow::Result<i32> { + let res = run_impl(); + trace::close(); + res +} diff --git a/src/scanner.rs b/src/scanner.rs new file mode 100644 index 0000000..091791d --- /dev/null +++ b/src/scanner.rs @@ -0,0 +1,152 @@ +//! 
Scans an input string (source file) character by character. + +use std::{io::Read, path::Path}; + +#[derive(Debug)] +pub struct ParseError { + msg: String, + ofs: usize, +} +pub type ParseResult<T> = Result<T, ParseError>; + +pub struct Scanner<'a> { + buf: &'a [u8], + pub ofs: usize, + pub line: usize, +} + +impl<'a> Scanner<'a> { + pub fn new(buf: &'a [u8]) -> Self { + if !buf.ends_with(b"\0") { + panic!("Scanner requires nul-terminated buf"); + } + Scanner { + buf, + ofs: 0, + line: 1, + } + } + + pub fn slice(&self, start: usize, end: usize) -> &'a str { + unsafe { std::str::from_utf8_unchecked(self.buf.get_unchecked(start..end)) } + } + pub fn peek(&self) -> char { + unsafe { *self.buf.get_unchecked(self.ofs) as char } + } + pub fn peek_newline(&self) -> bool { + if self.peek() == '\n' { + return true; + } + if self.ofs >= self.buf.len() - 1 { + return false; + } + let peek2 = unsafe { *self.buf.get_unchecked(self.ofs + 1) as char }; + self.peek() == '\r' && peek2 == '\n' + } + pub fn next(&mut self) { + if self.peek() == '\n' { + self.line += 1; + } + if self.ofs == self.buf.len() { + panic!("scanned past end") + } + self.ofs += 1; + } + pub fn back(&mut self) { + if self.ofs == 0 { + panic!("back at start") + } + self.ofs -= 1; + if self.peek() == '\n' { + self.line -= 1; + } + } + pub fn read(&mut self) -> char { + let c = self.peek(); + self.next(); + c + } + pub fn skip(&mut self, ch: char) -> bool { + if self.peek() == ch { + self.next(); + return true; + } + false + } + + pub fn skip_spaces(&mut self) { + while self.skip(' ') {} + } + + pub fn expect(&mut self, ch: char) -> ParseResult<()> { + let r = self.read(); + if r != ch { + self.back(); + return self.parse_error(format!("expected {:?}, got {:?}", ch, r)); + } + Ok(()) + } + + pub fn parse_error<T, S: Into<String>>(&self, msg: S) -> ParseResult<T> { + Err(ParseError { + msg: msg.into(), + ofs: self.ofs, + }) + } + + pub fn format_parse_error(&self, filename: &Path, err: ParseError) -> String { + 
let mut ofs = 0; + let lines = self.buf.split(|&c| c == b'\n'); + for (line_number, line) in lines.enumerate() { + if ofs + line.len() >= err.ofs { + let mut msg = "parse error: ".to_string(); + msg.push_str(&err.msg); + msg.push('\n'); + + let prefix = format!("{}:{}: ", filename.display(), line_number + 1); + msg.push_str(&prefix); + + let mut context = unsafe { std::str::from_utf8_unchecked(line) }; + let mut col = err.ofs - ofs; + if col > 40 { + // Trim beginning of line to fit it on screen. + msg.push_str("..."); + context = &context[col - 20..]; + col = 3 + 20; + } + if context.len() > 40 { + context = &context[0..40]; + msg.push_str(context); + msg.push_str("..."); + } else { + msg.push_str(context); + } + msg.push('\n'); + + msg.push_str(&" ".repeat(prefix.len() + col)); + msg.push_str("^\n"); + return msg; + } + ofs += line.len() + 1; + } + panic!("invalid offset when formatting error") + } +} + +/// Scanner wants its input buffer to end in a trailing nul. +/// This function is like std::fs::read() but appends a nul, efficiently. +pub fn read_file_with_nul(path: &Path) -> std::io::Result<Vec<u8>> { + // Using std::fs::read() to read the file and then pushing a nul on the end + // causes us to allocate a buffer the size of the file, then grow it to push + // the nul, copying the entire file(!). So instead create a buffer of the + // right size up front. + let mut file = std::fs::File::open(path)?; + let size = file.metadata()?.len() as usize; + let mut bytes = Vec::with_capacity(size + 1); + unsafe { + bytes.set_len(size); + } + file.read_exact(&mut bytes[..size])?; + bytes.push(0); + Ok(bytes) +} diff --git a/src/signal.rs b/src/signal.rs new file mode 100644 index 0000000..1e6f5f7 --- /dev/null +++ b/src/signal.rs @@ -0,0 +1,32 @@ +//! Unix signal handling (SIGINT). +//! +//! We let the first SIGINT reach child processes, which ought to build-fail +//! and let the parent properly print that progress. This also lets us still +//! 
write out pending debug traces, too. + +use std::sync::atomic::AtomicBool; + +static mut INTERRUPTED: AtomicBool = AtomicBool::new(false); + +#[cfg(unix)] +extern "C" fn sigint_handler(_sig: libc::c_int) { + unsafe { + INTERRUPTED.store(true, std::sync::atomic::Ordering::Relaxed); + } + // SA_RESETHAND should clear the handler. +} + +#[cfg(unix)] +pub fn register_sigint() { + // Safety: registering a signal handler is libc unsafe code. + unsafe { + let mut sa: libc::sigaction = std::mem::zeroed(); + sa.sa_sigaction = sigint_handler as libc::sighandler_t; + sa.sa_flags = libc::SA_RESETHAND; + libc::sigaction(libc::SIGINT, &sa, std::ptr::null_mut()); + } +} + +pub fn was_interrupted() -> bool { + unsafe { INTERRUPTED.load(std::sync::atomic::Ordering::Relaxed) } +} diff --git a/src/smallmap.rs b/src/smallmap.rs new file mode 100644 index 0000000..da8298c --- /dev/null +++ b/src/smallmap.rs @@ -0,0 +1,80 @@ +//! A map-like object for maps with few entries. +//! TODO: this may not be needed at all, but the code used this pattern in a +//! few places so I figured I may as well name it. + +use std::{borrow::Borrow, fmt::Debug}; + +/// A map-like object implemented as a list of pairs, for cases where the +/// number of entries in the map is small. 
+pub struct SmallMap<K, V>(Vec<(K, V)>); + +impl<K, V> Default for SmallMap<K, V> { + fn default() -> Self { + SmallMap(Vec::default()) + } +} + +impl<K: PartialEq, V> SmallMap<K, V> { + pub fn insert(&mut self, k: K, v: V) { + for (ik, iv) in self.0.iter_mut() { + if *ik == k { + *iv = v; + return; + } + } + self.0.push((k, v)); + } + + pub fn get<Q>(&self, q: &Q) -> Option<&V> + where + K: Borrow<Q>, + Q: PartialEq + ?Sized, + { + for (k, v) in self.0.iter() { + if k.borrow() == q { + return Some(v); + } + } + None + } + + pub fn iter(&self) -> std::slice::Iter<(K, V)> { + self.0.iter() + } + + pub fn iter_mut(&mut self) -> std::slice::IterMut<(K, V)> { + self.0.iter_mut() + } + + pub fn into_iter(self) -> std::vec::IntoIter<(K, V)> { + self.0.into_iter() + } + + pub fn values(&self) -> impl Iterator<Item = &V> + '_ { + self.0.iter().map(|x| &x.1) + } +} + +impl<K: PartialEq, V, const N: usize> std::convert::From<[(K, V); N]> for SmallMap<K, V> { + fn from(value: [(K, V); N]) -> Self { + let mut result = SmallMap::default(); + for (k, v) in value { + result.insert(k, v); + } + result + } +} + +impl<K: Debug, V: Debug> Debug for SmallMap<K, V> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +// Only for tests because it is order-sensitive +#[cfg(test)] +impl<K: PartialEq, V: PartialEq> PartialEq for SmallMap<K, V> { + fn eq(&self, other: &Self) -> bool { + return self.0 == other.0; + } +} diff --git a/src/task.rs b/src/task.rs new file mode 100644 index 0000000..dfb818b --- /dev/null +++ b/src/task.rs @@ -0,0 +1,311 @@ +//! Runs build tasks, potentially in parallel. +//! Unaware of the build graph, pools, etc.; just command execution. +//! +//! We use one thread per subprocess. This differs from Ninja which goes to +//! some effort to use ppoll-like behavior. Because the threads are mostly +//! blocked in IO I don't expect this to be too costly in terms of CPU, but it's +//! worth considering how much RAM it costs. 
On the positive side, the logic +//! is significantly simpler than Ninja and we get free behaviors like parallel +//! parsing of depfiles. + +use crate::{ + depfile, + graph::{Build, BuildId, RspFile}, + process, + scanner::{self, Scanner}, +}; +use anyhow::{anyhow, bail}; +use std::path::{Path, PathBuf}; +use std::sync::mpsc; +use std::time::Instant; + +pub struct FinishedTask { + /// A (faked) "thread id", used to put different finished builds in different + /// tracks in a performance trace. + pub tid: usize, + pub buildid: BuildId, + pub span: (Instant, Instant), + pub result: TaskResult, +} + +/// The result of running a build step. +pub struct TaskResult { + pub termination: process::Termination, + /// Console output. + pub output: Vec<u8>, + pub discovered_deps: Option<Vec<String>>, +} + +/// Reads dependencies from a .d file path. +fn read_depfile(path: &Path) -> anyhow::Result<Vec<String>> { + let bytes = match scanner::read_file_with_nul(path) { + Ok(b) => b, + // See discussion of missing depfiles in #80. + // TODO(#99): warn or error in this circumstance? + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()), + Err(e) => bail!("read {}: {}", path.display(), e), + }; + + let mut scanner = Scanner::new(&bytes); + let parsed_deps = depfile::parse(&mut scanner) + .map_err(|err| anyhow!(scanner.format_parse_error(path, err)))?; + // TODO verify deps refers to correct output + let deps: Vec<String> = parsed_deps + .values() + .flat_map(|x| x.iter()) + .map(|&dep| dep.to_owned()) + .collect(); + Ok(deps) +} + +fn write_rspfile(rspfile: &RspFile) -> anyhow::Result<()> { + if let Some(parent) = rspfile.path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(&rspfile.path, &rspfile.content)?; + Ok(()) +} + +/// Parse some subcommand output to extract "Note: including file:" lines as +/// emitted by MSVC/clang-cl. 
+fn extract_showincludes(output: Vec<u8>) -> (Vec<String>, Vec<u8>) { + let mut filtered_output = Vec::new(); + let mut includes = Vec::new(); + for line in output.split(|&c| c == b'\n') { + if let Some(include) = line.strip_prefix(b"Note: including file: ") { + let start = include.iter().position(|&c| c != b' ').unwrap_or(0); + let end = if include.ends_with(&[b'\r']) { + include.len() - 1 + } else { + include.len() + }; + let include = &include[start..end]; + includes.push(unsafe { String::from_utf8_unchecked(include.to_vec()) }); + } else { + if !filtered_output.is_empty() { + filtered_output.push(b'\n'); + } + filtered_output.extend_from_slice(line); + } + } + (includes, filtered_output) +} + +/// Find the span of the last line of text in buf, ignoring trailing empty +/// lines. +fn find_last_line(buf: &[u8]) -> &[u8] { + fn is_nl(c: u8) -> bool { + c == b'\r' || c == b'\n' + } + + let end = match buf.iter().rposition(|&c| !is_nl(c)) { + Some(pos) => pos + 1, + None => buf.len(), + }; + let start = match buf[..end].iter().rposition(|&c| is_nl(c)) { + Some(pos) => pos + 1, + None => 0, + }; + &buf[start..end] +} + +/// Executes a build task as a subprocess. +/// Returns an Err() if we failed outside of the process itself. +/// This is run as a separate thread from the main n2 process and will block +/// on the subprocess, so any additional per-subprocess work we can do belongs +/// here. +fn run_task( + cmdline: &str, + depfile: Option<&Path>, + parse_showincludes: bool, + rspfile: Option<&RspFile>, + mut last_line_cb: impl FnMut(&[u8]), +) -> anyhow::Result<TaskResult> { + if let Some(rspfile) = rspfile { + write_rspfile(rspfile)?; + } + + let mut output = Vec::new(); + let termination = process::run_command(cmdline, |buf| { + output.extend_from_slice(buf); + last_line_cb(find_last_line(&output)); + })?; + + let mut discovered_deps = None; + if parse_showincludes { + // Remove /showIncludes lines from output, regardless of success/fail. 
+ let (includes, filtered) = extract_showincludes(output); + output = filtered; + discovered_deps = Some(includes); + } + if termination == process::Termination::Success { + if let Some(depfile) = depfile { + discovered_deps = Some(read_depfile(depfile)?); + } + } + Ok(TaskResult { + termination, + output, + discovered_deps, + }) +} + +/// Tracks faked "thread ids" -- integers assigned to build tasks to track +/// parallelism in perf trace output. +#[derive(Default)] +struct ThreadIds { + /// An entry is true when claimed, false or nonexistent otherwise. + slots: Vec<bool>, +} +impl ThreadIds { + fn claim(&mut self) -> usize { + match self.slots.iter().position(|&used| !used) { + Some(idx) => { + self.slots[idx] = true; + idx + } + None => { + let idx = self.slots.len(); + self.slots.push(true); + idx + } + } + } + + fn release(&mut self, slot: usize) { + self.slots[slot] = false; + } +} + +enum Message { + Output((BuildId, Vec<u8>)), + Done(FinishedTask), +} + +pub struct Runner { + tx: mpsc::Sender<Message>, + rx: mpsc::Receiver<Message>, + pub running: usize, + tids: ThreadIds, + parallelism: usize, +} + +impl Runner { + pub fn new(parallelism: usize) -> Self { + let (tx, rx) = mpsc::channel(); + Runner { + tx, + rx, + running: 0, + tids: ThreadIds::default(), + parallelism, + } + } + + pub fn can_start_more(&self) -> bool { + self.running < self.parallelism + } + + pub fn is_running(&self) -> bool { + self.running > 0 + } + + pub fn start(&mut self, id: BuildId, build: &Build) { + let cmdline = build.cmdline.clone().unwrap(); + let depfile = build.depfile.clone().map(PathBuf::from); + let rspfile = build.rspfile.clone(); + let parse_showincludes = build.parse_showincludes; + + let tid = self.tids.claim(); + let tx = self.tx.clone(); + std::thread::spawn(move || { + let start = Instant::now(); + let result = run_task( + &cmdline, + depfile.as_deref(), + parse_showincludes, + rspfile.as_ref(), + |line| { + let _ = tx.send(Message::Output((id, line.to_owned()))); 
+ }, + ) + .unwrap_or_else(|err| TaskResult { + termination: process::Termination::Failure, + output: format!("{}\n", err).into_bytes(), + discovered_deps: None, + }); + let finish = Instant::now(); + + let task = FinishedTask { + tid, + buildid: id, + span: (start, finish), + result, + }; + // The send will only fail if the receiver disappeared, e.g. due to shutting down. + let _ = tx.send(Message::Done(task)); + }); + self.running += 1; + } + + /// Wait for a build to complete. May block for a long time. + pub fn wait(&mut self, mut output: impl FnMut(BuildId, Vec<u8>)) -> FinishedTask { + loop { + match self.rx.recv().unwrap() { + Message::Output((bid, line)) => output(bid, line), + Message::Done(task) => { + self.tids.release(task.tid); + self.running -= 1; + return task; + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn show_includes() { + let (includes, output) = extract_showincludes( + b"some text +Note: including file: a +other text +Note: including file: b\r +more text +" + .to_vec(), + ); + assert_eq!(includes, &["a", "b"]); + assert_eq!( + output, + b"some text +other text +more text +" + ); + } + + #[test] + fn find_last() { + assert_eq!(find_last_line(b""), b""); + assert_eq!(find_last_line(b"\n"), b""); + + assert_eq!(find_last_line(b"hello"), b"hello"); + assert_eq!(find_last_line(b"hello\n"), b"hello"); + + assert_eq!(find_last_line(b"hello\nt"), b"t"); + assert_eq!(find_last_line(b"hello\nt\n"), b"t"); + + assert_eq!(find_last_line(b"hello\n\n"), b"hello"); + assert_eq!(find_last_line(b"hello\nt\n\n"), b"t"); + } + + #[test] + fn missing_depfile_allowed() { + let deps = read_depfile(Path::new("/missing/dep/file")).unwrap(); + assert_eq!(deps.len(), 0); + } +} diff --git a/src/terminal.rs b/src/terminal.rs new file mode 100644 index 0000000..3c29327 --- /dev/null +++ b/src/terminal.rs @@ -0,0 +1,80 @@ +#[cfg(unix)] +mod unix { + pub fn use_fancy() -> bool { + unsafe { + libc::isatty(/* stdout */ 1) == 1 + } + } + + pub 
fn get_cols() -> Option<usize> { + unsafe { + let mut winsize = std::mem::zeroed::<libc::winsize>(); + if libc::ioctl(0, libc::TIOCGWINSZ, &mut winsize) < 0 { + return None; + } + if winsize.ws_col < 10 { + // https://github.com/evmar/n2/issues/63: ignore too-narrow widths + return None; + } + Some(winsize.ws_col as usize) + } + } +} + +#[cfg(unix)] +pub use unix::*; + +#[cfg(windows)] +mod windows { + use windows_sys::Win32::{Foundation::*, System::Console::*}; + + pub fn use_fancy() -> bool { + unsafe { + let handle = GetStdHandle(STD_OUTPUT_HANDLE); + let mut mode = 0; + // Note: GetConsoleMode itself fails when not attached to a console. + let ok = GetConsoleMode(handle, &mut mode) != 0; + if ok { + // Enable terminal processing so we can overwrite previous content. + // Ignore errors. + _ = SetConsoleMode(handle, mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING); + } + ok + } + } + + pub fn get_cols() -> Option<usize> { + unsafe { + let console = GetStdHandle(STD_OUTPUT_HANDLE); + if console == INVALID_HANDLE_VALUE { + return None; + } + let mut csbi = ::std::mem::zeroed::<CONSOLE_SCREEN_BUFFER_INFO>(); + if GetConsoleScreenBufferInfo(console, &mut csbi) == 0 { + return None; + } + if csbi.dwSize.X < 10 { + // https://github.com/evmar/n2/issues/63: ignore too-narrow widths + return None; + } + Some(csbi.dwSize.X as usize) + } + } +} + +#[cfg(windows)] +pub use windows::*; + +#[cfg(target_arch = "wasm32")] +mod wasm { + pub fn use_fancy() -> bool { + false + } + + pub fn get_cols() -> Option<usize> { + None + } +} + +#[cfg(target_arch = "wasm32")] +pub use wasm::*; diff --git a/src/trace.rs b/src/trace.rs new file mode 100644 index 0000000..c862bfc --- /dev/null +++ b/src/trace.rs @@ -0,0 +1,124 @@ +//! Chrome trace output. 
+ +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::time::Instant; + +static mut TRACE: Option<Trace> = None; + +pub struct Trace { + start: Instant, + w: BufWriter<File>, + count: usize, +} + +impl Trace { + fn new(path: &str) -> std::io::Result<Self> { + let mut w = BufWriter::new(File::create(path)?); + writeln!(w, "[")?; + Ok(Trace { + start: Instant::now(), + w, + count: 0, + }) + } + + fn write_event_prefix(&mut self, name: &str, ts: Instant) { + if self.count > 0 { + write!(self.w, ",").unwrap(); + } + self.count += 1; + write!( + self.w, + "{{\"pid\":0, \"name\":{:?}, \"ts\":{}, ", + name, + ts.duration_since(self.start).as_micros(), + ) + .unwrap(); + } + + pub fn write_complete(&mut self, name: &str, tid: usize, start: Instant, end: Instant) { + self.write_event_prefix(name, start); + writeln!( + self.w, + "\"tid\": {}, \"ph\":\"X\", \"dur\":{}}}", + tid, + end.duration_since(start).as_micros() + ) + .unwrap(); + } + + fn scope<T>(&mut self, name: &str, f: impl FnOnce() -> T) -> T { + let start = Instant::now(); + let result = f(); + let end = Instant::now(); + self.write_complete(name, 0, start, end); + result + } + + /* + These functions were useful when developing, but are currently unused. 
+ + pub fn write_instant(&mut self, name: &str) { + self.write_event_prefix(name, Instant::now()); + writeln!(self.w, "\"ph\":\"i\"}}").unwrap(); + } + + pub fn write_counts<'a>( + &mut self, + name: &str, + counts: impl Iterator<Item = &'a (&'a str, usize)>, + ) { + self.write_event_prefix(name, Instant::now()); + write!(self.w, "\"ph\":\"C\", \"args\":{{").unwrap(); + for (i, (name, count)) in counts.enumerate() { + if i > 0 { + write!(self.w, ",").unwrap(); + } + write!(self.w, "\"{}\":{}", name, count).unwrap(); + } + writeln!(self.w, "}}}}").unwrap(); + } + */ + + fn close(&mut self) { + self.write_complete("main", 0, self.start, Instant::now()); + writeln!(self.w, "]").unwrap(); + self.w.flush().unwrap(); + } +} + +pub fn open(path: &str) -> std::io::Result<()> { + let trace = Trace::new(path)?; + // Safety: accessing global mut, not threadsafe. + unsafe { + TRACE = Some(trace); + } + Ok(()) +} + +#[inline] +pub fn if_enabled(f: impl FnOnce(&mut Trace)) { + // Safety: accessing global mut, not threadsafe. + unsafe { + match &mut TRACE { + None => {} + Some(t) => f(t), + } + } +} + +#[inline] +pub fn scope<T>(name: &'static str, f: impl FnOnce() -> T) -> T { + // Safety: accessing global mut, not threadsafe. + unsafe { + match &mut TRACE { + None => f(), + Some(t) => t.scope(name, f), + } + } +} + +pub fn close() { + if_enabled(|t| t.close()); +} diff --git a/src/work.rs b/src/work.rs new file mode 100644 index 0000000..6366c72 --- /dev/null +++ b/src/work.rs @@ -0,0 +1,800 @@ +//! Build runner, choosing and executing tasks as determined by out of date inputs. + +use crate::{ + canon::canon_path, db, densemap::DenseMap, graph::*, hash, process, progress, + progress::Progress, signal, smallmap::SmallMap, task, trace, +}; +use std::collections::HashSet; +use std::collections::VecDeque; + +/// Build steps go through this sequence of states. +/// See "Build states" in the design notes. 
+#[derive(Clone, Copy, Debug, PartialEq)] +pub enum BuildState { + /// Default initial state, for Builds unneeded by the current build. + Unknown, + /// Builds we want to ensure are up to date, but which aren't ready yet. + Want, + /// Builds whose dependencies are up to date and are ready to be + /// checked. This is purely a function of whether all builds before + /// it have have run, and is independent of any file state. + /// + /// Preconditions: + /// - generated inputs: have already been stat()ed as part of completing + /// the step that generated those inputs + /// - non-generated inputs: may not have yet stat()ed, so doing those + /// stat()s is part of the work of running these builds + Ready, + /// Builds who have been determined not up to date and which are ready + /// to be executed. + Queued, + /// Currently executing. + Running, + /// Finished executing successfully. + Done, + /// Finished executing but failed. + Failed, +} + +/// Counters that track builds in each state, excluding phony builds. +/// This is only for display to the user and should not be used as a source of +/// truth for tracking progress. +/// Only covers builds not in the "unknown" state, which means it's only builds +/// that are considered part of the current build. 
+#[derive(Clone, Debug, Default)] +pub struct StateCounts([usize; 6]); +impl StateCounts { + fn idx(state: BuildState) -> usize { + match state { + BuildState::Unknown => panic!("unexpected state"), + BuildState::Want => 0, + BuildState::Ready => 1, + BuildState::Queued => 2, + BuildState::Running => 3, + BuildState::Done => 4, + BuildState::Failed => 5, + } + } + pub fn add(&mut self, state: BuildState, delta: isize) { + self.0[StateCounts::idx(state)] = + (self.0[StateCounts::idx(state)] as isize + delta) as usize; + } + pub fn get(&self, state: BuildState) -> usize { + self.0[StateCounts::idx(state)] + } + pub fn total(&self) -> usize { + self.0[0] + self.0[1] + self.0[2] + self.0[3] + self.0[4] + self.0[5] + } +} + +/// Pools gather collections of running builds. +/// Each running build is running "in" a pool; there's a default unbounded +/// pool for builds that don't specify one. +/// See "Tracking build state" in the design notes. +struct PoolState { + /// A queue of builds that are ready to be executed in this pool. + queued: VecDeque<BuildId>, + /// The number of builds currently running in this pool. + running: usize, + /// The total depth of the pool. 0 means unbounded. + depth: usize, +} + +impl PoolState { + fn new(depth: usize) -> Self { + PoolState { + queued: VecDeque::new(), + running: 0, + depth, + } + } +} + +/// BuildStates tracks progress of each Build step through the build. +/// See "Tracking build state" in the design notes. +struct BuildStates { + states: DenseMap<BuildId, BuildState>, + + /// Counts of builds in each state. + counts: StateCounts, + + /// Total number of builds that haven't been driven to completion + /// (done or failed). + total_pending: usize, + + /// Builds in the ready state, stored redundantly for quick access. + ready: VecDeque<BuildId>, + + /// Named pools of queued and running builds. + /// Builds otherwise default to using an unnamed infinite pool. 
+ pools: SmallMap<String, PoolState>, +} + +impl BuildStates { + fn new(size: BuildId, depths: SmallMap<String, usize>) -> Self { + let mut pools = SmallMap::default(); + // The implied default pool. + pools.insert(String::from(""), PoolState::new(0)); + // TODO: the console pool is just a depth-1 pool for now. + pools.insert(String::from("console"), PoolState::new(1)); + for (name, depth) in depths.into_iter() { + pools.insert(name, PoolState::new(depth)); + } + BuildStates { + states: DenseMap::new_sized(size, BuildState::Unknown), + counts: StateCounts::default(), + total_pending: 0, + ready: VecDeque::new(), + pools, + } + } + + fn get(&self, id: BuildId) -> BuildState { + self.states[id] + } + + fn set(&mut self, id: BuildId, build: &Build, state: BuildState) { + // This function is called on all state transitions. + // We get 'prev', the previous state, and 'state', the new state. + let prev = std::mem::replace(&mut self.states[id], state); + + // We skip user-facing counters for phony builds. + let skip_ui_count = build.cmdline.is_none(); + + // println!("{:?} {:?}=>{:?} {:?}", id, prev, state, self.counts); + if prev == BuildState::Unknown { + self.total_pending += 1; + } else { + if prev == BuildState::Running { + self.get_pool(build).unwrap().running -= 1; + } + if !skip_ui_count { + self.counts.add(prev, -1); + } + } + + match state { + BuildState::Ready => { + self.ready.push_back(id); + } + BuildState::Running => { + // Trace instants render poorly in the old Chrome UI, and + // not at all in speedscope or Perfetto. + // if self.counts.get(BuildState::Running) == 0 { + // trace::if_enabled(|t| t.write_instant("first build")); + // } + self.get_pool(build).unwrap().running += 1; + } + BuildState::Done | BuildState::Failed => { + self.total_pending -= 1; + } + _ => {} + }; + if !skip_ui_count { + self.counts.add(state, 1); + } + + /* + This is too expensive to log on every individual state change... 
+ trace::if_enabled(|t| { + t.write_counts( + "builds", + [ + ("want", self.counts.get(BuildState::Want)), + ("ready", self.counts.get(BuildState::Ready)), + ("queued", self.counts.get(BuildState::Queued)), + ("running", self.counts.get(BuildState::Running)), + ("done", self.counts.get(BuildState::Done)), + ] + .iter(), + ) + });*/ + } + + fn unfinished(&self) -> bool { + self.total_pending > 0 + } + + /// Visits a BuildId that is an input to the desired output. + /// Will recursively visit its own inputs. + fn want_build( + &mut self, + graph: &Graph, + stack: &mut Vec<FileId>, + id: BuildId, + ) -> anyhow::Result<()> { + if self.get(id) != BuildState::Unknown { + return Ok(()); // Already visited. + } + + let build = &graph.builds[id]; + self.set(id, build, BuildState::Want); + + // Any Build that doesn't depend on an output of another Build is ready. + let mut ready = true; + for &id in build.ordering_ins() { + self.want_file(graph, stack, id)?; + ready = ready && graph.file(id).input.is_none(); + } + for &id in build.validation_ins() { + // This build doesn't technically depend on the validation inputs, so + // allocate a new stack. Validation inputs could in theory depend on this build's + // outputs. + let mut stack = Vec::new(); + self.want_file(graph, &mut stack, id)?; + } + + if ready { + self.set(id, build, BuildState::Ready); + } + Ok(()) + } + + /// Visits a FileId that is an input to the desired output. + /// Will recursively visit its own inputs. + pub fn want_file( + &mut self, + graph: &Graph, + stack: &mut Vec<FileId>, + id: FileId, + ) -> anyhow::Result<()> { + // Check for a dependency cycle. 
+ if let Some(cycle) = stack.iter().position(|&sid| sid == id) { + let mut err = "dependency cycle: ".to_string(); + for &id in stack[cycle..].iter() { + err.push_str(&format!("{} -> ", graph.file(id).name)); + } + err.push_str(&graph.file(id).name); + anyhow::bail!(err); + } + + if let Some(bid) = graph.file(id).input { + stack.push(id); + self.want_build(graph, stack, bid)?; + stack.pop(); + } + Ok(()) + } + + pub fn pop_ready(&mut self) -> Option<BuildId> { + // Here is where we might consider prioritizing from among the available + // ready set. + self.ready.pop_front() + } + + /// Look up a PoolState by name. + fn get_pool(&mut self, build: &Build) -> Option<&mut PoolState> { + let name = build.pool.as_deref().unwrap_or(""); + for (key, pool) in self.pools.iter_mut() { + if key == name { + return Some(pool); + } + } + None + } + + /// Mark a build as ready to run. + /// May fail if the build references an unknown pool. + pub fn enqueue(&mut self, id: BuildId, build: &Build) -> anyhow::Result<()> { + self.set(id, build, BuildState::Queued); + let pool = self.get_pool(build).ok_or_else(|| { + anyhow::anyhow!( + "{}: unknown pool {:?}", + build.location, + // Unnamed pool lookups always succeed, this error is about + // named pools. + build.pool.as_ref().unwrap() + ) + })?; + pool.queued.push_back(id); + Ok(()) + } + + /// Pop a ready to run queued build. + pub fn pop_queued(&mut self) -> Option<BuildId> { + for (_, pool) in self.pools.iter_mut() { + if pool.depth == 0 || pool.running < pool.depth { + if let Some(id) = pool.queued.pop_front() { + return Some(id); + } + } + } + None + } +} + +#[derive(Clone)] +pub struct Options { + pub failures_left: Option<usize>, + pub parallelism: usize, + /// When true, verbosely explain why targets are considered dirty. + pub explain: bool, + /// When true, just mark targets up to date without running anything. 
+ pub adopt: bool, +} + +pub struct Work<'a> { + graph: Graph, + db: db::Writer, + pub progress: &'a mut dyn Progress, + options: Options, + file_state: FileState, + last_hashes: Hashes, + build_states: BuildStates, +} + +impl<'a> Work<'a> { + pub fn new( + graph: Graph, + last_hashes: Hashes, + db: db::Writer, + options: &Options, + progress: &'a mut dyn Progress, + pools: SmallMap<String, usize>, + ) -> Self { + let file_state = FileState::new(&graph); + let build_count = graph.builds.next_id(); + Work { + graph, + db, + progress, + options: options.clone(), + file_state, + last_hashes, + build_states: BuildStates::new(build_count, pools), + } + } + + pub fn lookup(&mut self, name: &str) -> Option<FileId> { + self.graph.files.lookup(&canon_path(name)) + } + + pub fn want_file(&mut self, id: FileId) -> anyhow::Result<()> { + let mut stack = Vec::new(); + self.build_states.want_file(&self.graph, &mut stack, id) + } + + pub fn want_every_file(&mut self, exclude: Option<FileId>) -> anyhow::Result<()> { + for id in self.graph.files.all_ids() { + if let Some(exclude) = exclude { + if id == exclude { + continue; + } + } + self.want_file(id)?; + } + Ok(()) + } + + /// Check whether a given build is ready, generally after one of its inputs + /// has been updated. + fn recheck_ready(&self, id: BuildId) -> bool { + let build = &self.graph.builds[id]; + // println!("recheck {:?} {} ({}...)", id, build.location, self.graph.file(build.outs()[0]).name); + for &id in build.ordering_ins() { + let file = self.graph.file(id); + match file.input { + None => { + // Only generated inputs contribute to readiness. + continue; + } + Some(id) => { + if self.build_states.get(id) != BuildState::Done { + // println!(" {:?} {} not done, it's {:?}", id, file.name, self.build_states.get(id)); + return false; + } + } + } + } + // println!("{:?} now ready", id); + true + } + + /// Return the id of any input file to a ready build step that is missing. 
+ /// Assumes the input dependencies have already executed, but otherwise + /// may stat the file on disk. + fn ensure_input_files( + &mut self, + id: BuildId, + discovered: bool, + ) -> anyhow::Result<Option<FileId>> { + let build = &self.graph.builds[id]; + let ids = if discovered { + build.discovered_ins() + } else { + build.dirtying_ins() + }; + for &id in ids { + let mtime = match self.file_state.get(id) { + Some(mtime) => mtime, + None => { + let file = self.graph.file(id); + if file.input.is_some() { + // This dep is generated by some other build step, but the + // build graph didn't cause that other build step to be + // visited first. This is an error in the build file. + // For example, imagine: + // build generated.h: codegen_headers ... + // build generated.stamp: stamp || generated.h + // build foo.o: cc ... + // If we deps discover that foo.o depends on generated.h, + // we must have some dependency path from foo.o to generated.h, + // either direct or indirect (like the stamp). If that + // were present, then we'd already have file_state for this + // file and wouldn't get here. + anyhow::bail!( + "{}: used generated file {}, but has no dependency path to it", + build.location, + file.name + ); + } + self.file_state.stat(id, file.path())? + } + }; + if mtime == MTime::Missing { + return Ok(Some(id)); + } + } + Ok(None) + } + + /// Given a task that just finished, record any discovered deps and hash. + /// Postcondition: all outputs have been stat()ed. + fn record_finished(&mut self, id: BuildId, result: task::TaskResult) -> anyhow::Result<()> { + // Clean up the deps discovered from the task. + let mut deps = Vec::new(); + if let Some(names) = result.discovered_deps { + for name in names { + let fileid = self.graph.files.id_from_canonical(canon_path(name)); + // Filter duplicates from the file list. + if deps.contains(&fileid) { + continue; + } + // Filter out any deps that were already dirtying in the build file. 
+ // Note that it's allowed to have a duplicate against an order-only + // dep; see `discover_existing_dep` test. + if self.graph.builds[id].dirtying_ins().contains(&fileid) { + continue; + } + deps.push(fileid); + } + } + + // We may have discovered new deps, so ensure we have mtimes for those. + let deps_changed = self.graph.builds[id].update_discovered(deps); + if deps_changed { + if let Some(missing) = self.ensure_input_files(id, true)? { + anyhow::bail!( + "{}: depfile references nonexistent {}", + self.graph.builds[id].location, + self.graph.file(missing).name + ); + } + } + + let input_was_missing = self.graph.builds[id] + .dirtying_ins() + .iter() + .any(|&id| self.file_state.get(id).unwrap() == MTime::Missing); + + // Update any cached state of the output files to reflect their new state. + let output_was_missing = self.stat_all_outputs(id)?.is_some(); + + if input_was_missing || output_was_missing { + // If a file is missing, don't record the build in in the db. + // It will be considered dirty next time anyway due to the missing file. + return Ok(()); + } + + let build = &self.graph.builds[id]; + let hash = hash::hash_build(&self.graph.files, &self.file_state, build); + self.db.write_build(&self.graph, id, hash)?; + + Ok(()) + } + + /// Given a build that just finished, check whether its dependent builds are now ready. + fn ready_dependents(&mut self, id: BuildId) { + let build = &self.graph.builds[id]; + self.build_states.set(id, build, BuildState::Done); + + let mut dependents = HashSet::new(); + for &id in build.outs() { + for &id in &self.graph.file(id).dependents { + if self.build_states.get(id) != BuildState::Want { + continue; + } + dependents.insert(id); + } + } + for id in dependents { + if !self.recheck_ready(id) { + continue; + } + self.build_states + .set(id, &self.graph.builds[id], BuildState::Ready); + } + } + + /// Stat all the outputs of a build. 
+ /// Called before it's run (for determining whether it's up to date) and + /// after (to see if it touched any outputs). + fn stat_all_outputs(&mut self, id: BuildId) -> anyhow::Result<Option<FileId>> { + let build = &self.graph.builds[id]; + let mut missing = None; + for &id in build.outs() { + let file = self.graph.file(id); + let mtime = self.file_state.stat(id, file.path())?; + if mtime == MTime::Missing && missing.is_none() { + missing = Some(id); + } + } + Ok(missing) + } + + /// Stat all the input/output files for a given build in anticipation of + /// deciding whether it needs to be run again. + /// Prereq: any dependent input is already generated. + /// Returns a build error if any required input files are missing. + /// Otherwise returns the missing id if any expected but not required files, + /// e.g. outputs, are missing, implying that the build needs to be executed. + fn check_build_files_missing(&mut self, id: BuildId) -> anyhow::Result<Option<FileId>> { + // Ensure we have state for all input files. + if let Some(missing) = self.ensure_input_files(id, false)? { + let file = self.graph.file(missing); + if file.input.is_none() { + let build = &self.graph.builds[id]; + anyhow::bail!("{}: input {} missing", build.location, file.name); + } + return Ok(Some(missing)); + } + if let Some(missing) = self.ensure_input_files(id, true)? { + return Ok(Some(missing)); + } + + // Ensure we have state for all output files. + // We know this build is solely responsible for updating these outputs, + // and if we're checking if it's dirty we are visiting it the first + // time, so we stat unconditionally. + // This is looking at if the outputs are already present. + if let Some(missing) = self.stat_all_outputs(id)? { + return Ok(Some(missing)); + } + + // All files accounted for. + Ok(None) + } + + /// Like check_build_files_missing, but for phony rules, which have + /// different behavior for inputs. 
+ fn check_build_files_missing_phony(&mut self, id: BuildId) -> anyhow::Result<()> { + // We don't consider the input files. This works around + // https://github.com/ninja-build/ninja/issues/1779 + // which is a bug that a phony rule with a missing input + // dependency doesn't fail the build. + // TODO: key this behavior off of the "ninja compat" flag. + // TODO: reconsider how phony deps work, maybe we should always promote + // phony deps to order-only? + + // Maintain the invariant that we have stat info for all outputs, but + // we generally don't expect them to have been created. + // TODO: what should happen if a rule uses a phony output as its own input? + // The Ninja manual suggests you can use phony rules to aggregate outputs + // together, so we might need to create some sort of fake mtime here? + self.stat_all_outputs(id)?; + Ok(()) + } + + /// Check a ready build for whether it needs to run, returning true if so. + /// Prereq: any dependent input is already generated. + fn check_build_dirty(&mut self, id: BuildId) -> anyhow::Result<bool> { + let build = &self.graph.builds[id]; + let phony = build.cmdline.is_none(); + let file_missing = if phony { + self.check_build_files_missing_phony(id)?; + return Ok(false); // Phony builds never need to run anything. + } else { + self.check_build_files_missing(id)? + }; + + // If any files are missing, the build is dirty without needing + // to consider hashes. + let build = &self.graph.builds[id]; + if let Some(missing) = file_missing { + if self.options.explain { + self.progress.log(&format!( + "explain: {}: input {} missing", + build.location, + self.graph.file(missing).name + )); + } + return Ok(true); + } + + // If we get here, all the relevant files are present and stat()ed, + // so compare the hash against the last hash. + + // TODO: skip this whole function if no previous hash is present. 
+ // More complex than just moving this block up, because we currently + // assume that we've always checked inputs after we've run a build. + let prev_hash = match self.last_hashes.get(id) { + None => { + if self.options.explain { + self.progress.log(&format!( + "explain: {}: no previous state known", + build.location + )); + } + return Ok(true); + } + Some(prev_hash) => prev_hash, + }; + + let hash = hash::hash_build(&self.graph.files, &self.file_state, build); + if prev_hash != hash { + if self.options.explain { + self.progress + .log(&format!("explain: {}: manifest changed", build.location)); + self.progress.log(&hash::explain_hash_build( + &self.graph.files, + &self.file_state, + build, + )); + } + return Ok(true); + } + + Ok(false) + } + + /// Create the parent directories of a given list of fileids. + /// Used to create directories used for outputs. + /// TODO: do this within the thread executing the subtask? + fn create_parent_dirs(&self, ids: &[FileId]) -> anyhow::Result<()> { + let mut dirs: Vec<&std::path::Path> = Vec::new(); + for &out in ids { + if let Some(parent) = self.graph.file(out).path().parent() { + if dirs.iter().any(|&p| p == parent) { + continue; + } + std::fs::create_dir_all(parent)?; + dirs.push(parent); + } + } + Ok(()) + } + + /// Runs the build. + /// Returns the number of tasks executed on successful builds, or None on failed builds. + pub fn run(&mut self) -> anyhow::Result<Option<usize>> { + #[cfg(unix)] + signal::register_sigint(); + let mut tasks_done = 0; + let mut tasks_failed = 0; + let mut runner = task::Runner::new(self.options.parallelism); + while self.build_states.unfinished() { + self.progress.update(&self.build_states.counts); + + // Approach: + // - First make sure we're running as many queued tasks as the runner + // allows. + // - Next make sure we've finished or enqueued any tasks that are + // ready. + // - If either one of those made progress, loop, to ensure the other + // one gets to work from the result. 
+ // - If neither made progress, wait for a task to complete and + // loop. + + let mut made_progress = false; + while runner.can_start_more() { + let id = match self.build_states.pop_queued() { + Some(id) => id, + None => break, + }; + let build = &self.graph.builds[id]; + self.build_states.set(id, build, BuildState::Running); + self.create_parent_dirs(build.outs())?; + runner.start(id, build); + self.progress.task_started(id, build); + made_progress = true; + } + + while let Some(id) = self.build_states.pop_ready() { + if !self.check_build_dirty(id)? { + // Not dirty; go directly to the Done state. + self.ready_dependents(id); + } else if self.options.adopt { + // Act as if the target already finished. + self.record_finished( + id, + task::TaskResult { + termination: process::Termination::Success, + output: vec![], + discovered_deps: None, + }, + )?; + self.ready_dependents(id); + } else { + self.build_states.enqueue(id, &self.graph.builds[id])?; + } + made_progress = true; + } + + if made_progress { + continue; + } + + if !runner.is_running() { + if tasks_failed > 0 { + // No more progress can be made, hopefully due to tasks that failed. + break; + } + panic!("BUG: no work to do and runner not running"); + } + + let task = runner.wait(|id, line| { + self.progress.task_output(id, line); + }); + let build = &self.graph.builds[task.buildid]; + trace::if_enabled(|t| { + let desc = progress::build_message(build); + t.write_complete(desc, task.tid + 1, task.span.0, task.span.1); + }); + + self.progress + .task_finished(task.buildid, build, &task.result); + match task.result.termination { + process::Termination::Failure => { + if let Some(failures_left) = &mut self.options.failures_left { + *failures_left -= 1; + if *failures_left == 0 { + return Ok(None); + } + } + tasks_failed += 1; + self.build_states + .set(task.buildid, build, BuildState::Failed); + } + process::Termination::Interrupted => { + // If the task was interrupted bail immediately. 
+ return Ok(None); + } + process::Termination::Success => { + tasks_done += 1; + self.record_finished(task.buildid, task.result)?; + self.ready_dependents(task.buildid); + } + }; + } + + // If the user ctl-c's, it likely caused a subtask to fail. + // But at least for the LLVM test suite it can catch sigint and print + // "interrupted by user" and exit with success, and in that case we + // don't want n2 to print a "succeeded" message afterwards. + let success = tasks_failed == 0 && !signal::was_interrupted(); + Ok(success.then_some(tasks_done)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_cycle() -> Result<(), anyhow::Error> { + let file = " +build a: phony b +build b: phony c +build c: phony a +"; + let mut graph = crate::load::parse("build.ninja", file.as_bytes().to_vec())?; + let a_id = graph.files.id_from_canonical("a".to_owned()); + let mut states = BuildStates::new(graph.builds.next_id(), SmallMap::default()); + let mut stack = Vec::new(); + match states.want_file(&graph, &mut stack, a_id) { + Ok(_) => panic!("expected build cycle error"), + Err(err) => assert_eq!(err.to_string(), "dependency cycle: a -> b -> c -> a"), + } + Ok(()) + } +} diff --git a/tests/e2e/basic.rs b/tests/e2e/basic.rs new file mode 100644 index 0000000..fca23b8 --- /dev/null +++ b/tests/e2e/basic.rs @@ -0,0 +1,453 @@ +use crate::e2e::*; + +#[test] +fn empty_file() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write("build.ninja", "")?; + let out = space.run(&mut n2_command(vec![]))?; + assert_eq!(std::str::from_utf8(&out.stdout)?, "n2: no work to do\n"); + Ok(()) +} + +#[test] +fn basic_build() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[TOUCH_RULE, "build out: touch in", ""].join("\n"), + )?; + space.write("in", "")?; + space.run_expect(&mut n2_command(vec!["out"]))?; + assert!(space.read("out").is_ok()); + + Ok(()) +} + +#[test] +fn create_subdir() -> anyhow::Result<()> { + // Run a 
build rule that needs a subdir to be automatically created. + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[TOUCH_RULE, "build subdir/out: touch in", ""].join("\n"), + )?; + space.write("in", "")?; + space.run_expect(&mut n2_command(vec!["subdir/out"]))?; + assert!(space.read("subdir/out").is_ok()); + + Ok(()) +} + +#[cfg(unix)] +#[test] +fn generate_rsp_file() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule cat + command = cat ${out}.rsp > ${out} + rspfile = ${out}.rsp + rspfile_content = 1 $in 2 $in_newline 3 + +rule litter + command = cat make/me/${out}.rsp > ${out} + rspfile = make/me/${out}.rsp + rspfile_content = random stuff + +rule touch + command = touch $out + +build main: cat foo bar baz in +build foo: litter bar +build bar: touch baz +build baz: touch in +", + )?; + space.write("in", "go!")?; + + let _ = space.run_expect(&mut n2_command(vec!["main"]))?; + + // The 'main' and 'foo' targets copy the contents of their rsp file to their + // output. + let main_rsp = space.read("main").unwrap(); + assert_eq!(main_rsp, b"1 foo bar baz in 2 foo\nbar\nbaz\nin 3"); + let foo_rsp = space.read("foo").unwrap(); + assert_eq!(foo_rsp, b"random stuff"); + + // The 'make/me' directory was created when writing an rsp file. + // It should still be there. + let meta = space.metadata("make/me").unwrap(); + assert!(meta.is_dir()); + + // Run again: everything should be up to date. + let out = space.run_expect(&mut n2_command(vec!["main"]))?; + assert_output_contains(&out, "no work"); + + Ok(()) +} + +/// Run a task that prints something, and verify it shows up. 
+#[cfg(unix)] +#[test] +fn spam_output() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule quiet + description = quiet $out + command = touch $out +rule spam + description = spam $out + command = echo greetz from $out && touch $out +build a: quiet +build b: spam a +build c: quiet b +", + )?; + let out = space.run_expect(&mut n2_command(vec!["c"]))?; + assert_output_contains( + &out, + "quiet a +spam b +greetz from b +quiet c +", + ); + Ok(()) +} + +#[test] +fn specify_build_file() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build_specified.ninja", + &[TOUCH_RULE, "build out: touch in", ""].join("\n"), + )?; + space.write("in", "")?; + space.run_expect(&mut n2_command(vec!["-f", "build_specified.ninja", "out"]))?; + assert!(space.read("out").is_ok()); + + Ok(()) +} + +/// Regression test for https://github.com/evmar/n2/issues/44 +/// and https://github.com/evmar/n2/issues/46 . +/// Build with the same output listed multiple times. +#[test] +fn repeated_out() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + "build dup dup: touch in", + "build out: touch dup", + "", + ] + .join("\n"), + )?; + space.write("in", "")?; + space.write("dup", "")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "is repeated in output list"); + + Ok(()) +} + +/// Regression test for https://github.com/evmar/n2/issues/55 +/// UTF-8 filename. 
+#[cfg(unix)] +#[test] +fn utf8_filename() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + " +rule echo + description = unicode variable: $in + command = echo unicode command line: $in && touch $out +", + "build out: echo reykjavík.md", + "", + ] + .join("\n"), + )?; + space.write("reykjavík.md", "")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "unicode variable: reykjavík.md"); + assert_output_contains(&out, "unicode command line: reykjavík.md"); + + Ok(()) +} + +#[test] +fn explain() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[TOUCH_RULE, "build out: touch in", ""].join("\n"), + )?; + space.write("in", "")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "up to date"); + + space.write("in", "")?; + let out = space.run_expect(&mut n2_command(vec!["-d", "explain", "out"]))?; + // The main "explain" log line: + assert_output_contains(&out, "explain: build.ninja:6: manifest changed"); + // The dump of the file manifest after includes mtimes that we don't want + // to be sensitive to, so just look for some bits we know show up there. + assert_output_contains(&out, "discovered:"); + + Ok(()) +} + +/// Meson generates a build step that writes to one of its inputs. 
+#[test] +fn write_to_input() -> anyhow::Result<()> { + #[cfg(unix)] + let touch_input_command = "touch out in"; + #[cfg(windows)] + let touch_input_command = "cmd /c type nul > in && cmd /c type nul > out"; + let touch_input_rule = format!( + " +rule touch_in + description = touch out+in + command = {} +", + touch_input_command + ); + + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[&touch_input_rule, "build out: touch_in in", ""].join("\n"), + )?; + space.write("in", "")?; + space.sub_mtime("in", std::time::Duration::from_secs(1))?; + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + + // TODO: to support meson, we need this second invocation to not build anything. + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + + Ok(()) +} + +#[test] +fn showincludes() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + ECHO_RULE, + " +build out: echo + text = Note: including file: foo + deps = msvc +", + ] + .join("\n"), + )?; + space.write("foo", "")?; + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + + space.write("foo", "")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + + Ok(()) +} + +// Repro for issue #83. +#[cfg(unix)] +#[test] +fn eval_twice() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + " +var = 123 +rule custom + command = $cmd $var +build out: custom + cmd = echo $var hello +", + ] + .join("\n"), + )?; + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "echo 123 hello 123"); + Ok(()) +} + +// Repro for issue #84: phony depending on phony. 
+#[test] +fn phony_depends() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + " +build out1: touch +build out2: phony out1 +build out3: phony out2 +", + ] + .join("\n"), + )?; + space.run_expect(&mut n2_command(vec!["out3"]))?; + space.read("out1")?; + Ok(()) +} + +// builddir controls where .n2_db is written. +#[test] +fn builddir() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + "builddir = foo", + TOUCH_RULE, + "build $builddir/bar: touch", + "", + ] + .join("\n"), + )?; + space.run_expect(&mut n2_command(vec!["foo/bar"]))?; + space.read("foo/.n2_db")?; + Ok(()) +} + +#[test] +fn bad_rule_variable() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule my_rule + command = touch $out + my_var = foo + +build out: my_rule +", + )?; + + let out = space.run(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "unexpected variable \"my_var\""); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn deps_evaluate_build_bindings() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule touch + command = touch $out +rule copy + command = cp $in $out +build foo: copy ${my_dep} + my_dep = bar +build bar: touch +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + space.read("foo")?; + Ok(()) +} + +#[cfg(unix)] +#[test] +fn looks_up_values_from_build() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule copy_rspfile + command = cp $out.rsp $out + rspfile = $out.rsp + +build foo: copy_rspfile + rspfile_content = Hello, world! 
+", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn build_bindings_arent_recursive() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule write_file + command = echo $my_var > $out + +build foo: write_file + my_var = Hello,$my_var_2 world! + my_var_2 = my_var_2_value +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!\n"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn empty_variable_binding() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +empty_var = + +rule write_file + command = echo $my_var > $out + +build foo: write_file + my_var = Hello,$empty_var world! +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!\n"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn empty_build_variable() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule write_file + command = echo $my_var > $out + +build foo: write_file + empty = + my_var = Hello, world! +", + )?; + space.run_expect(&mut n2_command(vec!["foo"]))?; + assert_eq!(space.read("foo")?, b"Hello, world!\n"); + Ok(()) +} diff --git a/tests/e2e/directories.rs b/tests/e2e/directories.rs new file mode 100644 index 0000000..b416e28 --- /dev/null +++ b/tests/e2e/directories.rs @@ -0,0 +1,24 @@ +use crate::e2e::*; + +#[cfg(unix)] +#[test] +fn dep_on_current_directory() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule list_files + command = ls $in > $out + +build out: list_files . 
+", + )?; + space.write("foo", "")?; + space.run_expect(&mut n2_command(vec!["out"]))?; + assert_eq!(space.read("out")?, b"build.ninja\nfoo\nout\n"); + space.write("foo2", "")?; + space.run_expect(&mut n2_command(vec!["out"]))?; + assert_eq!(space.read("out")?, b"build.ninja\nfoo\nfoo2\nout\n"); + + Ok(()) +} diff --git a/tests/e2e/discovered.rs b/tests/e2e/discovered.rs new file mode 100644 index 0000000..a4252e5 --- /dev/null +++ b/tests/e2e/discovered.rs @@ -0,0 +1,159 @@ +use crate::e2e::*; + +#[cfg(unix)] +const GENDEP_RULE: &str = " +rule gendep + description = gendep $out + command = echo \"$dep_content\" > $out.d && touch $out + depfile = $out.d +"; + +#[cfg(windows)] +const GENDEP_RULE: &str = " +rule gendep + description = gendep $out + command = cmd /c echo $dep_content > $out.d && type nul > $out + depfile = $out.d +"; + +/// depfile contains invalid syntax. +#[test] +fn bad_depfile() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + GENDEP_RULE, + " +build out: gendep + dep_content = garbage text +", + "", + ] + .join("\n"), + )?; + + let out = space.run(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "parse error:"); + Ok(()) +} + +/// depfile contains reference to missing file. +#[test] +fn depfile_missing_file() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + GENDEP_RULE, + " +build out: gendep + dep_content = out: missing_file +", + "", + ] + .join("\n"), + )?; + + let out = space.run(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "depfile references nonexistent missing_file"); + Ok(()) +} + +/// depfile contains reference to existing order-only dep. 
+#[test] +fn discover_existing_dep() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + GENDEP_RULE, + TOUCH_RULE, + "build in: touch", + " +build out: gendep || in + dep_content = out: in +", + "", + ] + .join("\n"), + )?; + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 2 tasks"); + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "no work"); + + // Even though out only has an order-only dep on 'in' in the build file, + // we still should rebuild it due to the discovered dep on 'in'. + space.write("in", "x")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "gendep out"); + + Ok(()) +} + +#[cfg(unix)] +#[test] +fn multi_output_depfile() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule myrule + command = echo \"out: foo\" > out.d && echo \"out2: foo2\" >> out.d && echo >> out.d && echo >> out.d && touch out out2 + depfile = out.d + +build out out2: myrule +", + )?; + space.write("foo", "")?; + space.write("foo2", "")?; + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "no work"); + space.write("foo", "x")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + space.write("foo2", "x")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "no work"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn escaped_newline_in_depfile() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule myrule + command = echo \"out: foo \\\\\" > out.d && echo \" foo2\" >> out.d 
&& touch out + depfile = out.d + +build out: myrule +", + )?; + space.write("foo", "")?; + space.write("foo2", "")?; + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "no work"); + space.write("foo", "x")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + space.write("foo2", "x")?; + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "no work"); + Ok(()) +} diff --git a/tests/e2e/missing.rs b/tests/e2e/missing.rs new file mode 100644 index 0000000..1b78d36 --- /dev/null +++ b/tests/e2e/missing.rs @@ -0,0 +1,113 @@ +//! Tests for behavior around missing files. + +use super::*; + +#[test] +fn missing_input() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[TOUCH_RULE, "build out: touch in", ""].join("\n"), + )?; + + let out = space.run(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "input in missing"); + + Ok(()) +} + +#[test] +fn missing_generated() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + ECHO_RULE, + "build mid: echo", // never writes output + "build out: touch mid", // uses never-written output + "", + ] + .join("\n"), + )?; + + // https://github.com/evmar/n2/issues/69 + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "echo mid"); + assert_output_contains(&out, "touch out"); + + Ok(()) +} + +#[test] +fn missing_phony() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + "build order_only: phony", // never writes output + "build out: touch || order_only", // uses never-written output + 
"", + ] + .join("\n"), + )?; + + // https://github.com/evmar/n2/issues/69 + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "touch out"); + + Ok(()) +} + +// Ensure we don't regress on +// https://github.com/ninja-build/ninja/issues/1779 +// I can't remember the specific code CMake generates that relies on this; +// I wonder if we can tighten the behavior at all. +#[test] +fn missing_phony_input() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[TOUCH_RULE, "build out: phony || no_such_file", ""].join("\n"), + )?; + + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "no work to do"); + + Ok(()) +} + +#[test] +fn phony_output_on_disk() -> anyhow::Result<()> { + // https://github.com/evmar/n2/issues/40 + // CMake uses a phony rule targeted at a real file as a way of marking + // "don't fail the build if this file is missing", but it had the consequence + // of confusing our dirty-checking logic. + + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + "build out: touch | phony_file", + "build phony_file: phony", + "", + ] + .join("\n"), + )?; + + // Despite being a target of a phony rule, the file exists on disk. + space.write("phony_file", "")?; + + // Expect the first build to generate some state... + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + // ...but a second one should be up to date (#40 was that this ran again). + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "no work to do"); + + Ok(()) +} diff --git a/tests/e2e/mod.rs b/tests/e2e/mod.rs new file mode 100644 index 0000000..a1e7e7f --- /dev/null +++ b/tests/e2e/mod.rs @@ -0,0 +1,141 @@ +//! Support code for e2e tests, which run n2 as a binary. 
+ +mod basic; +mod directories; +mod discovered; +mod missing; +mod regen; +mod validations; + +use anyhow::anyhow; + +pub fn n2_binary() -> std::path::PathBuf { + std::env::current_exe() + .expect("test binary path") + .parent() + .expect("test binary directory") + .parent() + .expect("binary directory") + .join("n2") +} + +pub fn n2_command(args: Vec<&str>) -> std::process::Command { + let mut cmd = std::process::Command::new(n2_binary()); + cmd.args(args); + cmd +} + +fn print_output(out: &std::process::Output) { + // Gross: use print! instead of writing to stdout so Rust test + // framework can capture it. + print!("{}", std::str::from_utf8(&out.stdout).unwrap()); + print!("{}", std::str::from_utf8(&out.stderr).unwrap()); +} + +pub fn assert_output_contains(out: &std::process::Output, text: &str) { + let out = std::str::from_utf8(&out.stdout).unwrap(); + if !out.contains(text) { + panic!( + "assertion failed; expected output to contain {:?} but got:\n{}", + text, out + ); + } +} + +pub fn assert_output_not_contains(out: &std::process::Output, text: &str) { + let out = std::str::from_utf8(&out.stdout).unwrap(); + if out.contains(text) { + panic!( + "assertion failed; expected output to not contain {:?} but got:\n{}", + text, out + ); + } +} + +/// Manages a temporary directory for invoking n2. +pub struct TestSpace { + dir: tempfile::TempDir, +} +impl TestSpace { + pub fn new() -> anyhow::Result<Self> { + let dir = tempfile::tempdir()?; + Ok(TestSpace { dir }) + } + + /// Write a file into the working space. + pub fn write(&self, path: &str, content: &str) -> std::io::Result<()> { + std::fs::write(self.dir.path().join(path), content) + } + + /// Read a file from the working space. 
+ pub fn read(&self, path: &str) -> anyhow::Result<Vec<u8>> { + let path = self.dir.path().join(path); + std::fs::read(&path).map_err(|err| anyhow!("read {}: {}", path.display(), err)) + } + + pub fn metadata(&self, path: &str) -> std::io::Result<std::fs::Metadata> { + std::fs::metadata(self.dir.path().join(path)) + } + + pub fn sub_mtime(&self, path: &str, dur: std::time::Duration) -> anyhow::Result<()> { + let path = self.dir.path().join(path); + let t = std::time::SystemTime::now() - dur; + filetime::set_file_mtime(path, filetime::FileTime::from_system_time(t))?; + Ok(()) + } + + /// Invoke n2, returning process output. + pub fn run(&self, cmd: &mut std::process::Command) -> std::io::Result<std::process::Output> { + cmd.current_dir(self.dir.path()).output() + } + + /// Like run, but also print output if the build failed. + pub fn run_expect( + &self, + cmd: &mut std::process::Command, + ) -> anyhow::Result<std::process::Output> { + let out = self.run(cmd)?; + if !out.status.success() { + print_output(&out); + anyhow::bail!("build failed, status {}", out.status); + } + Ok(out) + } + + /// Persist the temp dir locally and abort the test. Debugging helper. + #[allow(dead_code)] + pub fn eject(self) -> ! { + panic!("ejected at {:?}", self.dir.into_path()); + } +} + +// Ensure TOUCH_RULE has the same description and number of lines of text +// on Windows/non-Windows to make tests agnostic to platform. 
+ +#[cfg(unix)] +pub const TOUCH_RULE: &str = " +rule touch + command = touch $out + description = touch $out +"; + +#[cfg(windows)] +pub const TOUCH_RULE: &str = " +rule touch + command = cmd /c type nul > $out + description = touch $out +"; + +#[cfg(unix)] +pub const ECHO_RULE: &str = " +rule echo + command = echo $text + description = echo $out +"; + +#[cfg(windows)] +pub const ECHO_RULE: &str = " +rule echo + command = cmd /c echo $text + description = echo $out +"; diff --git a/tests/e2e/regen.rs b/tests/e2e/regen.rs new file mode 100644 index 0000000..49a8b05 --- /dev/null +++ b/tests/e2e/regen.rs @@ -0,0 +1,137 @@ +//! Tests around regenerating the build.ninja file. + +use crate::e2e::*; + +#[cfg(unix)] +#[test] +fn generate_build_file() -> anyhow::Result<()> { + // Run a project where a build rule generates the build.ninja. + let space = TestSpace::new()?; + space.write( + "gen.sh", + " +echo 'regenerating build.ninja' +cat >build.ninja <<EOT +rule regen + command = sh ./gen.sh + generator = 1 +build build.ninja: regen gen.sh +rule touch + command = touch \\$out +build out: touch +EOT +", + )?; + + // Generate the initial build.ninja. + space.run_expect(std::process::Command::new("sh").args(vec!["./gen.sh"]))?; + + // Run: expect to regenerate because we don't know how the file was made. + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "regenerating build.ninja"); + assert_output_contains(&out, "ran 2 tasks"); + + // Run: everything should be up to date. + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_not_contains(&out, "regenerating build.ninja"); + assert_output_contains(&out, "no work"); + + Ok(()) +} + +#[cfg(unix)] +#[test] +fn generate_specified_build_file() -> anyhow::Result<()> { + // Run a project where a build rule generates specified_build.ninja. 
+ let space = TestSpace::new()?; + space.write( + "gen.sh", + " +echo 'regenerating specified_build.ninja' +cat >specified_build.ninja <<EOT +rule regen + command = sh ./gen.sh + generator = 1 +build specified_build.ninja: regen gen.sh +rule touch + command = touch \\$out +build out: touch +EOT +", + )?; + + // Generate the initial specified_build.ninja. + space.run_expect(std::process::Command::new("sh").args(vec!["./gen.sh"]))?; + + // Run: expect to regenerate because we don't know how the file was made. + let out = space.run_expect(&mut n2_command(vec!["-f", "specified_build.ninja", "out"]))?; + assert_output_contains(&out, "regenerating specified_build.ninja"); + assert_output_contains(&out, "ran 2 tasks"); + + // Run: everything should be up to date. + let out = space.run_expect(&mut n2_command(vec!["-f", "specified_build.ninja", "out"]))?; + assert_output_not_contains(&out, "regenerating"); + assert_output_contains(&out, "no work"); + + Ok(()) +} + +#[cfg(unix)] +#[test] +fn generate_build_file_failure() -> anyhow::Result<()> { + // Run a project where a build rule generates the build.ninja but it fails. + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + "build out: touch", + " +rule regen + command = sh ./gen.sh + generator = 1", + "build build.ninja: regen gen.sh", + "", + ] + .join("\n"), + )?; + space.write("gen.sh", "exit 1")?; + + // Run: regenerate and fail. + let out = space.run(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "failed:"); + + Ok(()) +} + +/// Use "-t restat" to mark the build.ninja up to date ahead of time. +#[cfg(unix)] // TODO: this ought to work on Windows, hrm. 
+#[test] +fn restat() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[TOUCH_RULE, "build build.ninja: touch in", ""].join("\n"), + )?; + space.write("in", "")?; + + let out = space.run_expect(&mut n2_command(vec![ + "-d", + "ninja_compat", + "-t", + "restat", + "build.ninja", + ]))?; + assert_output_not_contains(&out, "touch build.ninja"); + + // Building the build file should do nothing, because restat marked it up to date. + let out = space.run_expect(&mut n2_command(vec!["build.ninja"]))?; + assert_output_not_contains(&out, "touch build.ninja"); + + // But modifying the input should cause it to be up to date. + space.write("in", "")?; + let out = space.run_expect(&mut n2_command(vec!["build.ninja"]))?; + assert_output_contains(&out, "touch build.ninja"); + + Ok(()) +} diff --git a/tests/e2e/validations.rs b/tests/e2e/validations.rs new file mode 100644 index 0000000..a0ed941 --- /dev/null +++ b/tests/e2e/validations.rs @@ -0,0 +1,90 @@ +//! Tests for the 'validations' feature, which are build edges marked with |@. + +use crate::e2e::*; + +#[test] +fn basic_validation() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + "build my_validation: touch", + "build out: touch |@ my_validation", + "", + ] + .join("\n"), + )?; + space.run_expect(&mut n2_command(vec!["out"]))?; + assert!(space.read("out").is_ok()); + assert!(space.read("my_validation").is_ok()); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn build_starts_before_validation_finishes() -> anyhow::Result<()> { + // When a given target has a validation, that validation is part of the + // overall build. But despite there being a build edge, the target shouldn't + // wait for the validation. + // To verify this, we make the validation command internally wait for the + // target, effectively reversing the dependency order at runtime. 
+ let space = TestSpace::new()?; + space.write( + "build.ninja", + " +# Waits for the file $wait_for to exist, then touches $out. +rule build_slow + command = until [ -f $wait_for ]; do sleep 0.1; done; touch $out + +rule build_fast + command = touch $out + +build out: build_fast regular_input |@ validation_input +build regular_input: build_fast +build validation_input: build_slow + wait_for = out +", + )?; + space.run_expect(&mut n2_command(vec!["out"]))?; + Ok(()) +} + +#[cfg(unix)] +#[test] +fn build_fails_when_validation_fails() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule touch + command = touch $out + +rule fail + command = exit 1 + +build out: touch |@ validation_input +build validation_input: fail +", + )?; + let output = space.run(&mut n2_command(vec!["out"]))?; + assert!(!output.status.success()); + Ok(()) +} + +#[test] +fn validation_inputs_break_cycles() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + &[ + TOUCH_RULE, + "build out: touch |@ validation_input", + "build validation_input: touch out", + "", + ] + .join("\n"), + )?; + space.run_expect(&mut n2_command(vec!["out"]))?; + Ok(()) +} diff --git a/tests/e2e_test.rs b/tests/e2e_test.rs new file mode 100644 index 0000000..7f70581 --- /dev/null +++ b/tests/e2e_test.rs @@ -0,0 +1,4 @@ +//! Integration test. Runs n2 binary against a temp directory. + +// All the tests live in the e2e/ subdir. 
+mod e2e; diff --git a/tests/manual/.gitignore b/tests/manual/.gitignore new file mode 100644 index 0000000..e4dc11b --- /dev/null +++ b/tests/manual/.gitignore @@ -0,0 +1,2 @@ +.n2_db +.ninja_log diff --git a/tests/manual/clang-cl/.gitignore b/tests/manual/clang-cl/.gitignore new file mode 100644 index 0000000..996bd9f --- /dev/null +++ b/tests/manual/clang-cl/.gitignore @@ -0,0 +1 @@ +test.exe diff --git a/tests/manual/clang-cl/README.md b/tests/manual/clang-cl/README.md new file mode 100644 index 0000000..d1eb569 --- /dev/null +++ b/tests/manual/clang-cl/README.md @@ -0,0 +1 @@ +Invokes clang-cl to sanity check the /showIncludes handling. diff --git a/tests/manual/clang-cl/build.ninja b/tests/manual/clang-cl/build.ninja new file mode 100644 index 0000000..807c07e --- /dev/null +++ b/tests/manual/clang-cl/build.ninja @@ -0,0 +1,6 @@ +rule clang-cl + command = clang-cl /showIncludes $in + deps = msvc + +build test.exe: clang-cl test.c +default test.exe diff --git a/tests/manual/clang-cl/test.c b/tests/manual/clang-cl/test.c new file mode 100644 index 0000000..87db13e --- /dev/null +++ b/tests/manual/clang-cl/test.c @@ -0,0 +1,7 @@ +#include <stdio.h> +#include "test.h" + +int main() { + printf("hello, world: %d\n", X); + return 0; +} diff --git a/tests/manual/clang-cl/test.h b/tests/manual/clang-cl/test.h new file mode 100644 index 0000000..4abf145 --- /dev/null +++ b/tests/manual/clang-cl/test.h @@ -0,0 +1 @@ +#define X 3 diff --git a/tests/manual/prints/README.md b/tests/manual/prints/README.md new file mode 100644 index 0000000..da3426e --- /dev/null +++ b/tests/manual/prints/README.md @@ -0,0 +1,2 @@ +This manual test runs multiple subcommands that write to stdout, to verify +interleaving output. 
diff --git a/tests/manual/prints/build-win.ninja b/tests/manual/prints/build-win.ninja new file mode 100644 index 0000000..638ae11 --- /dev/null +++ b/tests/manual/prints/build-win.ninja @@ -0,0 +1,9 @@ +rule printy + command = prints.bat $out + +build print1: printy +build print2: printy +build print3: printy + +build out: phony print1 print2 print3 +default out diff --git a/tests/manual/prints/build.ninja b/tests/manual/prints/build.ninja new file mode 100644 index 0000000..4120f86 --- /dev/null +++ b/tests/manual/prints/build.ninja @@ -0,0 +1,9 @@ +rule printy + command = ./prints.sh $out + +build print1: printy +build print2: printy +build print3: printy + +build out: phony print1 print2 print3 +default out diff --git a/tests/manual/prints/prints.bat b/tests/manual/prints/prints.bat new file mode 100644 index 0000000..5c33a97 --- /dev/null +++ b/tests/manual/prints/prints.bat @@ -0,0 +1,7 @@ +@echo off +echo %1: 1 +timeout /T 1 /NOBREAK > nul +echo %1: 2 +timeout /T 1 /NOBREAK > nul +echo %1: 3 +timeout /T 1 /NOBREAK > nul diff --git a/tests/manual/prints/prints.sh b/tests/manual/prints/prints.sh new file mode 100755 index 0000000..6d1c3d5 --- /dev/null +++ b/tests/manual/prints/prints.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +for i in $(seq 3); do + echo $1: $i + # Uncomment me to verify which fds are open: + lsof -a -p $$ + sleep 1 +done diff --git a/tests/snapshot/README.md b/tests/snapshot/README.md new file mode 100644 index 0000000..e779580 --- /dev/null +++ b/tests/snapshot/README.md @@ -0,0 +1,44 @@ +# Snapshot tests + +This directory is for real-world outputs from tools that generate Ninja files. + +Because these outputs are large, they aren't checked in to the Ninja tree. 
+Instead for now it's a random zip file on Google drive at + +https://drive.google.com/file/d/1dlNAaf0XRXjV6UPkNV88JFuOBGJLdyGB/view?usp=sharing + +It expects to be unzipped directly into this directory: + +``` +$ unzip snapshot.zip +``` + +## Test data in the zip + +### llvm-cmake + +`llvm-cmake/` contains LLVM build files generated by CMake. + +https://llvm.org/docs/GettingStarted.html (note they have a CMake-specific page +that has instructions that don't work(?!)) + +``` +$ cmake -G Ninja -B build -S llvm -DCMAKE_BUILD_TYPE=Release +``` + +### llvm-gn + +`llvm-gn/` contains LLVM build files generated by the GN build system. + +Read llvm/utils/gn/README.rst in LLVM checkout for more, but in brief. + +``` +$ llvm/utils/gn/get.py +$ llvm/utils/gn/gn.py gen out/gn +``` + +## Reminder to self how I made it + +``` +$ zip -r snapshot.zip .gitignore llvm-* +``` |