author     Android Build Coastguard Worker <android-build-coastguard-worker@google.com>   2023-09-28 01:12:32 +0000
committer  Android Build Coastguard Worker <android-build-coastguard-worker@google.com>   2023-09-28 01:12:32 +0000
commit     7a21a6f81bce08a3db7f312bd2e0b7084b3e7fff (patch)
tree       cbcc19fcf98479dce5ad3e7954427bd82961a348
parent     db4553b1a39ef8ef84a097dfa2e795c0a4df60d8 (diff)
parent     56734c9de21b723684c083ccf7fcb8dad46ca387 (diff)
download   google-benchmark-android14-qpr2-release.tar.gz
Snap for 10872577 from 56734c9de21b723684c083ccf7fcb8dad46ca387 to 24Q1-release

Tags: android-14.0.0_r37, android-14.0.0_r36, android-14.0.0_r35, android-14.0.0_r34, android-14.0.0_r33, android-14.0.0_r32, android-14.0.0_r31, android-14.0.0_r30, android-14.0.0_r29, android14-qpr2-s5-release, android14-qpr2-s4-release, android14-qpr2-s3-release, android14-qpr2-s2-release, android14-qpr2-s1-release, android14-qpr2-release
Change-Id: I033841d18cacdfc280e47e915b3a85ffad3c2962
169 files changed, 9567 insertions, 3527 deletions
diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..56938a5 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,7 @@ +--- +Checks: 'clang-analyzer-*,readability-redundant-*,performance-*' +WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*' +HeaderFilterRegex: '.*' +AnalyzeTemporaryDtors: false +FormatStyle: none +User: user diff --git a/.github/install_bazel.sh b/.github/install_bazel.sh new file mode 100644 index 0000000..2b1f4e7 --- /dev/null +++ b/.github/install_bazel.sh @@ -0,0 +1,13 @@ +if ! bazel version; then + arch=$(uname -m) + if [ "$arch" == "aarch64" ]; then + arch="arm64" + fi + echo "Installing wget and downloading $arch Bazel binary from GitHub releases." + yum install -y wget + wget "https://github.com/bazelbuild/bazel/releases/download/6.3.0/bazel-6.3.0-linux-$arch" -O /usr/local/bin/bazel + chmod +x /usr/local/bin/bazel +else + # bazel is installed for the correct architecture + exit 0 +fi diff --git a/.github/libcxx-setup.sh b/.github/libcxx-setup.sh new file mode 100755 index 0000000..8773b9c --- /dev/null +++ b/.github/libcxx-setup.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -e + +# Checkout LLVM sources +git clone --depth=1 https://github.com/llvm/llvm-project.git llvm-project + +## Setup libc++ options +if [ -z "$BUILD_32_BITS" ]; then + export BUILD_32_BITS=OFF && echo disabling 32 bit build +fi + +## Build and install libc++ (Use unstable ABI for better sanitizer coverage) +mkdir llvm-build && cd llvm-build +cmake -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DLIBCXX_ABI_UNSTABLE=OFF \ + -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \ + -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \ + -DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi;libunwind' \ + -G "Unix Makefiles" \ + ../llvm-project/runtimes/ +make -j cxx cxxabi unwind +cd .. diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index d6bbe62..1cdc38c 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -5,29 +5,31 @@ on: pull_request: {} jobs: - build-and-test: - runs-on: ubuntu-latest - + build_and_test_default: + name: bazel.${{ matrix.os }}.${{ matrix.bzlmod && 'bzlmod' || 'no_bzlmod' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + bzlmod: [false, true] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 - name: mount bazel cache - uses: actions/cache@v1 + uses: actions/cache@v3 + env: + cache-name: bazel-cache with: - path: "/home/runner/.cache/bazel" - key: bazel - - - name: install bazelisk - run: | - curl -LO "https://github.com/bazelbuild/bazelisk/releases/download/v1.1.0/bazelisk-linux-amd64" - mkdir -p "${GITHUB_WORKSPACE}/bin/" - mv bazelisk-linux-amd64 "${GITHUB_WORKSPACE}/bin/bazel" - chmod +x "${GITHUB_WORKSPACE}/bin/bazel" + path: "~/.cache/bazel" + key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }} + restore-keys: | + ${{ env.cache-name }}-${{ matrix.os }}-main - name: build run: | - "${GITHUB_WORKSPACE}/bin/bazel" build //... - + bazel build ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} //:benchmark //:benchmark_main //test/... + - name: test run: | - "${GITHUB_WORKSPACE}/bin/bazel" test //test/... + bazel test ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} --test_output=all //test/... 
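For local reproduction, the two Bazel steps that the rewritten workflow matrix runs are easy to replay by hand; the following is a minimal sketch, assuming a bzlmod-capable Bazel (the `install_bazel.sh` script above pins 6.3.0) is already on `PATH` and the commands are run from the repository root:

```bash
#!/usr/bin/env bash
# Local equivalent of the workflow's bzlmod=true leg; swap --enable_bzlmod for
# --noenable_bzlmod to mirror the bzlmod=false leg of the matrix.
set -euo pipefail

bazel build --enable_bzlmod //:benchmark //:benchmark_main //test/...
bazel test --enable_bzlmod --test_output=all //test/...
```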
diff --git a/.github/workflows/build-and-test-min-cmake.yml b/.github/workflows/build-and-test-min-cmake.yml new file mode 100644 index 0000000..e3e3217 --- /dev/null +++ b/.github/workflows/build-and-test-min-cmake.yml @@ -0,0 +1,46 @@ +name: build-and-test-min-cmake + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + job: + name: ${{ matrix.os }}.min-cmake + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v3 + + - uses: lukka/get-cmake@latest + with: + cmakeVersion: 3.10.0 + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: setup cmake initial cache + run: touch compiler-cache.cmake + + - name: configure cmake + env: + CXX: ${{ matrix.compiler }} + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake -C ${{ github.workspace }}/compiler-cache.cmake + $GITHUB_WORKSPACE + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_CXX_VISIBILITY_PRESET=hidden + -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON + + - name: build + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: cmake --build . diff --git a/.github/workflows/build-and-test-perfcounters.yml b/.github/workflows/build-and-test-perfcounters.yml new file mode 100644 index 0000000..97e4d8e --- /dev/null +++ b/.github/workflows/build-and-test-perfcounters.yml @@ -0,0 +1,51 @@ +name: build-and-test-perfcounters + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + job: + # TODO(dominic): Extend this to include compiler and set through env: CC/CXX. + name: ${{ matrix.os }}.${{ matrix.build_type }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04, ubuntu-20.04] + build_type: ['Release', 'Debug'] + steps: + - uses: actions/checkout@v3 + + - name: install libpfm + run: | + sudo apt update + sudo apt -y install libpfm4-dev + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: configure cmake + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_LIBPFM=1 + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + - name: build + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} + + # Skip testing, for now. It seems perf_event_open does not succeed on the + # hosting machine, very likely a permissions issue. + # TODO(mtrofin): Enable test. + # - name: test + # shell: bash + # working-directory: ${{ runner.workspace }}/_build + # run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure + diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index f0f0626..b35200a 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -2,37 +2,113 @@ name: build-and-test on: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] jobs: + # TODO: add 32-bit builds (g++ and clang++) for ubuntu + # (requires g++-multilib and libc6:i386) + # TODO: add coverage build (requires lcov) + # TODO: add clang + libc++ builds for ubuntu job: - # TODO(dominic): Extend this to include compiler and set through env: CC/CXX. 
- name: ${{ matrix.os }}.${{ matrix.build_type }} + name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-22.04, ubuntu-20.04, macos-latest] build_type: ['Release', 'Debug'] + compiler: ['g++', 'clang++'] + lib: ['shared', 'static'] + steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 + + - uses: lukka/get-cmake@latest + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: setup cmake initial cache + run: touch compiler-cache.cmake + + - name: configure cmake + env: + CXX: ${{ matrix.compiler }} + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake -C ${{ github.workspace }}/compiler-cache.cmake + $GITHUB_WORKSPACE + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DCMAKE_CXX_COMPILER=${{ env.CXX }} + -DCMAKE_CXX_VISIBILITY_PRESET=hidden + -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON + + - name: build + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: cmake --build . --config ${{ matrix.build_type }} + + - name: test + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: ctest -C ${{ matrix.build_type }} -VV + + msvc: + name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: powershell + strategy: + fail-fast: false + matrix: + msvc: + - VS-16-2019 + - VS-17-2022 + arch: + - x64 + build_type: + - Debug + - Release + lib: + - shared + - static + include: + - msvc: VS-16-2019 + os: windows-2019 + generator: 'Visual Studio 16 2019' + - msvc: VS-17-2022 + os: windows-2022 + generator: 'Visual Studio 17 2022' + + steps: + - uses: actions/checkout@v2 + + - uses: lukka/get-cmake@latest + + - name: configure cmake + run: > + cmake -S . -B _build/ + -A ${{ matrix.arch }} + -G "${{ matrix.generator }}" + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} + + - name: build + run: cmake --build _build/ --config ${{ matrix.build_type }} - - name: create build environment - run: cmake -E make_directory ${{ runner.workspace }}/_build + - name: setup test environment + # Make sure gmock and benchmark DLLs can be found + run: > + echo "$((Get-Item .).FullName)/_build/bin/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append; + echo "$((Get-Item .).FullName)/_build/src/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append; - - name: configure cmake - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + - name: test + run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV - - name: build - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: cmake --build . 
--config ${{ matrix.build_type }} - - name: test - shell: bash - working-directory: ${{ runner.workspace }}/_build - run: ctest -C ${{ matrix.build_type }} diff --git a/.github/workflows/clang-format-lint.yml b/.github/workflows/clang-format-lint.yml new file mode 100644 index 0000000..77ce1f8 --- /dev/null +++ b/.github/workflows/clang-format-lint.yml @@ -0,0 +1,17 @@ +name: clang-format-lint +on: + push: {} + pull_request: {} + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: DoozyX/clang-format-lint-action@v0.13 + with: + source: './include/benchmark ./src ./test' + extensions: 'h,cc' + clangFormatVersion: 12 + style: Google diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml new file mode 100644 index 0000000..2eaab9c --- /dev/null +++ b/.github/workflows/clang-tidy.yml @@ -0,0 +1,38 @@ +name: clang-tidy + +on: + push: {} + pull_request: {} + +jobs: + job: + name: run-clang-tidy + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v3 + + - name: install clang-tidy + run: sudo apt update && sudo apt -y install clang-tidy + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: configure cmake + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF + -DBENCHMARK_ENABLE_LIBPFM=OFF + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_C_COMPILER=clang + -DCMAKE_CXX_COMPILER=clang++ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + -DGTEST_COMPILE_COMMANDS=OFF + + - name: run + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: run-clang-tidy diff --git a/.github/workflows/doxygen.yml b/.github/workflows/doxygen.yml new file mode 100644 index 0000000..da92c46 --- /dev/null +++ b/.github/workflows/doxygen.yml @@ -0,0 +1,28 @@ +name: doxygen + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build-and-deploy: + name: Build HTML documentation + runs-on: ubuntu-latest + steps: + - name: Fetching sources + uses: actions/checkout@v3 + + - name: Installing build dependencies + run: | + sudo apt update + sudo apt install doxygen gcc git + + - name: Creating build directory + run: mkdir build + + - name: Building HTML documentation with Doxygen + run: | + cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON + cmake --build build --target benchmark_doxygen diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index c869674..c6939b5 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -2,9 +2,9 @@ name: pylint on: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] jobs: pylint: @@ -12,15 +12,17 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v1 with: python-version: 3.8 + - name: Install dependencies run: | python -m pip install --upgrade pip pip install pylint pylint-exit conan + - name: Run pylint run: | pylint `find . -name '*.py'|xargs` || pylint-exit $? 
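The pylint job above amounts to two shell steps; a minimal sketch for running the same check locally, assuming a Python 3 environment comparable to the workflow's Python 3.8 setup:

```bash
#!/usr/bin/env bash
# Mirror of the workflow: install the linters, lint every .py file, and let
# pylint-exit decide which pylint exit codes are fatal.
set -uo pipefail

python -m pip install --upgrade pip
pip install pylint pylint-exit conan
pylint $(find . -name '*.py' | xargs) || pylint-exit $?
```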
diff --git a/.github/workflows/sanitizer.yml b/.github/workflows/sanitizer.yml new file mode 100644 index 0000000..86cccf4 --- /dev/null +++ b/.github/workflows/sanitizer.yml @@ -0,0 +1,96 @@ +name: sanitizer + +on: + push: {} + pull_request: {} + +env: + UBSAN_OPTIONS: "print_stacktrace=1" + +jobs: + job: + name: ${{ matrix.sanitizer }}.${{ matrix.build_type }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + build_type: ['Debug', 'RelWithDebInfo'] + sanitizer: ['asan', 'ubsan', 'tsan', 'msan'] + + steps: + - uses: actions/checkout@v3 + + - name: configure msan env + if: matrix.sanitizer == 'msan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV + + - name: configure ubsan env + if: matrix.sanitizer == 'ubsan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV + + - name: configure asan env + if: matrix.sanitizer == 'asan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV + + - name: configure tsan env + if: matrix.sanitizer == 'tsan' + run: | + echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV + echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV + + - name: fine-tune asan options + # in asan we get an error from std::regex. ignore it. + if: matrix.sanitizer == 'asan' + run: | + echo "ASAN_OPTIONS=alloc_dealloc_mismatch=0" >> $GITHUB_ENV + + - name: setup clang + uses: egor-tensin/setup-clang@v1 + with: + version: latest + platform: x64 + + - name: configure clang + run: | + echo "CC=cc" >> $GITHUB_ENV + echo "CXX=c++" >> $GITHUB_ENV + + - name: build libc++ (non-asan) + if: matrix.sanitizer != 'asan' + run: | + "${GITHUB_WORKSPACE}/.github/libcxx-setup.sh" + echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L ${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -Isystem${GITHUB_WORKSPACE}/llvm-build/include -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV + + - name: create build environment + run: cmake -E make_directory ${{ runner.workspace }}/_build + + - name: configure cmake + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: > + VERBOSE=1 + cmake $GITHUB_WORKSPACE + -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF + -DBENCHMARK_ENABLE_LIBPFM=OFF + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DCMAKE_C_COMPILER=${{ env.CC }} + -DCMAKE_CXX_COMPILER=${{ env.CXX }} + -DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}" + -DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}" + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + - name: build + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: cmake --build . 
--config ${{ matrix.build_type }} + + - name: test + shell: bash + working-directory: ${{ runner.workspace }}/_build + run: ctest -C ${{ matrix.build_type }} -VV diff --git a/.github/workflows/test_bindings.yml b/.github/workflows/test_bindings.yml index 273d7f9..e01bb7b 100644 --- a/.github/workflows/test_bindings.yml +++ b/.github/workflows/test_bindings.yml @@ -2,23 +2,28 @@ name: test-bindings on: push: - branches: [master] + branches: [main] pull_request: - branches: [master] + branches: [main] jobs: python_bindings: - runs-on: ubuntu-latest + name: Test GBM Python bindings on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, macos-latest, windows-2019 ] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: - python-version: 3.8 - - name: Install benchmark + python-version: 3.11 + - name: Install GBM Python bindings on ${{ matrix.os}} run: - python setup.py install - - name: Run example bindings + python -m pip install wheel . + - name: Run bindings example on ${{ matrix.os }} run: python bindings/python/google_benchmark/example.py diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 0000000..1f73bff --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,79 @@ +name: Build and upload Python wheels + +on: + workflow_dispatch: + release: + types: + - published + +jobs: + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - name: Check out repo + uses: actions/checkout@v3 + + - name: Install Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Build and check sdist + run: | + python setup.py sdist + - name: Upload sdist + uses: actions/upload-artifact@v3 + with: + name: dist + path: dist/*.tar.gz + + build_wheels: + name: Build Google Benchmark wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-2019] + + steps: + - name: Check out Google Benchmark + uses: actions/checkout@v3 + + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v2 + with: + platforms: all + + - name: Build wheels on ${{ matrix.os }} using cibuildwheel + uses: pypa/cibuildwheel@v2.14.1 + env: + CIBW_BUILD: 'cp38-* cp39-* cp310-* cp311-*' + CIBW_SKIP: "*-musllinux_*" + CIBW_TEST_SKIP: "*-macosx_arm64" + CIBW_ARCHS_LINUX: x86_64 aarch64 + CIBW_ARCHS_MACOS: x86_64 arm64 + CIBW_ARCHS_WINDOWS: AMD64 + CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh + CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py + + - name: Upload Google Benchmark ${{ matrix.os }} wheels + uses: actions/upload-artifact@v3 + with: + name: dist + path: ./wheelhouse/*.whl + + pypi_upload: + name: Publish google-benchmark wheels to PyPI + needs: [build_sdist, build_wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v3 + with: + name: dist + path: dist + + - uses: pypa/gh-action-pypi-publish@v1.6.4 + with: + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} @@ -11,6 +11,7 @@ *.swp *.pyc __pycache__ +.DS_Store # lcov *.lcov diff --git a/.travis-libcxx-setup.sh b/.travis-libcxx-setup.sh deleted file mode 100644 index a591743..0000000 --- a/.travis-libcxx-setup.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# Install a newer CMake version -curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o 
install-cmake.sh -chmod +x install-cmake.sh -sudo ./install-cmake.sh --prefix=/usr/local --skip-license - -# Checkout LLVM sources -git clone --depth=1 https://github.com/llvm-mirror/llvm.git llvm-source -git clone --depth=1 https://github.com/llvm-mirror/libcxx.git llvm-source/projects/libcxx -git clone --depth=1 https://github.com/llvm-mirror/libcxxabi.git llvm-source/projects/libcxxabi - -# Setup libc++ options -if [ -z "$BUILD_32_BITS" ]; then - export BUILD_32_BITS=OFF && echo disabling 32 bit build -fi - -# Build and install libc++ (Use unstable ABI for better sanitizer coverage) -mkdir llvm-build && cd llvm-build -cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr \ - -DLIBCXX_ABI_UNSTABLE=ON \ - -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \ - -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \ - ../llvm-source -make cxx -j2 -sudo make install-cxxabi install-cxx -cd ../ diff --git a/.travis.yml b/.travis.yml index 36e343d..8cfed3d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,10 +11,6 @@ matrix: - lcov env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage - compiler: gcc - env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug - - compiler: gcc - env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release - - compiler: gcc addons: apt: packages: @@ -44,10 +40,6 @@ matrix: - COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug - ENABLE_SANITIZER=1 - EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold" - - compiler: clang - env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug - - compiler: clang - env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release # Clang w/ libc++ - compiler: clang dist: xenial @@ -150,29 +142,14 @@ matrix: osx_image: xcode8.3 compiler: clang env: - - COMPILER=clang++ BUILD_TYPE=Debug - - os: osx - osx_image: xcode8.3 - compiler: clang - env: - - COMPILER=clang++ BUILD_TYPE=Release - - os: osx - osx_image: xcode8.3 - compiler: clang - env: - COMPILER=clang++ - BUILD_TYPE=Release - BUILD_32_BITS=ON - EXTRA_FLAGS="-m32" - - os: osx - osx_image: xcode9.4 - compiler: gcc - env: - - COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug before_script: - if [ -n "${LIBCXX_BUILD}" ]; then - source .travis-libcxx-setup.sh; + source .libcxx-setup.sh; fi - if [ -n "${ENABLE_SANITIZER}" ]; then export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF"; @@ -13,6 +13,7 @@ Alex Steele <steeleal123@gmail.com> Andriy Berestovskyy <berestovskyy@gmail.com> Arne Beer <arne@twobeer.de> Carto +Cezary Skrzyński <czars1988@gmail.com> Christian Wassermann <christian_wassermann@web.de> Christopher Seymour <chris.j.seymour@hotmail.com> Colin Braley <braley.colin@gmail.com> @@ -21,14 +22,18 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr> Deniz Evrenci <denizevrenci@gmail.com> Dirac Research Dominik Czarnota <dominik.b.czarnota@gmail.com> +Dominik Korman <kormandominik@gmail.com> +Donald Aingworth <donalds_junk_mail@yahoo.com> Eric Backus <eric_backus@alum.mit.edu> Eric Fiselier <eric@efcs.ca> Eugene Zhuk <eugene.zhuk@gmail.com> Evgeny Safronov <division494@gmail.com> +Fabien Pichot <pichot.fabien@gmail.com> Federico Ficarelli <federico.ficarelli@gmail.com> Felix Homann <linuxaudio@showlabor.de> Gergő Szitár <szitar.gergo@gmail.com> Google Inc. 
+Henrique Bucher <hbucher@gmail.com> International Business Machines Corporation Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com> Jern-Kuan Leong <jernkuan@gmail.com> @@ -39,20 +44,28 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com> Kaito Udagawa <umireon@gmail.com> Kishan Kumar <kumar.kishan@outlook.com> Lei Xu <eddyxu@gmail.com> +Marcel Jacobse <mjacobse@uni-bremen.de> Matt Clarkson <mattyclarkson@gmail.com> Maxim Vafin <maxvafin@gmail.com> +Mike Apodaca <gatorfax@gmail.com> +Min-Yih Hsu <yihshyng223@gmail.com> MongoDB Inc. Nick Hutchinson <nshutchinson@gmail.com> +Norman Heino <norman.heino@gmail.com> Oleksandr Sochka <sasha.sochka@gmail.com> Ori Livneh <ori.livneh@gmail.com> Paul Redmond <paul.redmond@gmail.com> Radoslav Yovchev <radoslav.tm@gmail.com> +Raghu Raja <raghu@enfabrica.net> +Rainer Orth <ro@cebitec.uni-bielefeld.de> Roman Lebedev <lebedev.ri@gmail.com> Sayan Bhattacharjee <aero.sayan@gmail.com> +Shapr3D <google-contributors@shapr3d.com> Shuo Chen <chenshuo@chenshuo.com> +Staffan Tjernstrom <staffantj@gmail.com> Steinar H. Gunderson <sgunderson@bigfoot.com> Stripe, Inc. +Tobias Schmidt <tobias.schmidt@in.tum.de> Yixuan Qiu <yixuanq@gmail.com> Yusuke Suzuki <utatane.tea@gmail.com> Zbigniew Skowron <zbychs@gmail.com> -Min-Yih Hsu <yihshyng223@gmail.com> diff --git a/BUILD.bazel b/BUILD.bazel index eb35b62..60d31d2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,15 +1,37 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") - licenses(["notice"]) config_setting( + name = "qnx", + constraint_values = ["@platforms//os:qnx"], + values = { + "cpu": "x64_qnx", + }, + visibility = [":__subpackages__"], +) + +config_setting( name = "windows", + constraint_values = ["@platforms//os:windows"], values = { "cpu": "x64_windows", }, visibility = [":__subpackages__"], ) +config_setting( + name = "macos", + constraint_values = ["@platforms//os:macos"], + visibility = ["//visibility:public"], +) + +config_setting( + name = "perfcounters", + define_values = { + "pfm": "1", + }, + visibility = [":__subpackages__"], +) + cc_library( name = "benchmark", srcs = glob( @@ -19,19 +41,40 @@ cc_library( ], exclude = ["src/benchmark_main.cc"], ), - hdrs = ["include/benchmark/benchmark.h"], + hdrs = [ + "include/benchmark/benchmark.h", + "include/benchmark/export.h", + ], linkopts = select({ ":windows": ["-DEFAULTLIB:shlwapi.lib"], "//conditions:default": ["-pthread"], }), + copts = select({ + ":windows": [], + "//conditions:default": ["-Werror=old-style-cast"], + }), strip_include_prefix = "include", visibility = ["//visibility:public"], + # Only static linking is allowed; no .so will be produced. + # Using `defines` (i.e. not `local_defines`) means that no + # dependent rules need to bother about defining the macro. 
+ linkstatic = True, + defines = [ + "BENCHMARK_STATIC_DEFINE", + ] + select({ + ":perfcounters": ["HAVE_LIBPFM"], + "//conditions:default": [], + }), + deps = select({ + ":perfcounters": ["@libpfm//:libpfm"], + "//conditions:default": [], + }), ) cc_library( name = "benchmark_main", srcs = ["src/benchmark_main.cc"], - hdrs = ["include/benchmark/benchmark.h"], + hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"], strip_include_prefix = "include", visibility = ["//visibility:public"], deps = [":benchmark"], diff --git a/CMakeLists.txt b/CMakeLists.txt index 1007254..ffd7dee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,30 +1,34 @@ -cmake_minimum_required (VERSION 3.5.1) - -foreach(p - CMP0048 # OK to clear PROJECT_VERSION on project() - CMP0054 # CMake 3.1 - CMP0056 # export EXE_LINKER_FLAGS to try_run - CMP0057 # Support no if() IN_LIST operator - CMP0063 # Honor visibility properties for all targets - CMP0077 # Allow option() overrides in importing projects - ) - if(POLICY ${p}) - cmake_policy(SET ${p} NEW) - endif() -endforeach() +# Require CMake 3.10. If available, use the policies up to CMake 3.22. +cmake_minimum_required (VERSION 3.10...3.22) -project (benchmark CXX) +project (benchmark VERSION 1.8.3 LANGUAGES CXX) option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF) +option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON) +option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF) + +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") + # PGC++ maybe reporting false positives. + set(BENCHMARK_ENABLE_WERROR OFF) +endif() +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC") + set(BENCHMARK_ENABLE_WERROR OFF) +endif() +if(BENCHMARK_FORCE_WERROR) + set(BENCHMARK_ENABLE_WERROR ON) +endif(BENCHMARK_FORCE_WERROR) + if(NOT MSVC) option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF) else() set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE) endif() option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON) +option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF) +option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON) # Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which # may require downloading the source code. @@ -33,8 +37,25 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi # This option can be used to disable building and running unit tests which depend on gtest # in cases where it is not possible to build or find a valid version of gtest. option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON) +option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." 
ON) + +option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) + +# Export only public symbols +set(CMAKE_CXX_VISIBILITY_PRESET hidden) +set(CMAKE_VISIBILITY_INLINES_HIDDEN ON) + +if(MSVC) + # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and + # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the + # undocumented, but working variable. + # See https://gitlab.kitware.com/cmake/cmake/-/issues/15170 + set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID}) + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM") + set(CMAKE_CROSSCOMPILING TRUE) + endif() +endif() -set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF) function(should_enable_assembly_tests) if(CMAKE_BUILD_TYPE) @@ -81,24 +102,43 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(GetGitVersion) get_git_version(GIT_VERSION) +# If no git version can be determined, use the version +# from the project() command +if ("${GIT_VERSION}" STREQUAL "0.0.0") + set(VERSION "${benchmark_VERSION}") +else() + set(VERSION "${GIT_VERSION}") +endif() # Tell the user what versions we are using -string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION}) -message(STATUS "Version: ${VERSION}") +message(STATUS "Google Benchmark version: ${VERSION}") # The version of the libraries set(GENERIC_LIB_VERSION ${VERSION}) string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) # Import our CMake modules -include(CheckCXXCompilerFlag) include(AddCXXCompilerFlag) +include(CheckCXXCompilerFlag) +include(CheckLibraryExists) include(CXXFeatureCheck) +check_library_exists(rt shm_open "" HAVE_LIB_RT) + if (BENCHMARK_BUILD_32_BITS) add_required_cxx_compiler_flag(-m32) endif() if (MSVC) + set(BENCHMARK_CXX_STANDARD 14) +else() + set(BENCHMARK_CXX_STANDARD 11) +endif() + +set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD}) +set(CMAKE_CXX_STANDARD_REQUIRED YES) +set(CMAKE_CXX_EXTENSIONS OFF) + +if (MSVC) # Turn compiler warnings up to 11 string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") @@ -130,44 +170,43 @@ if (MSVC) set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG") endif() else() - # Try and enable C++11. Don't use C++14 because it doesn't work in some - # configurations. - add_cxx_compiler_flag(-std=c++11) - if (NOT HAVE_CXX_FLAG_STD_CXX11) - add_cxx_compiler_flag(-std=c++0x) - endif() - # Turn compiler warnings up to 11 add_cxx_compiler_flag(-Wall) add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) - add_cxx_compiler_flag(-Werror RELEASE) - add_cxx_compiler_flag(-Werror RELWITHDEBINFO) - add_cxx_compiler_flag(-Werror MINSIZEREL) - # Disabled until googletest (gmock) stops emitting variadic macro warnings - #add_cxx_compiler_flag(-pedantic) - #add_cxx_compiler_flag(-pedantic-errors) + add_cxx_compiler_flag(-Wfloat-equal) + add_cxx_compiler_flag(-Wold-style-cast) + if(BENCHMARK_ENABLE_WERROR) + add_cxx_compiler_flag(-Werror) + endif() + if (NOT BENCHMARK_ENABLE_TESTING) + # Disable warning when compiling tests as gtest does not use 'override'. + add_cxx_compiler_flag(-Wsuggest-override) + endif() + add_cxx_compiler_flag(-pedantic) + add_cxx_compiler_flag(-pedantic-errors) add_cxx_compiler_flag(-Wshorten-64-to-32) add_cxx_compiler_flag(-fstrict-aliasing) # Disable warnings regarding deprecated parts of the library while building # and testing those parts of the library. 
add_cxx_compiler_flag(-Wno-deprecated-declarations) - if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") # Intel silently ignores '-Wno-deprecated-declarations', # warning no. 1786 must be explicitly disabled. # See #631 for rationale. add_cxx_compiler_flag(-wd1786) + add_cxx_compiler_flag(-fno-finite-math-only) endif() # Disable deprecation warnings for release builds (when -Werror is enabled). - add_cxx_compiler_flag(-Wno-deprecated RELEASE) - add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO) - add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL) + if(BENCHMARK_ENABLE_WERROR) + add_cxx_compiler_flag(-Wno-deprecated) + endif() if (NOT BENCHMARK_ENABLE_EXCEPTIONS) add_cxx_compiler_flag(-fno-exceptions) endif() if (HAVE_CXX_FLAG_FSTRICT_ALIASING) - if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing + if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing add_cxx_compiler_flag(-Wstrict-aliasing) endif() endif() @@ -176,12 +215,12 @@ else() add_cxx_compiler_flag(-wd654) add_cxx_compiler_flag(-Wthread-safety) if (HAVE_CXX_FLAG_WTHREAD_SAFETY) - cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) + cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include") endif() # On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a # predefined macro, which turns on all of the wonderful libc extensions. - # However g++ doesn't do this in Cygwin so we have to define it ourselfs + # However g++ doesn't do this in Cygwin so we have to define it ourselves # since we depend on GNU/POSIX/BSD extensions. 
if (CYGWIN) add_definitions(-D_GNU_SOURCE=1) @@ -232,7 +271,8 @@ if (BENCHMARK_USE_LIBCXX) if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") add_cxx_compiler_flag(-stdlib=libc++) elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR - "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") + "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR + "${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM") add_cxx_compiler_flag(-nostdinc++) message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS") # Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break @@ -269,6 +309,11 @@ cxx_feature_check(STEADY_CLOCK) # Ensure we have pthreads set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +cxx_feature_check(PTHREAD_AFFINITY) + +if (BENCHMARK_ENABLE_LIBPFM) + find_package(PFM) +endif() # Set up directories include_directories(${PROJECT_SOURCE_DIR}/include) @@ -281,7 +326,15 @@ if (BENCHMARK_ENABLE_TESTING) if (BENCHMARK_ENABLE_GTEST_TESTS AND NOT (TARGET gtest AND TARGET gtest_main AND TARGET gmock AND TARGET gmock_main)) - include(GoogleTest) + if (BENCHMARK_USE_BUNDLED_GTEST) + include(GoogleTest) + else() + find_package(GTest CONFIG REQUIRED) + add_library(gtest ALIAS GTest::gtest) + add_library(gtest_main ALIAS GTest::gtest_main) + add_library(gmock ALIAS GTest::gmock) + add_library(gmock_main ALIAS GTest::gmock_main) + endif() endif() add_subdirectory(test) endif() diff --git a/CONTRIBUTORS b/CONTRIBUTORS index b5e1aa4..95bcad0 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -27,7 +27,9 @@ Albert Pretorius <pretoalb@gmail.com> Alex Steele <steelal123@gmail.com> Andriy Berestovskyy <berestovskyy@gmail.com> Arne Beer <arne@twobeer.de> +Bátor Tallér <bator.taller@shapr3d.com> Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com> +Cezary Skrzyński <czars1988@gmail.com> Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com> Christian Wassermann <christian_wassermann@web.de> Christopher Seymour <chris.j.seymour@hotmail.com> @@ -38,16 +40,20 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr> Deniz Evrenci <denizevrenci@gmail.com> Dominic Hamon <dma@stripysock.com> <dominic@google.com> Dominik Czarnota <dominik.b.czarnota@gmail.com> +Dominik Korman <kormandominik@gmail.com> +Donald Aingworth <donalds_junk_mail@yahoo.com> Eric Backus <eric_backus@alum.mit.edu> Eric Fiselier <eric@efcs.ca> Eugene Zhuk <eugene.zhuk@gmail.com> Evgeny Safronov <division494@gmail.com> +Fabien Pichot <pichot.fabien@gmail.com> Fanbo Meng <fanbo.meng@ibm.com> Federico Ficarelli <federico.ficarelli@gmail.com> Felix Homann <linuxaudio@showlabor.de> Geoffrey Martin-Noble <gcmn@google.com> <gmngeoffrey@gmail.com> Gergő Szitár <szitar.gergo@gmail.com> Hannes Hauswedell <h2@fsfe.org> +Henrique Bucher <hbucher@gmail.com> Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com> Jern-Kuan Leong <jernkuan@gmail.com> JianXiong Zhou <zhoujianxiong2@gmail.com> @@ -55,19 +61,25 @@ Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com> John Millikin <jmillikin@stripe.com> Jordan Williams <jwillikers@protonmail.com> Jussi Knuuttila <jussi.knuuttila@gmail.com> -Kai Wolf <kai.wolf@gmail.com> Kaito Udagawa <umireon@gmail.com> +Kai Wolf <kai.wolf@gmail.com> Kishan Kumar <kumar.kishan@outlook.com> Lei Xu <eddyxu@gmail.com> +Marcel Jacobse <mjacobse@uni-bremen.de> Matt Clarkson <mattyclarkson@gmail.com> Maxim Vafin <maxvafin@gmail.com> +Mike Apodaca <gatorfax@gmail.com> +Min-Yih Hsu <yihshyng223@gmail.com> Nick Hutchinson <nshutchinson@gmail.com> +Norman Heino <norman.heino@gmail.com> Oleksandr 
Sochka <sasha.sochka@gmail.com> Ori Livneh <ori.livneh@gmail.com> Pascal Leroy <phl@google.com> Paul Redmond <paul.redmond@gmail.com> Pierre Phaneuf <pphaneuf@google.com> Radoslav Yovchev <radoslav.tm@gmail.com> +Raghu Raja <raghu@enfabrica.net> +Rainer Orth <ro@cebitec.uni-bielefeld.de> Raul Marin <rmrodriguez@cartodb.com> Ray Glover <ray.glover@uk.ibm.com> Robert Guo <robert.guo@mongodb.com> @@ -75,9 +87,9 @@ Roman Lebedev <lebedev.ri@gmail.com> Sayan Bhattacharjee <aero.sayan@gmail.com> Shuo Chen <chenshuo@chenshuo.com> Steven Wan <wan.yu@ibm.com> +Tobias Schmidt <tobias.schmidt@in.tum.de> Tobias Ulvgård <tobias.ulvgard@dirac.se> Tom Madams <tom.ej.madams@gmail.com> <tmadams@google.com> Yixuan Qiu <yixuanq@gmail.com> Yusuke Suzuki <utatane.tea@gmail.com> Zbigniew Skowron <zbychs@gmail.com> -Min-Yih Hsu <yihshyng223@gmail.com> @@ -1,3 +1,7 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update google-benchmark +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md + name: "google-benchmark" description: "A library to support the benchmarking of functions, similar to unit-tests." third_party { @@ -9,11 +13,11 @@ third_party { type: GIT value: "https://github.com/google/benchmark.git" } - version: "ea5a5bbff491fd625c6e3458f6edd680b8bd5452" + version: "v1.8.3" license_type: NOTICE last_upgrade_date { - year: 2021 - month: 2 - day: 12 + year: 2023 + month: 9 + day: 22 } } diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..37a5f5d --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,24 @@ +module(name = "google_benchmark", version="1.8.3") + +bazel_dep(name = "bazel_skylib", version = "1.4.1") +bazel_dep(name = "platforms", version = "0.0.6") +bazel_dep(name = "rules_foreign_cc", version = "0.9.0") +bazel_dep(name = "rules_cc", version = "0.0.6") +bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True) +bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True) +bazel_dep(name = "libpfm", version = "4.11.0") + +# Register a toolchain for Python 3.9 to be able to build numpy. Python +# versions >=3.10 are problematic. +# A second reason for this is to be able to build Python hermetically instead +# of relying on the changing default version from rules_python. 
+ +python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True) +python.toolchain(python_version = "3.9") + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True) +pip.parse( + hub_name="tools_pip_deps", + python_version = "3.9", + requirements_lock="//tools:requirements.txt") +use_repo(pip, "tools_pip_deps") @@ -1,13 +1,12 @@ # Benchmark [![build-and-test](https://github.com/google/benchmark/workflows/build-and-test/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Abuild-and-test) +[![bazel](https://github.com/google/benchmark/actions/workflows/bazel.yml/badge.svg)](https://github.com/google/benchmark/actions/workflows/bazel.yml) [![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint) [![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings) - -[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) -[![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master) [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) +[![Discord](https://discordapp.com/api/guilds/1125694995928719494/widget.png?style=shield)](https://discord.gg/cz7UX7wKC2) A library to benchmark code snippets, similar to unit tests. Example: @@ -27,23 +26,28 @@ BENCHMARK(BM_SomeFunction); BENCHMARK_MAIN(); ``` +## Getting Started + To get started, see [Requirements](#requirements) and [Installation](#installation). See [Usage](#usage) for a full example and the -[User Guide](#user-guide) for a more comprehensive feature overview. +[User Guide](docs/user_guide.md) for a more comprehensive feature overview. -It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/googletest/docs/primer.md) +It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md) as some of the structural aspects of the APIs are similar. -### Resources +## Resources [Discussion group](https://groups.google.com/d/forum/benchmark-discuss) -IRC channel: [freenode](https://freenode.net) #googlebenchmark +IRC channels: +* [libera](https://libera.chat) #benchmark [Additional Tooling Documentation](docs/tools.md) [Assembly Testing Documentation](docs/AssemblyTests.md) +[Building and installing Python bindings](docs/python_bindings.md) + ## Requirements The library can be used with C++03. However, it requires C++11 to build, @@ -56,27 +60,25 @@ The following minimum versions are required to build the library: * Visual Studio 14 2015 * Intel 2015 Update 1 -See [Platform-Specific Build Instructions](#platform-specific-build-instructions). +See [Platform-Specific Build Instructions](docs/platform_specific_build_instructions.md). ## Installation This describes the installation process using cmake. As pre-requisites, you'll need git and cmake installed. -_See [dependencies.md](dependencies.md) for more details regarding supported +_See [dependencies.md](docs/dependencies.md) for more details regarding supported versions of build tools._ ```bash # Check out the library. 
$ git clone https://github.com/google/benchmark.git -# Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory. -$ git clone https://github.com/google/googletest.git benchmark/googletest # Go to the library root directory $ cd benchmark # Make a build directory to place the build output. $ cmake -E make_directory "build" -# Generate build system files with cmake. -$ cmake -E chdir "build" cmake -DCMAKE_BUILD_TYPE=Release ../ +# Generate build system files with cmake, and download any dependencies. +$ cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../ # or, starting with CMake 3.13, use a simpler form: # cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build" # Build the library. @@ -110,10 +112,10 @@ sudo cmake --build "build" --config Release --target install Note that Google Benchmark requires Google Test to build and run the tests. This dependency can be provided two ways: -* Checkout the Google Test sources into `benchmark/googletest` as above. +* Checkout the Google Test sources into `benchmark/googletest`. * Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during - configuration, the library will automatically download and build any required - dependencies. + configuration as above, the library will automatically download and build + any required dependencies. If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF` to `CMAKE_ARGS`. @@ -136,6 +138,12 @@ cache variables, if autodetection fails. If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables. +To enable sanitizer checks (eg., `asan` and `tsan`), add: +``` + -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all" + -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all " +``` + ### Stable and Experimental Library Versions The main branch contains the latest stable version of the benchmarking library; @@ -192,7 +200,7 @@ Alternatively, link against the `benchmark_main` library and remove `BENCHMARK_MAIN();` above to get the same behavior. The compiled executable will run all benchmarks by default. Pass the `--help` -flag for option information or see the guide below. +flag for option information or see the [User Guide](docs/user_guide.md). ### Usage with CMake @@ -213,1111 +221,3 @@ Either way, link to the library as follows. ```cmake target_link_libraries(MyTarget benchmark::benchmark) ``` - -## Platform Specific Build Instructions - -### Building with GCC - -When the library is built using GCC it is necessary to link with the pthread -library due to how GCC implements `std::thread`. Failing to link to pthread will -lead to runtime exceptions (unless you're using libc++), not linker errors. See -[issue #67](https://github.com/google/benchmark/issues/67) for more details. You -can link to pthread by adding `-pthread` to your linker command. Note, you can -also use `-lpthread`, but there are potential issues with ordering of command -line parameters if you use that. - -### Building with Visual Studio 2015 or 2017 - -The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: - -``` -// Alternatively, can add libraries using linker options. 
-#ifdef _WIN32 -#pragma comment ( lib, "Shlwapi.lib" ) -#ifdef _DEBUG -#pragma comment ( lib, "benchmarkd.lib" ) -#else -#pragma comment ( lib, "benchmark.lib" ) -#endif -#endif -``` - -Can also use the graphical version of CMake: -* Open `CMake GUI`. -* Under `Where to build the binaries`, same path as source plus `build`. -* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. -* Click `Configure`, `Generate`, `Open Project`. -* If build fails, try deleting entire directory and starting again, or unticking options to build less. - -### Building with Intel 2015 Update 1 or Intel System Studio Update 4 - -See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. - -### Building on Solaris - -If you're running benchmarks on solaris, you'll want the kstat library linked in -too (`-lkstat`). - -## User Guide - -### Command Line - -[Output Formats](#output-formats) - -[Output Files](#output-files) - -[Running Benchmarks](#running-benchmarks) - -[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) - -[Result Comparison](#result-comparison) - -### Library - -[Runtime and Reporting Considerations](#runtime-and-reporting-considerations) - -[Passing Arguments](#passing-arguments) - -[Calculating Asymptotic Complexity](#asymptotic-complexity) - -[Templated Benchmarks](#templated-benchmarks) - -[Fixtures](#fixtures) - -[Custom Counters](#custom-counters) - -[Multithreaded Benchmarks](#multithreaded-benchmarks) - -[CPU Timers](#cpu-timers) - -[Manual Timing](#manual-timing) - -[Setting the Time Unit](#setting-the-time-unit) - -[Preventing Optimization](#preventing-optimization) - -[Reporting Statistics](#reporting-statistics) - -[Custom Statistics](#custom-statistics) - -[Using RegisterBenchmark](#using-register-benchmark) - -[Exiting with an Error](#exiting-with-an-error) - -[A Faster KeepRunning Loop](#a-faster-keep-running-loop) - -[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) - - -<a name="output-formats" /> - -### Output Formats - -The library supports multiple output formats. Use the -`--benchmark_format=<console|json|csv>` flag (or set the -`BENCHMARK_FORMAT=<console|json|csv>` environment variable) to set -the format type. `console` is the default format. - -The Console format is intended to be a human readable format. By default -the format generates color output. Context is output on stderr and the -tabular data on stdout. Example tabular output looks like: - -``` -Benchmark Time(ns) CPU(ns) Iterations ----------------------------------------------------------------------- -BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s -BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s -BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s -``` - -The JSON format outputs human readable json split into two top level attributes. -The `context` attribute contains information about the run in general, including -information about the CPU and the date. -The `benchmarks` attribute contains a list of every benchmark run. 
Example json -output looks like: - -```json -{ - "context": { - "date": "2015/03/17-18:40:25", - "num_cpus": 40, - "mhz_per_cpu": 2801, - "cpu_scaling_enabled": false, - "build_type": "debug" - }, - "benchmarks": [ - { - "name": "BM_SetInsert/1024/1", - "iterations": 94877, - "real_time": 29275, - "cpu_time": 29836, - "bytes_per_second": 134066, - "items_per_second": 33516 - }, - { - "name": "BM_SetInsert/1024/8", - "iterations": 21609, - "real_time": 32317, - "cpu_time": 32429, - "bytes_per_second": 986770, - "items_per_second": 246693 - }, - { - "name": "BM_SetInsert/1024/10", - "iterations": 21393, - "real_time": 32724, - "cpu_time": 33355, - "bytes_per_second": 1199226, - "items_per_second": 299807 - } - ] -} -``` - -The CSV format outputs comma-separated values. The `context` is output on stderr -and the CSV itself on stdout. Example CSV output looks like: - -``` -name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label -"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, -"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, -"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, -``` - -<a name="output-files" /> - -### Output Files - -Write benchmark results to a file with the `--benchmark_out=<filename>` option -(or set `BENCHMARK_OUT`). Specify the output format with -`--benchmark_out_format={json|console|csv}` (or set -`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that specifying -`--benchmark_out` does not suppress the console output. - -<a name="running-benchmarks" /> - -### Running Benchmarks - -Benchmarks are executed by running the produced binaries. Benchmarks binaries, -by default, accept options that may be specified either through their command -line interface or by setting environment variables before execution. For every -`--option_flag=<value>` CLI switch, a corresponding environment variable -`OPTION_FLAG=<value>` exist and is used as default if set (CLI switches always - prevails). A complete list of CLI options is available running benchmarks - with the `--help` switch. - -<a name="running-a-subset-of-benchmarks" /> - -### Running a Subset of Benchmarks - -The `--benchmark_filter=<regex>` option (or `BENCHMARK_FILTER=<regex>` -environment variable) can be used to only run the benchmarks that match -the specified `<regex>`. For example: - -```bash -$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 -Run on (1 X 2300 MHz CPU ) -2016-06-25 19:34:24 -Benchmark Time CPU Iterations ----------------------------------------------------- -BM_memcpy/32 11 ns 11 ns 79545455 -BM_memcpy/32k 2181 ns 2185 ns 324074 -BM_memcpy/32 12 ns 12 ns 54687500 -BM_memcpy/32k 1834 ns 1837 ns 357143 -``` - -<a name="result-comparison" /> - -### Result comparison - -It is possible to compare the benchmarking results. -See [Additional Tooling Documentation](docs/tools.md) - -<a name="runtime-and-reporting-considerations" /> - -### Runtime and Reporting Considerations - -When the benchmark binary is executed, each benchmark function is run serially. -The number of iterations to run is determined dynamically by running the -benchmark a few times and measuring the time taken and ensuring that the -ultimate result will be statistically stable. As such, faster benchmark -functions will be run for more iterations than slower benchmark functions, and -the number of iterations is thus reported. - -In all cases, the number of iterations for which the benchmark is run is -governed by the amount of time the benchmark takes. 
Concretely, the number of -iterations is at least one, not more than 1e9, until CPU time is greater than -the minimum time, or the wallclock time is 5x minimum time. The minimum time is -set per benchmark by calling `MinTime` on the registered benchmark object. - -Average timings are then reported over the iterations run. If multiple -repetitions are requested using the `--benchmark_repetitions` command-line -option, or at registration time, the benchmark function will be run several -times and statistical results across these repetitions will also be reported. - -As well as the per-benchmark entries, a preamble in the report will include -information about the machine on which the benchmarks are run. - -<a name="passing-arguments" /> - -### Passing Arguments - -Sometimes a family of benchmarks can be implemented with just one routine that -takes an extra argument to specify which one of the family of benchmarks to -run. For example, the following code defines a family of benchmarks for -measuring the speed of `memcpy()` calls of different lengths: - -```c++ -static void BM_memcpy(benchmark::State& state) { - char* src = new char[state.range(0)]; - char* dst = new char[state.range(0)]; - memset(src, 'x', state.range(0)); - for (auto _ : state) - memcpy(dst, src, state.range(0)); - state.SetBytesProcessed(int64_t(state.iterations()) * - int64_t(state.range(0))); - delete[] src; - delete[] dst; -} -BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. The following invocation will pick a few appropriate arguments in -the specified range and will generate a benchmark for each such argument. - -```c++ -BENCHMARK(BM_memcpy)->Range(8, 8<<10); -``` - -By default the arguments in the range are generated in multiples of eight and -the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the -range multiplier is changed to multiples of two. - -```c++ -BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); -``` - -Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. - -The preceding code shows a method of defining a sparse range. The following -example shows a method of defining a dense range. It is then used to benchmark -the performance of `std::vector` initialization for uniformly increasing sizes. - -```c++ -static void BM_DenseRange(benchmark::State& state) { - for(auto _ : state) { - std::vector<int> v(state.range(0), state.range(0)); - benchmark::DoNotOptimize(v.data()); - benchmark::ClobberMemory(); - } -} -BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); -``` - -Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. - -You might have a benchmark that depends on two or more inputs. For example, the -following code defines a family of benchmarks for measuring the speed of set -insertion. - -```c++ -static void BM_SetInsert(benchmark::State& state) { - std::set<int> data; - for (auto _ : state) { - state.PauseTiming(); - data = ConstructRandomSet(state.range(0)); - state.ResumeTiming(); - for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 128}) - ->Args({2<<10, 128}) - ->Args({4<<10, 128}) - ->Args({8<<10, 128}) - ->Args({1<<10, 512}) - ->Args({2<<10, 512}) - ->Args({4<<10, 512}) - ->Args({8<<10, 512}); -``` - -The preceding code is quite repetitive, and can be replaced with the following -short-hand. 
The following macro will pick a few appropriate arguments in the -product of the two specified ranges and will generate a benchmark for each such -pair. - -```c++ -BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - -Some benchmarks may require specific argument values that cannot be expressed -with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a -benchmark input for each combination in the product of the supplied vectors. - -```c++ -BENCHMARK(BM_SetInsert) - ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) -// would generate the same benchmark arguments as -BENCHMARK(BM_SetInsert) - ->Args({1<<10, 20}) - ->Args({3<<10, 20}) - ->Args({8<<10, 20}) - ->Args({3<<10, 40}) - ->Args({8<<10, 40}) - ->Args({1<<10, 40}) - ->Args({1<<10, 60}) - ->Args({3<<10, 60}) - ->Args({8<<10, 60}) - ->Args({1<<10, 80}) - ->Args({3<<10, 80}) - ->Args({8<<10, 80}); -``` - -For more complex patterns of inputs, passing a custom function to `Apply` allows -programmatic specification of an arbitrary set of arguments on which to run the -benchmark. The following example enumerates a dense range on one parameter, -and a sparse range on the second. - -```c++ -static void CustomArguments(benchmark::internal::Benchmark* b) { - for (int i = 0; i <= 10; ++i) - for (int j = 32; j <= 1024*1024; j *= 8) - b->Args({i, j}); -} -BENCHMARK(BM_SetInsert)->Apply(CustomArguments); -``` - -#### Passing Arbitrary Arguments to a Benchmark - -In C++11 it is possible to define a benchmark that takes an arbitrary number -of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` -macro creates a benchmark that invokes `func` with the `benchmark::State` as -the first argument followed by the specified `args...`. -The `test_case_name` is appended to the name of the benchmark and -should describe the values passed. - -```c++ -template <class ...ExtraArgs> -void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { - [...] -} -// Registers a benchmark named "BM_takes_args/int_string_test" that passes -// the specified values to `extra_args`. -BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); -``` - -Note that elements of `...args` may refer to global variables. Users should -avoid modifying global state inside of a benchmark. - -<a name="asymptotic-complexity" /> - -### Calculating Asymptotic Complexity (Big O) - -Asymptotic complexity might be calculated for a family of benchmarks. The -following code will calculate the coefficient for the high-order term in the -running time and the normalized root-mean square error of string comparison. - -```c++ -static void BM_StringCompare(benchmark::State& state) { - std::string s1(state.range(0), '-'); - std::string s2(state.range(0), '-'); - for (auto _ : state) { - benchmark::DoNotOptimize(s1.compare(s2)); - } - state.SetComplexityN(state.range(0)); -} -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); -``` - -As shown in the following invocation, asymptotic complexity might also be -calculated automatically. - -```c++ -BENCHMARK(BM_StringCompare) - ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); -``` - -The following code will specify asymptotic complexity with a lambda function, -that might be used to customize high-order term calculation. 
- -```c++ -BENCHMARK(BM_StringCompare)->RangeMultiplier(2) - ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); -``` - -<a name="templated-benchmarks" /> - -### Templated Benchmarks - -This example produces and consumes messages of size `sizeof(v)` `range_x` -times. It also outputs throughput in the absence of multiprogramming. - -```c++ -template <class Q> void BM_Sequential(benchmark::State& state) { - Q q; - typename Q::value_type v; - for (auto _ : state) { - for (int i = state.range(0); i--; ) - q.push(v); - for (int e = state.range(0); e--; ) - q.Wait(&v); - } - // actually messages, not bytes: - state.SetBytesProcessed( - static_cast<int64_t>(state.iterations())*state.range(0)); -} -BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10); -``` - -Three macros are provided for adding benchmark templates. - -```c++ -#ifdef BENCHMARK_HAS_CXX11 -#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters. -#else // C++ < C++11 -#define BENCHMARK_TEMPLATE(func, arg1) -#endif -#define BENCHMARK_TEMPLATE1(func, arg1) -#define BENCHMARK_TEMPLATE2(func, arg1, arg2) -``` - -<a name="fixtures" /> - -### Fixtures - -Fixture tests are created by first defining a type that derives from -`::benchmark::Fixture` and then creating/registering the tests using the -following macros: - -* `BENCHMARK_F(ClassName, Method)` -* `BENCHMARK_DEFINE_F(ClassName, Method)` -* `BENCHMARK_REGISTER_F(ClassName, Method)` - -For Example: - -```c++ -class MyFixture : public benchmark::Fixture { -public: - void SetUp(const ::benchmark::State& state) { - } - - void TearDown(const ::benchmark::State& state) { - } -}; - -BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} -/* BarTest is NOT registered */ -BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); -/* BarTest is now registered */ -``` - -#### Templated Fixtures - -Also you can create templated fixture by using the following macros: - -* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` -* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` - -For example: - -```c++ -template<typename T> -class MyFixture : public benchmark::Fixture {}; - -BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { - for (auto _ : st) { - ... - } -} - -BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); -``` - -<a name="custom-counters" /> - -### Custom Counters - -You can add your own counters with user-defined names. The example below -will add columns "Foo", "Bar" and "Baz" in its output: - -```c++ -static void UserCountersExample1(benchmark::State& state) { - double numFoos = 0, numBars = 0, numBazs = 0; - for (auto _ : state) { - // ... count Foo,Bar,Baz events - } - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -} -``` - -The `state.counters` object is a `std::map` with `std::string` keys -and `Counter` values. The latter is a `double`-like class, via an implicit -conversion to `double&`. Thus you can use all of the standard arithmetic -assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. - -In multithreaded benchmarks, each counter is set on the calling thread only. 
-When the benchmark finishes, the counters from each thread will be summed; -the resulting sum is the value which will be shown for the benchmark. - -The `Counter` constructor accepts three parameters: the value as a `double` -; a bit flag which allows you to show counters as rates, and/or as per-thread -iteration, and/or as per-thread averages, and/or iteration invariants, -and/or finally inverting the result; and a flag specifying the 'unit' - i.e. -is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 -(`benchmark::Counter::OneK::kIs1024`)? - -```c++ - // sets a simple counter - state.counters["Foo"] = numFoos; - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark. - // Meaning: per one second, how many 'foo's are processed? - state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); - - // Set the counter as a rate. It will be presented divided - // by the duration of the benchmark, and the result inverted. - // Meaning: how many seconds it takes to process one 'foo'? - state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); - - // Set the counter as a thread-average quantity. It will - // be presented divided by the number of threads. - state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); - - // There's also a combined flag: - state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); - - // This says that we process with the rate of state.range(0) bytes every iteration: - state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); -``` - -When you're compiling in C++11 mode or later you can use `insert()` with -`std::initializer_list`: - -```c++ - // With C++11, this can be done: - state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); - // ... instead of: - state.counters["Foo"] = numFoos; - state.counters["Bar"] = numBars; - state.counters["Baz"] = numBazs; -``` - -#### Counter Reporting - -When using the console reporter, by default, user counters are printed at -the end after the table, the same way as ``bytes_processed`` and -``items_processed``. This is best for cases in which there are few counters, -or where there are only a couple of lines per benchmark. Here's an example of -the default output: - -``` ------------------------------------------------------------------------------- -Benchmark Time CPU Iterations UserCounters... 
------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m -BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 -BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 -BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 -BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 -BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 -BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 -BM_Factorial 26 ns 26 ns 26608979 40320 -BM_Factorial/real_time 26 ns 26 ns 26587936 40320 -BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 -BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 -BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 -BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 -``` - -If this doesn't suit you, you can print each counter as a table column by -passing the flag `--benchmark_counters_tabular=true` to the benchmark -application. This is best for cases in which there are a lot of counters, or -a lot of lines per individual benchmark. Note that this will trigger a -reprinting of the table header any time the counter set changes between -individual benchmarks. Here's an example of corresponding output when -`--benchmark_counters_tabular=true` is passed: - -``` ---------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations Bar Bat Baz Foo ---------------------------------------------------------------------------------------- -BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 -BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 -BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 -BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 -BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 -BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 -BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 -BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 --------------------------------------------------------------- -Benchmark Time CPU Iterations --------------------------------------------------------------- -BM_Factorial 26 ns 26 ns 26392245 40320 -BM_Factorial/real_time 26 ns 26 ns 26494107 40320 -BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 -BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 -BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 -BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 -BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 -BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 -BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 -BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 -BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 -``` - -Note above the additional header printed when the benchmark changes from -``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does -not have the same counter set as ``BM_UserCounter``. - -<a name="multithreaded-benchmarks"/> - -### Multithreaded Benchmarks - -In a multithreaded test (benchmark invoked by multiple threads simultaneously), -it is guaranteed that none of the threads will start until all have reached -the start of the benchmark loop, and all will have finished before any thread -exits the benchmark loop. 
(This behavior is also provided by the `KeepRunning()` -API) As such, any global setup or teardown can be wrapped in a check against the thread -index: - -```c++ -static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { - // Setup code here. - } - for (auto _ : state) { - // Run the test as normal. - } - if (state.thread_index == 0) { - // Teardown code here. - } -} -BENCHMARK(BM_MultiThreaded)->Threads(2); -``` - -If the benchmarked code itself uses threads and you want to compare it to -single-threaded code, you may want to use real-time ("wallclock") measurements -for latency comparisons: - -```c++ -BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); -``` - -Without `UseRealTime`, CPU time is used by default. - -<a name="cpu-timers" /> - -### CPU Timers - -By default, the CPU timer only measures the time spent by the main thread. -If the benchmark itself uses threads internally, this measurement may not -be what you are looking for. Instead, there is a way to measure the total -CPU usage of the process, by all the threads. - -```c++ -void callee(int i); - -static void MyMain(int size) { -#pragma omp parallel for - for(int i = 0; i < size; i++) - callee(i); -} - -static void BM_OpenMP(benchmark::State& state) { - for (auto _ : state) - MyMain(state.range(0)); -} - -// Measure the time spent by the main thread, use it to decide for how long to -// run the benchmark loop. Depending on the internal implementation detail may -// measure to anywhere from near-zero (the overhead spent before/after work -// handoff to worker thread[s]) to the whole single-thread time. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10); - -// Measure the user-visible time, the wall clock (literally, the time that -// has passed on the clock on the wall), use it to decide for how long to -// run the benchmark loop. This will always be meaningful, an will match the -// time spent by the main thread in single-threaded case, in general decreasing -// with the number of internal threads doing the work. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); - -// Measure the total CPU consumption, use it to decide for how long to -// run the benchmark loop. This will always measure to no less than the -// time spent by the main thread in single-threaded case. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); - -// A mixture of the last two. Measure the total CPU consumption, but use the -// wall clock to decide for how long to run the benchmark loop. -BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); -``` - -#### Controlling Timers - -Normally, the entire duration of the work loop (`for (auto _ : state) {}`) -is measured. But sometimes, it is necessary to do some work inside of -that loop, every iteration, but without counting that time to the benchmark time. -That is possible, although it is not recommended, since it has high overhead. - -```c++ -static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { - std::set<int> data; - for (auto _ : state) { - state.PauseTiming(); // Stop timers. They will not count until they are resumed. - data = ConstructRandomSet(state.range(0)); // Do something that should not be measured - state.ResumeTiming(); // And resume timers. They are now counting again. - // The rest will be measured. 
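    // Note: PauseTiming() and ResumeTiming() are themselves costly, and that
    // overhead is paid on every iteration, which is why this pattern is not
    // recommended for very cheap benchmarked operations.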
- for (int j = 0; j < state.range(1); ++j) - data.insert(RandomNumber()); - } -} -BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); -``` - -<a name="manual-timing" /> - -### Manual Timing - -For benchmarking something for which neither CPU time nor real-time are -correct or accurate enough, completely manual timing is supported using -the `UseManualTime` function. - -When `UseManualTime` is used, the benchmarked code must call -`SetIterationTime` once per iteration of the benchmark loop to -report the manually measured time. - -An example use case for this is benchmarking GPU execution (e.g. OpenCL -or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot -be accurately measured using CPU time or real-time. Instead, they can be -measured accurately using a dedicated API, and these measurement results -can be reported back with `SetIterationTime`. - -```c++ -static void BM_ManualTiming(benchmark::State& state) { - int microseconds = state.range(0); - std::chrono::duration<double, std::micro> sleep_duration { - static_cast<double>(microseconds) - }; - - for (auto _ : state) { - auto start = std::chrono::high_resolution_clock::now(); - // Simulate some useful workload with a sleep - std::this_thread::sleep_for(sleep_duration); - auto end = std::chrono::high_resolution_clock::now(); - - auto elapsed_seconds = - std::chrono::duration_cast<std::chrono::duration<double>>( - end - start); - - state.SetIterationTime(elapsed_seconds.count()); - } -} -BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); -``` - -<a name="setting-the-time-unit" /> - -### Setting the Time Unit - -If a benchmark runs a few milliseconds it may be hard to visually compare the -measured times, since the output data is given in nanoseconds per default. In -order to manually set the time unit, you can specify it manually: - -```c++ -BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); -``` - -<a name="preventing-optimization" /> - -### Preventing Optimization - -To prevent a value or expression from being optimized away by the compiler -the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` -functions can be used. - -```c++ -static void BM_test(benchmark::State& state) { - for (auto _ : state) { - int x = 0; - for (int i=0; i < 64; ++i) { - benchmark::DoNotOptimize(x += i); - } - } -} -``` - -`DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either -memory or a register. For GNU based compilers it acts as read/write barrier -for global memory. More specifically it forces the compiler to flush pending -writes to memory and reload any other values as necessary. - -Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>` -in any way. `<expr>` may even be removed entirely when the result is already -known. For example: - -```c++ - /* Example 1: `<expr>` is removed entirely. */ - int foo(int x) { return x + 42; } - while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); - - /* Example 2: Result of '<expr>' is only reused */ - int bar(int) __attribute__((const)); - while (...) DoNotOptimize(bar(0)); // Optimized to: - // int __result__ = bar(0); - // while (...) DoNotOptimize(__result__); -``` - -The second tool for preventing optimizations is `ClobberMemory()`. In essence -`ClobberMemory()` forces the compiler to perform all pending writes to global -memory. Memory managed by block scope objects must be "escaped" using -`DoNotOptimize(...)` before it can be clobbered. 
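For instance, a buffer that exists only inside the benchmark loop has to be
escaped through a pointer before `ClobberMemory()` has anything to pin down.
A minimal sketch (the buffer size and fill value are arbitrary):

```c++
static void BM_fill_local_buffer(benchmark::State& state) {
  for (auto _ : state) {
    char buf[1024];
    char* data = buf;
    benchmark::DoNotOptimize(data);        // Escape the buffer first...
    for (unsigned i = 0; i < sizeof(buf); ++i) data[i] = 'x';
    benchmark::ClobberMemory();            // ...then force the writes to be committed.
  }
}
BENCHMARK(BM_fill_local_buffer);
```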
In the below example -`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized -away. - -```c++ -static void BM_vector_push_back(benchmark::State& state) { - for (auto _ : state) { - std::vector<int> v; - v.reserve(1); - benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. - v.push_back(42); - benchmark::ClobberMemory(); // Force 42 to be written to memory. - } -} -``` - -Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. - -<a name="reporting-statistics" /> - -### Statistics: Reporting the Mean, Median and Standard Deviation of Repeated Benchmarks - -By default each benchmark is run once and that single result is reported. -However benchmarks are often noisy and a single result may not be representative -of the overall behavior. For this reason it's possible to repeatedly rerun the -benchmark. - -The number of runs of each benchmark is specified globally by the -`--benchmark_repetitions` flag or on a per benchmark basis by calling -`Repetitions` on the registered benchmark object. When a benchmark is run more -than once the mean, median and standard deviation of the runs will be reported. - -Additionally the `--benchmark_report_aggregates_only={true|false}`, -`--benchmark_display_aggregates_only={true|false}` flags or -`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be -used to change how repeated tests are reported. By default the result of each -repeated run is reported. When `report aggregates only` option is `true`, -only the aggregates (i.e. mean, median and standard deviation, maybe complexity -measurements if they were requested) of the runs is reported, to both the -reporters - standard output (console), and the file. -However when only the `display aggregates only` option is `true`, -only the aggregates are displayed in the standard output, while the file -output still contains everything. -Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a -registered benchmark object overrides the value of the appropriate flag for that -benchmark. - -<a name="custom-statistics" /> - -### Custom Statistics - -While having mean, median and standard deviation is nice, this may not be -enough for everyone. For example you may want to know what the largest -observation is, e.g. because you have some real-time constraints. This is easy. -The following code will specify a custom statistic to be calculated, defined -by a lambda function. - -```c++ -void BM_spin_empty(benchmark::State& state) { - for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { - benchmark::DoNotOptimize(x); - } - } -} - -BENCHMARK(BM_spin_empty) - ->ComputeStatistics("max", [](const std::vector<double>& v) -> double { - return *(std::max_element(std::begin(v), std::end(v))); - }) - ->Arg(512); -``` - -<a name="using-register-benchmark" /> - -### Using RegisterBenchmark(name, fn, args...) - -The `RegisterBenchmark(name, func, args...)` function provides an alternative -way to create and register benchmarks. -`RegisterBenchmark(name, func, args...)` creates, registers, and returns a -pointer to a new benchmark with the specified `name` that invokes -`func(st, args...)` where `st` is a `benchmark::State` object. - -Unlike the `BENCHMARK` registration macros, which can only be used at the global -scope, the `RegisterBenchmark` can be called anywhere. This allows for -benchmark tests to be registered programmatically. 
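Because `RegisterBenchmark` returns the same kind of benchmark object that the
macros configure, the usual chained calls apply to it as well. A minimal sketch
(the `BM_parse` routine and its argument values are made up for illustration):

```c++
static void BM_parse(benchmark::State& state) {
  for (auto _ : state) {
    // ... parse an input of state.range(0) bytes ...
  }
}

int main(int argc, char** argv) {
  // Register two variants of the same routine at run time and configure
  // them exactly as a BENCHMARK(...)->Arg(...) chain would.
  benchmark::RegisterBenchmark("BM_parse/small", BM_parse)->Arg(1 << 10);
  benchmark::RegisterBenchmark("BM_parse/large", BM_parse)
      ->Arg(1 << 20)
      ->Unit(benchmark::kMicrosecond);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
}
```

Registrations made this way take effect as long as they happen before
`RunSpecifiedBenchmarks()` is called.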
- -Additionally `RegisterBenchmark` allows any callable object to be registered -as a benchmark. Including capturing lambdas and function objects. - -For Example: -```c++ -auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ }; - -int main(int argc, char** argv) { - for (auto& test_input : { /* ... */ }) - benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input); - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); -} -``` - -<a name="exiting-with-an-error" /> - -### Exiting with an Error - -When errors caused by external influences, such as file I/O and network -communication, occur within a benchmark the -`State::SkipWithError(const char* msg)` function can be used to skip that run -of benchmark and report the error. Note that only future iterations of the -`KeepRunning()` are skipped. For the ranged-for version of the benchmark loop -Users must explicitly exit the loop, otherwise all iterations will be performed. -Users may explicitly return to exit the benchmark immediately. - -The `SkipWithError(...)` function may be used at any point within the benchmark, -including before and after the benchmark loop. Moreover, if `SkipWithError(...)` -has been used, it is not required to reach the benchmark loop and one may return -from the benchmark function early. - -For example: - -```c++ -static void BM_test(benchmark::State& state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - // KeepRunning() loop will not be entered. - } - while (state.KeepRunning()) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // Needed to skip the rest of the iteration. - } - do_stuff(data); - } -} - -static void BM_test_ranged_fo(benchmark::State & state) { - auto resource = GetResource(); - if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - return; // Early return is allowed when SkipWithError() has been used. - } - for (auto _ : state) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // REQUIRED to prevent all further iterations. - } - do_stuff(data); - } -} -``` -<a name="a-faster-keep-running-loop" /> - -### A Faster KeepRunning Loop - -In C++11 mode, a ranged-based for loop should be used in preference to -the `KeepRunning` loop for running the benchmarks. For example: - -```c++ -static void BM_Fast(benchmark::State &state) { - for (auto _ : state) { - FastOperation(); - } -} -BENCHMARK(BM_Fast); -``` - -The reason the ranged-for loop is faster than using `KeepRunning`, is -because `KeepRunning` requires a memory load and store of the iteration count -ever iteration, whereas the ranged-for variant is able to keep the iteration count -in a register. 
- -For example, an empty inner loop of using the ranged-based for method looks like: - -```asm -# Loop Init - mov rbx, qword ptr [r14 + 104] - call benchmark::State::StartKeepRunning() - test rbx, rbx - je .LoopEnd -.LoopHeader: # =>This Inner Loop Header: Depth=1 - add rbx, -1 - jne .LoopHeader -.LoopEnd: -``` - -Compared to an empty `KeepRunning` loop, which looks like: - -```asm -.LoopHeader: # in Loop: Header=BB0_3 Depth=1 - cmp byte ptr [rbx], 1 - jne .LoopInit -.LoopBody: # =>This Inner Loop Header: Depth=1 - mov rax, qword ptr [rbx + 8] - lea rcx, [rax + 1] - mov qword ptr [rbx + 8], rcx - cmp rax, qword ptr [rbx + 104] - jb .LoopHeader - jmp .LoopEnd -.LoopInit: - mov rdi, rbx - call benchmark::State::StartKeepRunning() - jmp .LoopBody -.LoopEnd: -``` - -Unless C++03 compatibility is required, the ranged-for variant of writing -the benchmark loop should be preferred. - -<a name="disabling-cpu-frequency-scaling" /> - -### Disabling CPU Frequency Scaling - -If you see this error: - -``` -***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. -``` - -you might want to disable the CPU frequency scaling while running the benchmark: - -```bash -sudo cpupower frequency-set --governor performance -./mybench -sudo cpupower frequency-set --governor powersave -``` @@ -1,51 +1,22 @@ workspace(name = "com_github_google_benchmark") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("//:bazel/benchmark_deps.bzl", "benchmark_deps") -http_archive( - name = "rules_cc", - strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912", - urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"], - sha256 = "d7dc12c1d5bc1a87474de8e3d17b7731a4dcebcfb8aa3990fe8ac7734ef12f2f", -) +benchmark_deps() -http_archive( - name = "com_google_absl", - sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111", - strip_prefix = "abseil-cpp-20200225.2", - urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], -) +load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies") -http_archive( - name = "com_google_googletest", - strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e", - urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"], - sha256 = "8f827dd550db8b4fdf73904690df0be9fccc161017c9038a724bc9a0617a1bc8", -) +rules_foreign_cc_dependencies() -http_archive( - name = "pybind11", - build_file = "@//bindings/python:pybind11.BUILD", - sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d", - strip_prefix = "pybind11-2.4.3", - urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"], +load("@rules_python//python:pip.bzl", pip3_install="pip_install") + +pip3_install( + name = "tools_pip_deps", + requirements = "//tools:requirements.txt", ) new_local_repository( name = "python_headers", build_file = "@//bindings/python:python_headers.BUILD", - path = "/usr/include/python3.6", # May be overwritten by setup.py. 
-) - -http_archive( - name = "rules_python", - url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", - sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", -) - -load("@rules_python//python:pip.bzl", pip3_install="pip_install") - -pip3_install( - name = "py_deps", - requirements = "//:requirements.txt", + path = "<PYTHON_INCLUDE_PATH>", # May be overwritten by setup.py. ) diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod new file mode 100644 index 0000000..9526376 --- /dev/null +++ b/WORKSPACE.bzlmod @@ -0,0 +1,2 @@ +# This file marks the root of the Bazel workspace. +# See MODULE.bazel for dependencies and setup. diff --git a/_config.yml b/_config.yml index 1885487..1fa5ff8 100644 --- a/_config.yml +++ b/_config.yml @@ -1 +1,2 @@ -theme: jekyll-theme-midnight
\ No newline at end of file +theme: jekyll-theme-midnight +markdown: GFM diff --git a/bazel/benchmark_deps.bzl b/bazel/benchmark_deps.bzl new file mode 100644 index 0000000..667065f --- /dev/null +++ b/bazel/benchmark_deps.bzl @@ -0,0 +1,65 @@ +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") + +def benchmark_deps(): + """Loads dependencies required to build Google Benchmark.""" + + if "bazel_skylib" not in native.existing_rules(): + http_archive( + name = "bazel_skylib", + sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz", + ], + ) + + if "rules_foreign_cc" not in native.existing_rules(): + http_archive( + name = "rules_foreign_cc", + sha256 = "bcd0c5f46a49b85b384906daae41d277b3dc0ff27c7c752cc51e43048a58ec83", + strip_prefix = "rules_foreign_cc-0.7.1", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.1.tar.gz", + ) + + if "rules_python" not in native.existing_rules(): + http_archive( + name = "rules_python", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", + sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", + ) + + if "com_google_absl" not in native.existing_rules(): + http_archive( + name = "com_google_absl", + sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111", + strip_prefix = "abseil-cpp-20200225.2", + urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], + ) + + if "com_google_googletest" not in native.existing_rules(): + new_git_repository( + name = "com_google_googletest", + remote = "https://github.com/google/googletest.git", + tag = "release-1.11.0", + ) + + if "nanobind" not in native.existing_rules(): + new_git_repository( + name = "nanobind", + remote = "https://github.com/wjakob/nanobind.git", + tag = "v1.4.0", + build_file = "@//bindings/python:nanobind.BUILD", + recursive_init_submodules = True, + ) + + if "libpfm" not in native.existing_rules(): + # Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/ + http_archive( + name = "libpfm", + build_file = str(Label("//tools:libpfm.BUILD.bazel")), + sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc", + type = "tar.gz", + strip_prefix = "libpfm-4.11.0", + urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"], + ) diff --git a/bindings/python/build_defs.bzl b/bindings/python/build_defs.bzl index 45907aa..009820a 100644 --- a/bindings/python/build_defs.bzl +++ b/bindings/python/build_defs.bzl @@ -8,8 +8,8 @@ def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []): shared_lib_name = name + shared_lib_suffix native.cc_binary( name = shared_lib_name, - linkshared = 1, - linkstatic = 1, + linkshared = True, + linkstatic = True, srcs = srcs + hdrs, copts = copts, features = features, diff --git a/bindings/python/google_benchmark/BUILD b/bindings/python/google_benchmark/BUILD index 3c1561f..89ec76e 100644 --- a/bindings/python/google_benchmark/BUILD +++ b/bindings/python/google_benchmark/BUILD @@ -6,7 +6,6 @@ py_library( visibility = ["//visibility:public"], deps = [ ":_benchmark", - # pip; absl:app ], ) @@ -17,10 +16,13 @@ 
py_extension( "-fexceptions", "-fno-strict-aliasing", ], - features = ["-use_header_modules"], + features = [ + "-use_header_modules", + "-parse_headers", + ], deps = [ "//:benchmark", - "@pybind11", + "@nanobind", "@python_headers", ], ) diff --git a/bindings/python/google_benchmark/__init__.py b/bindings/python/google_benchmark/__init__.py index f31285e..642d78a 100644 --- a/bindings/python/google_benchmark/__init__.py +++ b/bindings/python/google_benchmark/__init__.py @@ -26,6 +26,7 @@ Example usage: if __name__ == '__main__': benchmark.main() """ +import atexit from absl import app from google_benchmark import _benchmark @@ -44,6 +45,7 @@ from google_benchmark._benchmark import ( oNLogN, oAuto, oLambda, + State, ) @@ -64,9 +66,10 @@ __all__ = [ "oNLogN", "oAuto", "oLambda", + "State", ] -__version__ = "0.2.0" +__version__ = "1.8.3" class __OptionMaker: @@ -101,7 +104,7 @@ class __OptionMaker: options = self.make(func_or_options) options.builder_calls.append((builder_name, args, kwargs)) # The decorator returns Options so it is not technically a decorator - # and needs a final call to @regiser + # and needs a final call to @register return options return __decorator @@ -110,7 +113,7 @@ class __OptionMaker: # Alias for nicer API. -# We have to instanciate an object, even if stateless, to be able to use __getattr__ +# We have to instantiate an object, even if stateless, to be able to use __getattr__ # on option.range option = __OptionMaker() @@ -156,3 +159,4 @@ def main(argv=None): # Methods for use with custom main function. initialize = _benchmark.Initialize run_benchmarks = _benchmark.RunSpecifiedBenchmarks +atexit.register(_benchmark.ClearRegisteredBenchmarks) diff --git a/bindings/python/google_benchmark/benchmark.cc b/bindings/python/google_benchmark/benchmark.cc index d80816e..f444769 100644 --- a/bindings/python/google_benchmark/benchmark.cc +++ b/bindings/python/google_benchmark/benchmark.cc @@ -1,20 +1,17 @@ // Benchmark for Python. 
-#include <map> -#include <string> -#include <vector> - -#include "pybind11/operators.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl.h" -#include "pybind11/stl_bind.h" - #include "benchmark/benchmark.h" -PYBIND11_MAKE_OPAQUE(benchmark::UserCounters); +#include "nanobind/nanobind.h" +#include "nanobind/operators.h" +#include "nanobind/stl/bind_map.h" +#include "nanobind/stl/string.h" +#include "nanobind/stl/vector.h" + +NB_MAKE_OPAQUE(benchmark::UserCounters); namespace { -namespace py = ::pybind11; +namespace nb = nanobind; std::vector<std::string> Initialize(const std::vector<std::string>& argv) { // The `argv` pointers here become invalid when this function returns, but @@ -37,15 +34,16 @@ std::vector<std::string> Initialize(const std::vector<std::string>& argv) { return remaining_argv; } -benchmark::internal::Benchmark* RegisterBenchmark(const char* name, - py::function f) { +benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name, + nb::callable f) { return benchmark::RegisterBenchmark( name, [f](benchmark::State& state) { f(&state); }); } -PYBIND11_MODULE(_benchmark, m) { +NB_MODULE(_benchmark, m) { + using benchmark::TimeUnit; - py::enum_<TimeUnit>(m, "TimeUnit") + nb::enum_<TimeUnit>(m, "TimeUnit") .value("kNanosecond", TimeUnit::kNanosecond) .value("kMicrosecond", TimeUnit::kMicrosecond) .value("kMillisecond", TimeUnit::kMillisecond) @@ -53,72 +51,74 @@ PYBIND11_MODULE(_benchmark, m) { .export_values(); using benchmark::BigO; - py::enum_<BigO>(m, "BigO") + nb::enum_<BigO>(m, "BigO") .value("oNone", BigO::oNone) .value("o1", BigO::o1) .value("oN", BigO::oN) .value("oNSquared", BigO::oNSquared) .value("oNCubed", BigO::oNCubed) .value("oLogN", BigO::oLogN) - .value("oNLogN", BigO::oLogN) + .value("oNLogN", BigO::oNLogN) .value("oAuto", BigO::oAuto) .value("oLambda", BigO::oLambda) .export_values(); using benchmark::internal::Benchmark; - py::class_<Benchmark>(m, "Benchmark") - // For methods returning a pointer tor the current object, reference - // return policy is used to ask pybind not to take ownership oof the + nb::class_<Benchmark>(m, "Benchmark") + // For methods returning a pointer to the current object, reference + // return policy is used to ask nanobind not to take ownership of the // returned object and avoid calling delete on it. // https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies // // For methods taking a const std::vector<...>&, a copy is created // because a it is bound to a Python list. 
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html - .def("unit", &Benchmark::Unit, py::return_value_policy::reference) - .def("arg", &Benchmark::Arg, py::return_value_policy::reference) - .def("args", &Benchmark::Args, py::return_value_policy::reference) - .def("range", &Benchmark::Range, py::return_value_policy::reference, - py::arg("start"), py::arg("limit")) + .def("unit", &Benchmark::Unit, nb::rv_policy::reference) + .def("arg", &Benchmark::Arg, nb::rv_policy::reference) + .def("args", &Benchmark::Args, nb::rv_policy::reference) + .def("range", &Benchmark::Range, nb::rv_policy::reference, + nb::arg("start"), nb::arg("limit")) .def("dense_range", &Benchmark::DenseRange, - py::return_value_policy::reference, py::arg("start"), - py::arg("limit"), py::arg("step") = 1) - .def("ranges", &Benchmark::Ranges, py::return_value_policy::reference) + nb::rv_policy::reference, nb::arg("start"), + nb::arg("limit"), nb::arg("step") = 1) + .def("ranges", &Benchmark::Ranges, nb::rv_policy::reference) .def("args_product", &Benchmark::ArgsProduct, - py::return_value_policy::reference) - .def("arg_name", &Benchmark::ArgName, py::return_value_policy::reference) + nb::rv_policy::reference) + .def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference) .def("arg_names", &Benchmark::ArgNames, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("range_pair", &Benchmark::RangePair, - py::return_value_policy::reference, py::arg("lo1"), py::arg("hi1"), - py::arg("lo2"), py::arg("hi2")) + nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"), + nb::arg("lo2"), nb::arg("hi2")) .def("range_multiplier", &Benchmark::RangeMultiplier, - py::return_value_policy::reference) - .def("min_time", &Benchmark::MinTime, py::return_value_policy::reference) + nb::rv_policy::reference) + .def("min_time", &Benchmark::MinTime, nb::rv_policy::reference) + .def("min_warmup_time", &Benchmark::MinWarmUpTime, + nb::rv_policy::reference) .def("iterations", &Benchmark::Iterations, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("repetitions", &Benchmark::Repetitions, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("report_aggregates_only", &Benchmark::ReportAggregatesOnly, - py::return_value_policy::reference, py::arg("value") = true) + nb::rv_policy::reference, nb::arg("value") = true) .def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly, - py::return_value_policy::reference, py::arg("value") = true) + nb::rv_policy::reference, nb::arg("value") = true) .def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("use_real_time", &Benchmark::UseRealTime, - py::return_value_policy::reference) + nb::rv_policy::reference) .def("use_manual_time", &Benchmark::UseManualTime, - py::return_value_policy::reference) + nb::rv_policy::reference) .def( "complexity", (Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity, - py::return_value_policy::reference, - py::arg("complexity") = benchmark::oAuto); + nb::rv_policy::reference, + nb::arg("complexity") = benchmark::oAuto); using benchmark::Counter; - py::class_<Counter> py_counter(m, "Counter"); + nb::class_<Counter> py_counter(m, "Counter"); - py::enum_<Counter::Flags>(py_counter, "Flags") + nb::enum_<Counter::Flags>(py_counter, "Flags") .value("kDefaults", Counter::Flags::kDefaults) .value("kIsRate", Counter::Flags::kIsRate) .value("kAvgThreads", Counter::Flags::kAvgThreads) @@ -130,52 +130,55 @@ 
PYBIND11_MODULE(_benchmark, m) { .value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate) .value("kInvert", Counter::Flags::kInvert) .export_values() - .def(py::self | py::self); + .def(nb::self | nb::self); - py::enum_<Counter::OneK>(py_counter, "OneK") + nb::enum_<Counter::OneK>(py_counter, "OneK") .value("kIs1000", Counter::OneK::kIs1000) .value("kIs1024", Counter::OneK::kIs1024) .export_values(); py_counter - .def(py::init<double, Counter::Flags, Counter::OneK>(), - py::arg("value") = 0., py::arg("flags") = Counter::kDefaults, - py::arg("k") = Counter::kIs1000) - .def(py::init([](double value) { return Counter(value); })) - .def_readwrite("value", &Counter::value) - .def_readwrite("flags", &Counter::flags) - .def_readwrite("oneK", &Counter::oneK); - py::implicitly_convertible<py::float_, Counter>(); - py::implicitly_convertible<py::int_, Counter>(); - - py::bind_map<benchmark::UserCounters>(m, "UserCounters"); + .def(nb::init<double, Counter::Flags, Counter::OneK>(), + nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults, + nb::arg("k") = Counter::kIs1000) + .def("__init__", ([](Counter *c, double value) { new (c) Counter(value); })) + .def_rw("value", &Counter::value) + .def_rw("flags", &Counter::flags) + .def_rw("oneK", &Counter::oneK) + .def(nb::init_implicit<double>()); + + nb::implicitly_convertible<nb::int_, Counter>(); + + nb::bind_map<benchmark::UserCounters>(m, "UserCounters"); using benchmark::State; - py::class_<State>(m, "State") + nb::class_<State>(m, "State") .def("__bool__", &State::KeepRunning) - .def_property_readonly("keep_running", &State::KeepRunning) + .def_prop_ro("keep_running", &State::KeepRunning) .def("pause_timing", &State::PauseTiming) .def("resume_timing", &State::ResumeTiming) .def("skip_with_error", &State::SkipWithError) - .def_property_readonly("error_occured", &State::error_occurred) + .def_prop_ro("error_occurred", &State::error_occurred) .def("set_iteration_time", &State::SetIterationTime) - .def_property("bytes_processed", &State::bytes_processed, + .def_prop_rw("bytes_processed", &State::bytes_processed, &State::SetBytesProcessed) - .def_property("complexity_n", &State::complexity_length_n, + .def_prop_rw("complexity_n", &State::complexity_length_n, &State::SetComplexityN) - .def_property("items_processed", &State::items_processed, - &State::SetItemsProcessed) - .def("set_label", (void (State::*)(const char*)) & State::SetLabel) - .def("range", &State::range, py::arg("pos") = 0) - .def_property_readonly("iterations", &State::iterations) - .def_readwrite("counters", &State::counters) - .def_readonly("thread_index", &State::thread_index) - .def_readonly("threads", &State::threads); + .def_prop_rw("items_processed", &State::items_processed, + &State::SetItemsProcessed) + .def("set_label", &State::SetLabel) + .def("range", &State::range, nb::arg("pos") = 0) + .def_prop_ro("iterations", &State::iterations) + .def_prop_ro("name", &State::name) + .def_rw("counters", &State::counters) + .def_prop_ro("thread_index", &State::thread_index) + .def_prop_ro("threads", &State::threads); m.def("Initialize", Initialize); m.def("RegisterBenchmark", RegisterBenchmark, - py::return_value_policy::reference); + nb::rv_policy::reference); m.def("RunSpecifiedBenchmarks", []() { benchmark::RunSpecifiedBenchmarks(); }); + m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks); }; } // namespace diff --git a/bindings/python/google_benchmark/example.py b/bindings/python/google_benchmark/example.py index 9134e8c..d95a043 100644 --- 
a/bindings/python/google_benchmark/example.py +++ b/bindings/python/google_benchmark/example.py @@ -72,7 +72,7 @@ def manual_timing(state): @benchmark.register def custom_counters(state): - """Collect cutom metric using benchmark.Counter.""" + """Collect custom metric using benchmark.Counter.""" num_foo = 0.0 while state: # Benchmark some code here @@ -102,7 +102,7 @@ def with_options(state): @benchmark.register(name="sum_million_microseconds") @benchmark.option.unit(benchmark.kMicrosecond) -def with_options(state): +def with_options2(state): while state: sum(range(1_000_000)) diff --git a/bindings/python/nanobind.BUILD b/bindings/python/nanobind.BUILD new file mode 100644 index 0000000..cd9faf9 --- /dev/null +++ b/bindings/python/nanobind.BUILD @@ -0,0 +1,17 @@ +cc_library( + name = "nanobind", + srcs = glob([ + "src/*.cpp" + ]), + copts = ["-fexceptions"], + includes = ["include", "ext/robin_map/include"], + textual_hdrs = glob( + [ + "include/**/*.h", + "src/*.h", + "ext/robin_map/include/tsl/*.h", + ], + ), + deps = ["@python_headers"], + visibility = ["//visibility:public"], +) diff --git a/bindings/python/pybind11.BUILD b/bindings/python/pybind11.BUILD deleted file mode 100644 index bc83350..0000000 --- a/bindings/python/pybind11.BUILD +++ /dev/null @@ -1,20 +0,0 @@ -cc_library( - name = "pybind11", - hdrs = glob( - include = [ - "include/pybind11/*.h", - "include/pybind11/detail/*.h", - ], - exclude = [ - "include/pybind11/common.h", - "include/pybind11/eigen.h", - ], - ), - copts = [ - "-fexceptions", - "-Wno-undefined-inline", - "-Wno-pragma-once-outside-header", - ], - includes = ["include"], - visibility = ["//visibility:public"], -) diff --git a/bindings/python/requirements.txt b/bindings/python/requirements.txt deleted file mode 100644 index f5bbe7e..0000000 --- a/bindings/python/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -absl-py>=0.7.1 - diff --git a/cmake/AddCXXCompilerFlag.cmake b/cmake/AddCXXCompilerFlag.cmake index d0d2099..858589e 100644 --- a/cmake/AddCXXCompilerFlag.cmake +++ b/cmake/AddCXXCompilerFlag.cmake @@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG) check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") if(${MANGLED_FLAG}) - set(VARIANT ${ARGV1}) - if(ARGV1) + if(ARGC GREATER 1) + set(VARIANT ${ARGV1}) string(TOUPPER "_${VARIANT}" VARIANT) + else() + set(VARIANT "") endif() set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) endif() @@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG) check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") if(${MANGLED_FLAG}) - set(VARIANT ${ARGV1}) - if(ARGV1) + if(ARGC GREATER 1) + set(VARIANT ${ARGV1}) string(TOUPPER "_${VARIANT}" VARIANT) + else() + set(VARIANT "") endif() set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) diff --git a/cmake/CXXFeatureCheck.cmake b/cmake/CXXFeatureCheck.cmake index 62e6741..e514826 100644 --- a/cmake/CXXFeatureCheck.cmake +++ b/cmake/CXXFeatureCheck.cmake @@ -17,6 +17,8 @@ if(__cxx_feature_check) endif() set(__cxx_feature_check INCLUDED) +option(CXXFEATURECHECK_DEBUG OFF) + function(cxx_feature_check FILE) string(TOLOWER ${FILE} FILE) string(TOUPPER ${FILE} VAR) @@ -27,18 +29,22 @@ function(cxx_feature_check FILE) return() endif() + set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) if 
(ARGC GREATER 1) message(STATUS "Enabling additional flags: ${ARGV1}") - list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1}) + list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1}) endif() if (NOT DEFINED COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE}") if(CMAKE_CROSSCOMPILING) + message(STATUS "Cross-compiling to test ${FEATURE}") try_compile(COMPILE_${FEATURE} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED ON + CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES} + OUTPUT_VARIABLE COMPILE_OUTPUT_VAR) if(COMPILE_${FEATURE}) message(WARNING "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0") @@ -47,11 +53,14 @@ function(cxx_feature_check FILE) set(RUN_${FEATURE} 1 CACHE INTERNAL "") endif() else() - message(STATUS "Performing Test ${FEATURE}") + message(STATUS "Compiling and running to test ${FEATURE}") try_run(RUN_${FEATURE} COMPILE_${FEATURE} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED ON + CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES} + COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR) endif() endif() @@ -61,7 +70,11 @@ function(cxx_feature_check FILE) add_definitions(-DHAVE_${VAR}) else() if(NOT COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE} -- failed to compile") + if(CXXFEATURECHECK_DEBUG) + message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}") + else() + message(STATUS "Performing Test ${FEATURE} -- failed to compile") + endif() else() message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run") endif() diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 6e9256e..2e15f0c 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -1 +1,7 @@ +@PACKAGE_INIT@ + +include (CMakeFindDependencyMacro) + +find_dependency (Threads) + include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") diff --git a/cmake/GetGitVersion.cmake b/cmake/GetGitVersion.cmake index 4f10f22..04a1f9b 100644 --- a/cmake/GetGitVersion.cmake +++ b/cmake/GetGitVersion.cmake @@ -20,16 +20,20 @@ set(__get_git_version INCLUDED) function(get_git_version var) if(GIT_EXECUTABLE) - execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 + execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} RESULT_VARIABLE status - OUTPUT_VARIABLE GIT_VERSION + OUTPUT_VARIABLE GIT_DESCRIBE_VERSION ERROR_QUIET) - if(${status}) - set(GIT_VERSION "v0.0.0") + if(status) + set(GIT_DESCRIBE_VERSION "v0.0.0") + endif() + + string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION) + if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-) + string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION}) else() - string(STRIP ${GIT_VERSION} GIT_VERSION) - string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION}) + string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION}) endif() # Work out if the repository is dirty @@ -43,12 +47,12 @@ function(get_git_version var) ERROR_QUIET) string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) if (${GIT_DIRTY}) - set(GIT_VERSION "${GIT_VERSION}-dirty") + 
set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty") endif() + message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}") else() - set(GIT_VERSION "v0.0.0") + set(GIT_VERSION "0.0.0") endif() - message(STATUS "git Version: ${GIT_VERSION}") set(${var} ${GIT_VERSION} PARENT_SCOPE) endfunction() diff --git a/cmake/GoogleTest.cmake b/cmake/GoogleTest.cmake index dd611fc..e66e9d1 100644 --- a/cmake/GoogleTest.cmake +++ b/cmake/GoogleTest.cmake @@ -35,7 +35,24 @@ add_subdirectory(${GOOGLETEST_SOURCE_DIR} ${GOOGLETEST_BINARY_DIR} EXCLUDE_FROM_ALL) -set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES>) -set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES>) -set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES>) -set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES>) +# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves. +if (MSVC) + target_compile_options(gtest PRIVATE "/wd4244" "/wd4722") + target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722") + target_compile_options(gmock PRIVATE "/wd4244" "/wd4722") + target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722") +else() + target_compile_options(gtest PRIVATE "-w") + target_compile_options(gtest_main PRIVATE "-w") + target_compile_options(gmock PRIVATE "-w") + target_compile_options(gmock_main PRIVATE "-w") +endif() + +if(NOT DEFINED GTEST_COMPILE_COMMANDS) + set(GTEST_COMPILE_COMMANDS ON) +endif() + +set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) +set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS}) diff --git a/cmake/GoogleTest.cmake.in b/cmake/GoogleTest.cmake.in index fd957ff..ce653ac 100644 --- a/cmake/GoogleTest.cmake.in +++ b/cmake/GoogleTest.cmake.in @@ -31,13 +31,14 @@ if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" ) else() if(NOT ALLOW_DOWNLOADING_GOOGLETEST) - message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") + message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") + return() else() message(WARNING "Did not find Google Test sources! 
Fetching from web...") ExternalProject_Add( googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG master + GIT_TAG "release-1.11.0" PREFIX "${CMAKE_BINARY_DIR}" STAMP_DIR "${CMAKE_BINARY_DIR}/stamp" DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download" diff --git a/cmake/Modules/FindPFM.cmake b/cmake/Modules/FindPFM.cmake new file mode 100644 index 0000000..4c1ce93 --- /dev/null +++ b/cmake/Modules/FindPFM.cmake @@ -0,0 +1,28 @@ +# If successful, the following variables will be defined: +# PFM_FOUND. +# PFM_LIBRARIES +# PFM_INCLUDE_DIRS +# the following target will be defined: +# PFM::libpfm + +include(FeatureSummary) +include(FindPackageHandleStandardArgs) + +set_package_properties(PFM PROPERTIES + URL http://perfmon2.sourceforge.net/ + DESCRIPTION "A helper library to develop monitoring tools" + PURPOSE "Used to program specific performance monitoring events") + +find_library(PFM_LIBRARY NAMES pfm) +find_path(PFM_INCLUDE_DIR NAMES perfmon/pfmlib.h) + +find_package_handle_standard_args(PFM REQUIRED_VARS PFM_LIBRARY PFM_INCLUDE_DIR) + +if (PFM_FOUND AND NOT TARGET PFM::libpfm) + add_library(PFM::libpfm UNKNOWN IMPORTED) + set_target_properties(PFM::libpfm PROPERTIES + IMPORTED_LOCATION "${PFM_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${PFM_INCLUDE_DIR}") +endif() + +mark_as_advanced(PFM_LIBRARY PFM_INCLUDE_DIR) diff --git a/cmake/benchmark.pc.in b/cmake/benchmark.pc.in index 34beb01..9dae881 100644 --- a/cmake/benchmark.pc.in +++ b/cmake/benchmark.pc.in @@ -1,7 +1,7 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix} -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: @PROJECT_NAME@ Description: Google microbenchmark framework diff --git a/cmake/pthread_affinity.cpp b/cmake/pthread_affinity.cpp new file mode 100644 index 0000000..7b143bc --- /dev/null +++ b/cmake/pthread_affinity.cpp @@ -0,0 +1,16 @@ +#include <pthread.h> +int main() { + cpu_set_t set; + CPU_ZERO(&set); + for (int i = 0; i < CPU_SETSIZE; ++i) { + CPU_SET(i, &set); + CPU_CLR(i, &set); + } + pthread_t self = pthread_self(); + int ret; + ret = pthread_getaffinity_np(self, sizeof(set), &set); + if (ret != 0) return ret; + ret = pthread_setaffinity_np(self, sizeof(set), &set); + if (ret != 0) return ret; + return 0; +} diff --git a/conan/CMakeLists.txt b/conan/CMakeLists.txt deleted file mode 100644 index 15b92ca..0000000 --- a/conan/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -cmake_minimum_required(VERSION 2.8.11) -project(cmake_wrapper) - -include(conanbuildinfo.cmake) -conan_basic_setup() - -include(${CMAKE_SOURCE_DIR}/CMakeListsOriginal.txt) diff --git a/conan/test_package/CMakeLists.txt b/conan/test_package/CMakeLists.txt deleted file mode 100644 index 089a6c7..0000000 --- a/conan/test_package/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cmake_minimum_required(VERSION 2.8.11) -project(test_package) - -set(CMAKE_VERBOSE_MAKEFILE TRUE) - -include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) -conan_basic_setup() - -add_executable(${PROJECT_NAME} test_package.cpp) -target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS}) diff --git a/conan/test_package/conanfile.py b/conan/test_package/conanfile.py deleted file mode 100644 index d63f408..0000000 --- a/conan/test_package/conanfile.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from conans import ConanFile, CMake -import os - - -class TestPackageConan(ConanFile): - settings = "os", "compiler", 
"build_type", "arch" - generators = "cmake" - - def build(self): - cmake = CMake(self) - cmake.configure() - cmake.build() - - def test(self): - bin_path = os.path.join("bin", "test_package") - self.run(bin_path, run_environment=True) diff --git a/conan/test_package/test_package.cpp b/conan/test_package/test_package.cpp deleted file mode 100644 index 4fa7ec0..0000000 --- a/conan/test_package/test_package.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "benchmark/benchmark.h" - -void BM_StringCreation(benchmark::State& state) { - while (state.KeepRunning()) - std::string empty_string; -} - -BENCHMARK(BM_StringCreation); - -void BM_StringCopy(benchmark::State& state) { - std::string x = "hello"; - while (state.KeepRunning()) - std::string copy(x); -} - -BENCHMARK(BM_StringCopy); - -BENCHMARK_MAIN(); diff --git a/conanfile.py b/conanfile.py deleted file mode 100644 index e31fc52..0000000 --- a/conanfile.py +++ /dev/null @@ -1,79 +0,0 @@ -from conans import ConanFile, CMake, tools -from conans.errors import ConanInvalidConfiguration -import shutil -import os - - -class GoogleBenchmarkConan(ConanFile): - name = "benchmark" - description = "A microbenchmark support library." - topics = ("conan", "benchmark", "google", "microbenchmark") - url = "https://github.com/google/benchmark" - homepage = "https://github.com/google/benchmark" - author = "Google Inc." - license = "Apache-2.0" - exports_sources = ["*"] - generators = "cmake" - - settings = "arch", "build_type", "compiler", "os" - options = { - "shared": [True, False], - "fPIC": [True, False], - "enable_lto": [True, False], - "enable_exceptions": [True, False] - } - default_options = {"shared": False, "fPIC": True, "enable_lto": False, "enable_exceptions": True} - - _build_subfolder = "." - - def source(self): - # Wrap the original CMake file to call conan_basic_setup - shutil.move("CMakeLists.txt", "CMakeListsOriginal.txt") - shutil.move(os.path.join("conan", "CMakeLists.txt"), "CMakeLists.txt") - - def config_options(self): - if self.settings.os == "Windows": - if self.settings.compiler == "Visual Studio" and float(self.settings.compiler.version.value) <= 12: - raise ConanInvalidConfiguration("{} {} does not support Visual Studio <= 12".format(self.name, self.version)) - del self.options.fPIC - - def configure(self): - if self.settings.os == "Windows" and self.options.shared: - raise ConanInvalidConfiguration("Windows shared builds are not supported right now, see issue #639") - - def _configure_cmake(self): - cmake = CMake(self) - - cmake.definitions["BENCHMARK_ENABLE_TESTING"] = "OFF" - cmake.definitions["BENCHMARK_ENABLE_GTEST_TESTS"] = "OFF" - cmake.definitions["BENCHMARK_ENABLE_LTO"] = "ON" if self.options.enable_lto else "OFF" - cmake.definitions["BENCHMARK_ENABLE_EXCEPTIONS"] = "ON" if self.options.enable_exceptions else "OFF" - - # See https://github.com/google/benchmark/pull/638 for Windows 32 build explanation - if self.settings.os != "Windows": - cmake.definitions["BENCHMARK_BUILD_32_BITS"] = "ON" if "64" not in str(self.settings.arch) else "OFF" - cmake.definitions["BENCHMARK_USE_LIBCXX"] = "ON" if (str(self.settings.compiler.libcxx) == "libc++") else "OFF" - else: - cmake.definitions["BENCHMARK_USE_LIBCXX"] = "OFF" - - cmake.configure(build_folder=self._build_subfolder) - return cmake - - def build(self): - cmake = self._configure_cmake() - cmake.build() - - def package(self): - cmake = self._configure_cmake() - cmake.install() - - self.copy(pattern="LICENSE", dst="licenses") - - def package_info(self): - self.cpp_info.libs = 
tools.collect_libs(self) - if self.settings.os == "Linux": - self.cpp_info.libs.extend(["pthread", "rt"]) - elif self.settings.os == "Windows": - self.cpp_info.libs.append("shlwapi") - elif self.settings.os == "SunOS": - self.cpp_info.libs.append("kstat") diff --git a/dependencies.md b/dependencies.md deleted file mode 100644 index 6289b4e..0000000 --- a/dependencies.md +++ /dev/null @@ -1,18 +0,0 @@ -# Build tool dependency policy - -To ensure the broadest compatibility when building the benchmark library, but -still allow forward progress, we require any build tooling to be available for: - -* Debian stable AND -* The last two Ubuntu LTS releases AND - -Currently, this means using build tool versions that are available for Ubuntu -16.04 (Xenial), Ubuntu 18.04 (Bionic), and Debian stretch. - -_Note, [travis](.travis.yml) runs under Ubuntu 14.04 (Trusty) for linux builds._ - -## cmake -The current supported version is cmake 3.5.1 as of 2018-06-06. - -_Note, this version is also available for Ubuntu 14.04, the previous Ubuntu LTS -release, as `cmake3`._ diff --git a/docs/AssemblyTests.md b/docs/AssemblyTests.md index 1fbdc26..89df7ca 100644 --- a/docs/AssemblyTests.md +++ b/docs/AssemblyTests.md @@ -111,6 +111,7 @@ between compilers or compiler versions. A common example of this is matching stack frame addresses. In this case regular expressions can be used to match the differing bits of output. For example: +<!-- {% raw %} --> ```c++ int ExternInt; struct Point { int x, y, z; }; @@ -127,6 +128,7 @@ extern "C" void test_store_point() { // CHECK: ret } ``` +<!-- {% endraw %} --> ## Current Requirements and Limitations diff --git a/docs/_config.yml b/docs/_config.yml index 1885487..32f9f2e 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -1 +1,3 @@ -theme: jekyll-theme-midnight
\ No newline at end of file +theme: jekyll-theme-minimal +logo: /assets/images/icon_black.png +show_downloads: true diff --git a/docs/assets/images/icon.png b/docs/assets/images/icon.png Binary files differnew file mode 100644 index 0000000..b982604 --- /dev/null +++ b/docs/assets/images/icon.png diff --git a/docs/assets/images/icon.xcf b/docs/assets/images/icon.xcf Binary files differnew file mode 100644 index 0000000..f2f0be4 --- /dev/null +++ b/docs/assets/images/icon.xcf diff --git a/docs/assets/images/icon_black.png b/docs/assets/images/icon_black.png Binary files differnew file mode 100644 index 0000000..656ae79 --- /dev/null +++ b/docs/assets/images/icon_black.png diff --git a/docs/assets/images/icon_black.xcf b/docs/assets/images/icon_black.xcf Binary files differnew file mode 100644 index 0000000..430e7ba --- /dev/null +++ b/docs/assets/images/icon_black.xcf diff --git a/docs/dependencies.md b/docs/dependencies.md new file mode 100644 index 0000000..07760e1 --- /dev/null +++ b/docs/dependencies.md @@ -0,0 +1,13 @@ +# Build tool dependency policy + +We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In +particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems). + +## CMake + +The current supported version is CMake 3.10 as of 2023-08-10. Most modern +distributions include newer versions, for example: + +* Ubuntu 20.04 provides CMake 3.16.3 +* Debian 11.4 provides CMake 3.18.4 +* Ubuntu 22.04 provides CMake 3.22.1 diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..9cada96 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,12 @@ +# Benchmark + +* [Assembly Tests](AssemblyTests.md) +* [Dependencies](dependencies.md) +* [Perf Counters](perf_counters.md) +* [Platform Specific Build Instructions](platform_specific_build_instructions.md) +* [Python Bindings](python_bindings.md) +* [Random Interleaving](random_interleaving.md) +* [Reducing Variance](reducing_variance.md) +* [Releasing](releasing.md) +* [Tools](tools.md) +* [User Guide](user_guide.md) diff --git a/docs/perf_counters.md b/docs/perf_counters.md new file mode 100644 index 0000000..f342092 --- /dev/null +++ b/docs/perf_counters.md @@ -0,0 +1,35 @@ +<a name="perf-counters" /> + +# User-Requested Performance Counters + +When running benchmarks, the user may choose to request collection of +performance counters. This may be useful in investigation scenarios - narrowing +down the cause of a regression; or verifying that the underlying cause of a +performance improvement matches expectations. + +This feature is available if: + +* The benchmark is run on an architecture featuring a Performance Monitoring + Unit (PMU), +* The benchmark is compiled with support for collecting counters. Currently, + this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a + dependency via Bazel. + +The feature does not require modifying benchmark code. Counter collection is +handled at the boundaries where timer collection is also handled. + +To opt-in: +* If using a Bazel build, add `--define pfm=1` to your build flags +* If using CMake: + * Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. + * Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`. + +To use, pass a comma-separated list of counter names through the +`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning, +they are platform specific, but some (e.g. 
`CYCLES` or `INSTRUCTIONS`) are +mapped by libpfm to platform-specifics - see libpfm +[documentation](http://perfmon2.sourceforge.net/docs.html) for more details. + +The counter values are reported back through the [User Counters](../README.md#custom-counters) +mechanism, meaning, they are available in all the formats (e.g. JSON) supported +by User Counters. diff --git a/docs/platform_specific_build_instructions.md b/docs/platform_specific_build_instructions.md new file mode 100644 index 0000000..2d5d6c4 --- /dev/null +++ b/docs/platform_specific_build_instructions.md @@ -0,0 +1,48 @@ +# Platform Specific Build Instructions + +## Building with GCC + +When the library is built using GCC it is necessary to link with the pthread +library due to how GCC implements `std::thread`. Failing to link to pthread will +lead to runtime exceptions (unless you're using libc++), not linker errors. See +[issue #67](https://github.com/google/benchmark/issues/67) for more details. You +can link to pthread by adding `-pthread` to your linker command. Note, you can +also use `-lpthread`, but there are potential issues with ordering of command +line parameters if you use that. + +On QNX, the pthread library is part of libc and usually included automatically +(see +[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)). +There's no separate pthread library to link. + +## Building with Visual Studio 2015 or 2017 + +The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: + +``` +// Alternatively, can add libraries using linker options. +#ifdef _WIN32 +#pragma comment ( lib, "Shlwapi.lib" ) +#ifdef _DEBUG +#pragma comment ( lib, "benchmarkd.lib" ) +#else +#pragma comment ( lib, "benchmark.lib" ) +#endif +#endif +``` + +You can also use the graphical version of CMake: +* Open `CMake GUI`. +* Under `Where to build the binaries`, same path as source plus `build`. +* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. +* Click `Configure`, `Generate`, `Open Project`. +* If the build fails, try deleting the entire directory and starting again, or unticking options to build less. + +## Building with Intel 2015 Update 1 or Intel System Studio Update 4 + +See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. + +## Building on Solaris + +If you're running benchmarks on Solaris, you'll want the kstat library linked in +too (`-lkstat`).
\ No newline at end of file diff --git a/docs/python_bindings.md b/docs/python_bindings.md new file mode 100644 index 0000000..6a7aab0 --- /dev/null +++ b/docs/python_bindings.md @@ -0,0 +1,34 @@ +# Building and installing Python bindings + +Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and +using Google Benchmark directly in Python. +Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows. +Supported Python versions are Python 3.7 - 3.10. + +To install Google Benchmark's Python bindings, run: + +```bash +python -m pip install --upgrade pip # for manylinux2014 support +python -m pip install google-benchmark +``` + +In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual +environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html) +on how to create virtual environments. + +To build a wheel directly from source, you can follow these steps: +```bash +git clone https://github.com/google/benchmark.git +cd benchmark +# create a virtual environment and activate it +python3 -m venv venv --system-site-packages +source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows + +# upgrade Python's system-wide packages +python -m pip install --upgrade pip setuptools wheel +# builds the wheel and stores it in the directory "wheelhouse". +python -m pip wheel . -w wheelhouse +``` + +NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel, +refer to the [Bazel installation docs](https://bazel.build/install). diff --git a/docs/random_interleaving.md b/docs/random_interleaving.md new file mode 100644 index 0000000..c083036 --- /dev/null +++ b/docs/random_interleaving.md @@ -0,0 +1,13 @@ +<a name="interleaving" /> + +# Random Interleaving + +[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a +technique to lower run-to-run variance. It randomly interleaves repetitions of a +microbenchmark with repetitions from other microbenchmarks in the same benchmark +test. Data shows it is able to lower run-to-run variance by +[40%](https://github.com/google/benchmark/issues/1051) on average. + +To use it, you mainly need to set `--benchmark_enable_random_interleaving=true`, +and optionally specify a non-zero repetition count, e.g. `--benchmark_repetitions=9`, +and optionally decrease the per-repetition time, e.g. `--benchmark_min_time=0.1`. diff --git a/docs/reducing_variance.md b/docs/reducing_variance.md new file mode 100644 index 0000000..e566ab9 --- /dev/null +++ b/docs/reducing_variance.md @@ -0,0 +1,100 @@ +# Reducing Variance + +<a name="disabling-cpu-frequency-scaling" /> + +## Disabling CPU Frequency Scaling + +If you see this error: + +``` +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +``` + +you might want to disable the CPU frequency scaling while running the +benchmark, as well as consider other ways to stabilize the performance of +your system while benchmarking. + +Exactly how to do this depends on the Linux distribution, +desktop environment, and installed programs. Specific details are a moving +target, so we will not attempt to exhaustively document them here. + +One simple option is to use the `cpupower` program to change the +performance governor to "performance".
This tool is maintained along with +the Linux kernel and provided by your distribution. + +It must be run as root, like this: + +```bash +sudo cpupower frequency-set --governor performance +``` + +After this you can verify that all CPUs are using the performance governor +by running this command: + +```bash +cpupower frequency-info -o proc +``` + +The benchmarks you subsequently run will have less variance. + +<a name="reducing-variance" /> + +## Reducing Variance in Benchmarks + +The Linux CPU frequency governor [discussed +above](user_guide#disabling-cpu-frequency-scaling) is not the only source +of noise in benchmarks. Some, but not all, of the sources of variance +include: + +1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same + speed, so running a benchmark one time and then again may give a + different result depending on which CPU it ran on. +2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and + AMD Turbo Core and Precision Boost, can temporarily change the CPU + frequency even when using the "performance" governor on Linux. +3. Context switching between CPUs, or scheduling competition on the CPU the + benchmark is running on. +4. Intel Hyperthreading or AMD SMT causing the same issue as above. +5. Cache effects caused by code running on other CPUs. +6. Non-uniform memory architectures (NUMA). + +These can cause variance in benchmark results within a single run +(`--benchmark_repetitions=N`) or across multiple runs of the benchmark +program. + +Reducing sources of variance is OS and architecture dependent, which is one +reason some companies maintain machines dedicated to performance testing. + +Some of the easier and more effective ways of reducing variance on a typical +Linux workstation are: + +1. Use the performance governor as [discussed +above](user_guide#disabling-cpu-frequency-scaling). +1. Disable processor boosting by: + ```sh + echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost + ``` + See the Linux kernel's + [boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt) + for more information. +2. Set the benchmark program's task affinity to a fixed CPU. For example: + ```sh + taskset -c 0 ./mybenchmark + ``` +3. Disable Hyperthreading/SMT. This can be done in the BIOS or using the + `/sys` file system (see the LLVM project's [Benchmarking + tips](https://llvm.org/docs/Benchmarking.html)). +4. Close other programs that do non-trivial things based on timers, such as + your web browser, desktop environment, etc. +5. Reduce the working set of your benchmark to fit within the L1 cache, but + do be aware that this may lead you to optimize for an unrealistic + situation. + +A combined sketch of the first three steps is shown at the end of this page. + +Further resources on this topic: + +1. The LLVM project's [Benchmarking + tips](https://llvm.org/docs/Benchmarking.html). +1. The Arch Wiki [CPU frequency +scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.
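+
+As a rough illustration, the first three steps above can be combined into a
+small setup script. This is only a sketch: the exact sysfs paths and the
+governor/boost interfaces differ between distributions and CPU vendors, and
+the CPU number given to `taskset` is arbitrary.
+
+```bash
+#!/usr/bin/env bash
+# Illustrative pre-benchmark setup; adapt to your system before use.
+set -e
+
+# 1. Switch all CPUs to the "performance" governor.
+sudo cpupower frequency-set --governor performance
+
+# 2. Disable processor boosting (on Intel/intel_pstate systems the knob is
+#    /sys/devices/system/cpu/intel_pstate/no_turbo instead).
+echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
+
+# 3. Pin the benchmark to a single CPU while running it.
+taskset -c 0 ./mybenchmark --benchmark_repetitions=9
+```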
diff --git a/docs/releasing.md b/docs/releasing.md index f0cd701..cdf4159 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -1,16 +1,41 @@ # How to release -* Make sure you're on master and synced to HEAD -* Ensure the project builds and tests run (sanity check only, obviously) +* Make sure you're on main and synced to HEAD +* Ensure the project builds and tests run * `parallel -j0 exec ::: test/*_test` can help ensure everything at least passes * Prepare release notes * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of commits between the last annotated tag and HEAD * Pick the most interesting. +* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel` + and the `__version__` variable in `bindings/python/google_benchmark/__init__.py` to the + release version you're creating. (This version will be used if benchmark is installed + from the archive you'll be creating in the next step.) + +``` +project (benchmark VERSION 1.8.0 LANGUAGES CXX) +``` + +``` +module(name = "com_github_google_benchmark", version="1.8.0") +``` + +```python +# bindings/python/google_benchmark/__init__.py + +# ... + +__version__ = "1.8.0" # <-- change this to the release version you are creating + +# ... +``` + * Create a release through GitHub's interface * Note this will create a lightweight tag. * Update this to an annotated tag: * `git pull --tags` * `git tag -a -f <tag> <tag>` - * `git push --force origin` + * `git push --force --tags origin` +* Confirm that the "Build and upload Python wheels" action runs to completion * Run it manually if it hasn't run diff --git a/docs/tools.md b/docs/tools.md index f2d0c49..411f41d 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -186,6 +186,146 @@ Benchmark Time CPU Time Old This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one. As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`. +### Note: Interpreting the output + +Performance measurements are an art, and performance comparisons are doubly so. +Results are often noisy and don't necessarily show large absolute differences, +so just by visual inspection it is not at all apparent if two +measurements are actually showing a performance change or not. It is even more +confusing with multiple benchmark repetitions. + +Thankfully, what we can do is use statistical tests on the results to determine +whether the performance has statistically-significantly changed. `compare.py` +uses the [Mann–Whitney U +test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null +hypothesis being that there's no difference in performance.
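+
+For example, a comparison like the one summarized below can be produced by
+running `compare.py` in its `benchmarks` mode with repetitions enabled. The
+binary names here are placeholders; the repetition count matches the example
+output that follows:
+
+```bash
+# Run both binaries 27 times each and compare the resulting distributions.
+tools/compare.py benchmarks ./benchmark_old ./benchmark_new --benchmark_repetitions=27
+```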
+ +**The below output is a summary of a benchmark comparison with statistics +provided for a multi-threaded process.** +``` +Benchmark Time CPU Time Old Time New CPU Old CPU New +----------------------------------------------------------------------------------------------------------------------------- +benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27 +benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77 +benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77 +benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0 +benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0 +OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0 +``` +-------------------------------------------- +Here's a breakdown of each row: + +**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for +the statistical test comparing the performance of the process running with one +thread. A value of 0.0000 suggests a statistically significant difference in +performance. The comparison was conducted using the U Test (Mann-Whitney +U Test) with 27 repetitions for each case. + +**benchmark/threads:1/process_time/real_time_mean**: This shows the relative +difference in mean execution time between two different cases. The negative +value (-0.1442) implies that the new process is faster by about 14.42%. The old +time was 90 units, while the new time is 77 units. + +**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the +relative difference in the median execution time. Again, the new process is +faster by 14.44%. + +**benchmark/threads:1/process_time/real_time_stddev**: This is the relative +difference in the standard deviation of the execution time, which is a measure +of how much variation or dispersion there is from the mean. A positive value +(+0.3974) implies there is more variance in the execution time in the new +process. + +**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of +Variation. It is the ratio of the standard deviation to the mean. It provides a +standardized measure of dispersion. An increase (+0.6329) indicates more +relative variability in the new process. + +**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is +less influenced by outliers. The negative value indicates a general improvement +in the new process. However, given the values are all zero for the old and new +times, this seems to be a mistake or placeholder in the output. + +----------------------------------------- + + + +Let's first try to see what the different columns represent in the above +`compare.py` benchmarking output: + + 1. **Benchmark:** The name of the function being benchmarked, along with the + size of the input (after the slash). + + 2. **Time:** The average time per operation, across all iterations. + + 3. **CPU:** The average CPU time per operation, across all iterations. + + 4. **Iterations:** The number of iterations the benchmark was run to get a + stable estimate. + + 5. **Time Old and Time New:** These represent the average time it takes for a + function to run in two different scenarios or versions. For example, you + might be comparing how fast a function runs before and after you make some + changes to it. + + 6. **CPU Old and CPU New:** These show the average amount of CPU time that the + function uses in two different scenarios or versions. 
This is similar to + Time Old and Time New, but focuses on CPU usage instead of overall time. + +In the comparison section, the relative differences in both time and CPU time +are displayed for each input size. + + +A statistically-significant difference is determined by a **p-value**, which is +a measure of the probability that the observed difference could have occurred +just by random chance. A smaller p-value indicates stronger evidence against the +null hypothesis. + +**Therefore:** + 1. If the p-value is less than the chosen significance level (alpha), we + reject the null hypothesis and conclude the benchmarks are significantly + different. + 2. If the p-value is greater than or equal to alpha, we fail to reject the + null hypothesis and treat the two benchmarks as similar. + + + +The result of said statistical test is additionally communicated through color coding: +```diff ++ Green: +``` + The benchmarks are _**statistically different**_. This could mean the + performance has either **significantly improved** or **significantly + deteriorated**. You should look at the actual performance numbers to see which + is the case. +```diff +- Red: +``` + The benchmarks are _**statistically similar**_. This means the performance + **hasn't significantly changed**. + +In statistical terms, **'green'** means we reject the null hypothesis that +there's no difference in performance, and **'red'** means we fail to reject the +null hypothesis. This might seem counter-intuitive if you're expecting 'green' +to mean 'improved performance' and 'red' to mean 'worsened performance'. +```bash + But remember, in this context: + + 'Success' means 'successfully finding a difference'. + 'Failure' means 'failing to find a difference'. +``` + + +Also, please note that **even if** we determine that there **is** a +statistically-significant difference between the two measurements, it does not +_necessarily_ mean that the actual benchmarks that were measured **are** +different. And vice versa: even if we determine that there is **no** +statistically-significant difference between the two measurements, it does not +necessarily mean that the actual benchmarks that were measured **are not** +different.
+ + + ### U test If there is a sufficient repetition count of the benchmarks, the tool can do diff --git a/docs/user_guide.md b/docs/user_guide.md new file mode 100644 index 0000000..2ceb13e --- /dev/null +++ b/docs/user_guide.md @@ -0,0 +1,1266 @@ +# User Guide + +## Command Line + +[Output Formats](#output-formats) + +[Output Files](#output-files) + +[Running Benchmarks](#running-benchmarks) + +[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) + +[Result Comparison](#result-comparison) + +[Extra Context](#extra-context) + +## Library + +[Runtime and Reporting Considerations](#runtime-and-reporting-considerations) + +[Setup/Teardown](#setupteardown) + +[Passing Arguments](#passing-arguments) + +[Custom Benchmark Name](#custom-benchmark-name) + +[Calculating Asymptotic Complexity](#asymptotic-complexity) + +[Templated Benchmarks](#templated-benchmarks) + +[Fixtures](#fixtures) + +[Custom Counters](#custom-counters) + +[Multithreaded Benchmarks](#multithreaded-benchmarks) + +[CPU Timers](#cpu-timers) + +[Manual Timing](#manual-timing) + +[Setting the Time Unit](#setting-the-time-unit) + +[Random Interleaving](random_interleaving.md) + +[User-Requested Performance Counters](perf_counters.md) + +[Preventing Optimization](#preventing-optimization) + +[Reporting Statistics](#reporting-statistics) + +[Custom Statistics](#custom-statistics) + +[Memory Usage](#memory-usage) + +[Using RegisterBenchmark](#using-register-benchmark) + +[Exiting with an Error](#exiting-with-an-error) + +[A Faster `KeepRunning` Loop](#a-faster-keep-running-loop) + +## Benchmarking Tips + +[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) + +[Reducing Variance in Benchmarks](reducing_variance.md) + +<a name="output-formats" /> + +## Output Formats + +The library supports multiple output formats. Use the +`--benchmark_format=<console|json|csv>` flag (or set the +`BENCHMARK_FORMAT=<console|json|csv>` environment variable) to set +the format type. `console` is the default format. + +The Console format is intended to be a human readable format. By default +the format generates color output. Context is output on stderr and the +tabular data on stdout. Example tabular output looks like: + +``` +Benchmark Time(ns) CPU(ns) Iterations +---------------------------------------------------------------------- +BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s +BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s +BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s +``` + +The JSON format outputs human readable json split into two top level attributes. +The `context` attribute contains information about the run in general, including +information about the CPU and the date. +The `benchmarks` attribute contains a list of every benchmark run. 
Example json +output looks like: + +```json +{ + "context": { + "date": "2015/03/17-18:40:25", + "num_cpus": 40, + "mhz_per_cpu": 2801, + "cpu_scaling_enabled": false, + "build_type": "debug" + }, + "benchmarks": [ + { + "name": "BM_SetInsert/1024/1", + "iterations": 94877, + "real_time": 29275, + "cpu_time": 29836, + "bytes_per_second": 134066, + "items_per_second": 33516 + }, + { + "name": "BM_SetInsert/1024/8", + "iterations": 21609, + "real_time": 32317, + "cpu_time": 32429, + "bytes_per_second": 986770, + "items_per_second": 246693 + }, + { + "name": "BM_SetInsert/1024/10", + "iterations": 21393, + "real_time": 32724, + "cpu_time": 33355, + "bytes_per_second": 1199226, + "items_per_second": 299807 + } + ] +} +``` + +The CSV format outputs comma-separated values. The `context` is output on stderr +and the CSV itself on stdout. Example CSV output looks like: + +``` +name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label +"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, +"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, +"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, +``` + +<a name="output-files" /> + +## Output Files + +Write benchmark results to a file with the `--benchmark_out=<filename>` option +(or set `BENCHMARK_OUT`). Specify the output format with +`--benchmark_out_format={json|console|csv}` (or set +`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is +deprecated and the saved `.csv` file +[is not parsable](https://github.com/google/benchmark/issues/794) by csv +parsers. + +Specifying `--benchmark_out` does not suppress the console output. + +<a name="running-benchmarks" /> + +## Running Benchmarks + +Benchmarks are executed by running the produced binaries. Benchmark binaries, +by default, accept options that may be specified either through their command +line interface or by setting environment variables before execution. For every +`--option_flag=<value>` CLI switch, a corresponding environment variable +`OPTION_FLAG=<value>` exists and is used as the default if set (CLI switches always + prevail). A complete list of CLI options is available by running benchmarks + with the `--help` switch. + +<a name="running-a-subset-of-benchmarks" /> + +## Running a Subset of Benchmarks + +The `--benchmark_filter=<regex>` option (or `BENCHMARK_FILTER=<regex>` +environment variable) can be used to only run the benchmarks that match +the specified `<regex>`. For example: + +```bash +$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 +Run on (1 X 2300 MHz CPU ) +2016-06-25 19:34:24 +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +BM_memcpy/32 12 ns 12 ns 54687500 +BM_memcpy/32k 1834 ns 1837 ns 357143 +``` + +## Disabling Benchmarks + +It is possible to temporarily disable benchmarks by renaming the benchmark +function to have the prefix "DISABLED_". This will cause the benchmark to +be skipped at runtime. + +<a name="result-comparison" /> + +## Result Comparison + +It is possible to compare the benchmarking results. +See [Additional Tooling Documentation](tools.md). + +<a name="extra-context" /> + +## Extra Context + +Sometimes it's useful to add extra context to the content printed before the +results. By default this section includes information about the CPU on which +the benchmarks are running.
If you do want to add more context, you can use +the `--benchmark_context` command line flag: + +```bash +$ ./run_benchmarks --benchmark_context=pwd=`pwd` +Run on (1 x 2300 MHz CPU) +pwd: /home/user/benchmark/ +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +``` + +You can get the same effect with the API: + +```c++ + benchmark::AddCustomContext("foo", "bar"); +``` + +Note that attempts to add a second value with the same key will fail with an +error message. + +<a name="runtime-and-reporting-considerations" /> + +## Runtime and Reporting Considerations + +When the benchmark binary is executed, each benchmark function is run serially. +The number of iterations to run is determined dynamically by running the +benchmark a few times and measuring the time taken and ensuring that the +ultimate result will be statistically stable. As such, faster benchmark +functions will be run for more iterations than slower benchmark functions, and +the number of iterations is thus reported. + +In all cases, the number of iterations for which the benchmark is run is +governed by the amount of time the benchmark takes. Concretely, the number of +iterations is at least one and not more than 1e9, and benchmarks are rerun until the CPU time is greater than +the minimum time, or the wallclock time is 5x the minimum time. The minimum time is +set per benchmark by calling `MinTime` on the registered benchmark object. + +Furthermore, warming up a benchmark might be necessary in order to get +stable results because of e.g. caching effects of the code under benchmark. +Warming up means running the benchmark for a given amount of time before +results are actually taken into account. The amount of time for which +the warmup should be run can be set per benchmark by calling +`MinWarmUpTime` on the registered benchmark object or for all benchmarks +using the `--benchmark_min_warmup_time` command-line option. Note that +`MinWarmUpTime` will override the value of `--benchmark_min_warmup_time` +for that single benchmark. How many iterations the warmup run of each +benchmark takes is determined the same way as described in the paragraph +above. By default the warmup phase is set to 0 seconds and is therefore +disabled. + +Average timings are then reported over the iterations run. If multiple +repetitions are requested using the `--benchmark_repetitions` command-line +option, or at registration time, the benchmark function will be run several +times and statistical results across these repetitions will also be reported. + +As well as the per-benchmark entries, a preamble in the report will include +information about the machine on which the benchmarks are run. + +<a name="setup-teardown" /> + +## Setup/Teardown + +Global setup/teardown specific to each benchmark can be done by +passing a callback to Setup/Teardown. + +The setup/teardown callbacks will be invoked once for each benchmark. If the +benchmark is multi-threaded (will run in k threads), they will be invoked +exactly once before each run with k threads. + +If the benchmark uses different size groups of threads, the above will be true +for each size group.
+ +E.g., + +```c++ +static void DoSetup(const benchmark::State& state) { +} + +static void DoTeardown(const benchmark::State& state) { +} + +static void BM_func(benchmark::State& state) {...} + +BENCHMARK(BM_func)->Arg(1)->Arg(3)->Threads(16)->Threads(32)->Setup(DoSetup)->Teardown(DoTeardown); + +``` + +In this example, `DoSetup` and `DoTeardown` will be invoked 4 times each, +specifically, once for each member of this family: + - BM_func_Arg_1_Threads_16, BM_func_Arg_1_Threads_32 + - BM_func_Arg_3_Threads_16, BM_func_Arg_3_Threads_32 + +<a name="passing-arguments" /> + +## Passing Arguments + +Sometimes a family of benchmarks can be implemented with just one routine that +takes an extra argument to specify which one of the family of benchmarks to +run. For example, the following code defines a family of benchmarks for +measuring the speed of `memcpy()` calls of different lengths: + +```c++ +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range(0)]; + char* dst = new char[state.range(0)]; + memset(src, 'x', state.range(0)); + for (auto _ : state) + memcpy(dst, src, state.range(0)); + state.SetBytesProcessed(int64_t(state.iterations()) * + int64_t(state.range(0))); + delete[] src; + delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(8<<10); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following invocation will pick a few appropriate arguments in +the specified range and will generate a benchmark for each such argument. + +```c++ +BENCHMARK(BM_memcpy)->Range(8, 8<<10); +``` + +By default the arguments in the range are generated in multiples of eight and +the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the +range multiplier is changed to multiples of two. + +```c++ +BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); +``` + +Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. + +The preceding code shows a method of defining a sparse range. The following +example shows a method of defining a dense range. It is then used to benchmark +the performance of `std::vector` initialization for uniformly increasing sizes. + +```c++ +static void BM_DenseRange(benchmark::State& state) { + for(auto _ : state) { + std::vector<int> v(state.range(0), state.range(0)); + auto data = v.data(); + benchmark::DoNotOptimize(data); + benchmark::ClobberMemory(); + } +} +BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); +``` + +Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. + +You might have a benchmark that depends on two or more inputs. For example, the +following code defines a family of benchmarks for measuring the speed of set +insertion. + +```c++ +static void BM_SetInsert(benchmark::State& state) { + std::set<int> data; + for (auto _ : state) { + state.PauseTiming(); + data = ConstructRandomSet(state.range(0)); + state.ResumeTiming(); + for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 128}) + ->Args({2<<10, 128}) + ->Args({4<<10, 128}) + ->Args({8<<10, 128}) + ->Args({1<<10, 512}) + ->Args({2<<10, 512}) + ->Args({4<<10, 512}) + ->Args({8<<10, 512}); +``` + +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following macro will pick a few appropriate arguments in the +product of the two specified ranges and will generate a benchmark for each such +pair.
+ +<!-- {% raw %} --> +```c++ +BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` +<!-- {% endraw %} --> + +Some benchmarks may require specific argument values that cannot be expressed +with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a +benchmark input for each combination in the product of the supplied vectors. + +<!-- {% raw %} --> +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 20}) + ->Args({3<<10, 20}) + ->Args({8<<10, 20}) + ->Args({3<<10, 40}) + ->Args({8<<10, 40}) + ->Args({1<<10, 40}) + ->Args({1<<10, 60}) + ->Args({3<<10, 60}) + ->Args({8<<10, 60}) + ->Args({1<<10, 80}) + ->Args({3<<10, 80}) + ->Args({8<<10, 80}); +``` +<!-- {% endraw %} --> + +For the most common scenarios, helper methods for creating a list of +integers for a given sparse or dense range are provided. + +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({ + benchmark::CreateRange(8, 128, /*multi=*/2), + benchmark::CreateDenseRange(1, 4, /*step=*/1) + }) +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->ArgsProduct({ + {8, 16, 32, 64, 128}, + {1, 2, 3, 4} + }); +``` + +For more complex patterns of inputs, passing a custom function to `Apply` allows +programmatic specification of an arbitrary set of arguments on which to run the +benchmark. The following example enumerates a dense range on one parameter, +and a sparse range on the second. + +```c++ +static void CustomArguments(benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->Args({i, j}); +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); +``` + +### Passing Arbitrary Arguments to a Benchmark + +In C++11 it is possible to define a benchmark that takes an arbitrary number +of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` +macro creates a benchmark that invokes `func` with the `benchmark::State` as +the first argument followed by the specified `args...`. +The `test_case_name` is appended to the name of the benchmark and +should describe the values passed. + +```c++ +template <class ...Args> +void BM_takes_args(benchmark::State& state, Args&&... args) { + auto args_tuple = std::make_tuple(std::move(args)...); + for (auto _ : state) { + std::cout << std::get<0>(args_tuple) << ": " << std::get<1>(args_tuple) + << '\n'; + [...] + } +} +// Registers a benchmark named "BM_takes_args/int_string_test" that passes +// the specified values to `args`. +BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); + +// Registers the same benchmark "BM_takes_args/int_test" that passes +// the specified values to `args`. +BENCHMARK_CAPTURE(BM_takes_args, int_test, 42, 43); +``` + +Note that elements of `...args` may refer to global variables. Users should +avoid modifying global state inside of a benchmark. + +<a name="asymptotic-complexity" /> + +## Calculating Asymptotic Complexity (Big O) + +Asymptotic complexity might be calculated for a family of benchmarks. The +following code will calculate the coefficient for the high-order term in the +running time and the normalized root-mean square error of string comparison. 
+ +```c++ +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range(0), '-'); + std::string s2(state.range(0), '-'); + for (auto _ : state) { + auto comparison_result = s1.compare(s2); + benchmark::DoNotOptimize(comparison_result); + } + state.SetComplexityN(state.range(0)); +} +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); +``` + +As shown in the following invocation, asymptotic complexity might also be +calculated automatically. + +```c++ +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); +``` + +The following code will specify asymptotic complexity with a lambda function, +that might be used to customize high-order term calculation. + +```c++ +BENCHMARK(BM_StringCompare)->RangeMultiplier(2) + ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); +``` + +<a name="custom-benchmark-name" /> + +## Custom Benchmark Name + +You can change the benchmark's name as follows: + +```c++ +BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10); +``` + +The invocation will execute the benchmark as before using `BM_memcpy` but changes +the prefix in the report to `memcpy`. + +<a name="templated-benchmarks" /> + +## Templated Benchmarks + +This example produces and consumes messages of size `sizeof(v)` `range_x` +times. It also outputs throughput in the absence of multiprogramming. + +```c++ +template <class Q> void BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + for (auto _ : state) { + for (int i = state.range(0); i--; ) + q.push(v); + for (int e = state.range(0); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast<int64_t>(state.iterations())*state.range(0)); +} +// C++03 +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10); + +// C++11 or newer, you can use the BENCHMARK macro with template parameters: +BENCHMARK(BM_Sequential<WaitQueue<int>>)->Range(1<<0, 1<<10); + +``` + +Three macros are provided for adding benchmark templates. + +```c++ +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK(func<...>) // Takes any number of parameters. +#else // C++ < C++11 +#define BENCHMARK_TEMPLATE(func, arg1) +#endif +#define BENCHMARK_TEMPLATE1(func, arg1) +#define BENCHMARK_TEMPLATE2(func, arg1, arg2) +``` + +<a name="fixtures" /> + +## Fixtures + +Fixture tests are created by first defining a type that derives from +`::benchmark::Fixture` and then creating/registering the tests using the +following macros: + +* `BENCHMARK_F(ClassName, Method)` +* `BENCHMARK_DEFINE_F(ClassName, Method)` +* `BENCHMARK_REGISTER_F(ClassName, Method)` + +For Example: + +```c++ +class MyFixture : public benchmark::Fixture { +public: + void SetUp(const ::benchmark::State& state) { + } + + void TearDown(const ::benchmark::State& state) { + } +}; + +BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { + for (auto _ : st) { + ... 
+ } +} +/* BarTest is NOT registered */ +BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); +/* BarTest is now registered */ +``` + +### Templated Fixtures + +Also you can create templated fixture by using the following macros: + +* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)` +* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` + +For example: + +```c++ +template<typename T> +class MyFixture : public benchmark::Fixture {}; + +BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) { + for (auto _ : st) { + ... + } +} + +BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2); +``` + +<a name="custom-counters" /> + +## Custom Counters + +You can add your own counters with user-defined names. The example below +will add columns "Foo", "Bar" and "Baz" in its output: + +```c++ +static void UserCountersExample1(benchmark::State& state) { + double numFoos = 0, numBars = 0, numBazs = 0; + for (auto _ : state) { + // ... count Foo,Bar,Baz events + } + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +} +``` + +The `state.counters` object is a `std::map` with `std::string` keys +and `Counter` values. The latter is a `double`-like class, via an implicit +conversion to `double&`. Thus you can use all of the standard arithmetic +assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter. + +In multithreaded benchmarks, each counter is set on the calling thread only. +When the benchmark finishes, the counters from each thread will be summed; +the resulting sum is the value which will be shown for the benchmark. + +The `Counter` constructor accepts three parameters: the value as a `double` +; a bit flag which allows you to show counters as rates, and/or as per-thread +iteration, and/or as per-thread averages, and/or iteration invariants, +and/or finally inverting the result; and a flag specifying the 'unit' - i.e. +is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 +(`benchmark::Counter::OneK::kIs1024`)? + +```c++ + // sets a simple counter + state.counters["Foo"] = numFoos; + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark. + // Meaning: per one second, how many 'foo's are processed? + state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); + + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark, and the result inverted. + // Meaning: how many seconds it takes to process one 'foo'? + state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); + + // Set the counter as a thread-average quantity. It will + // be presented divided by the number of threads. 
+ state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); + + // There's also a combined flag: + state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate); + + // This says that we process with the rate of state.range(0) bytes every iteration: + state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024); +``` + +When you're compiling in C++11 mode or later you can use `insert()` with +`std::initializer_list`: + +<!-- {% raw %} --> +```c++ + // With C++11, this can be done: + state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); + // ... instead of: + state.counters["Foo"] = numFoos; + state.counters["Bar"] = numBars; + state.counters["Baz"] = numBazs; +``` +<!-- {% endraw %} --> + +### Counter Reporting + +When using the console reporter, by default, user counters are printed at +the end after the table, the same way as ``bytes_processed`` and +``items_processed``. This is best for cases in which there are few counters, +or where there are only a couple of lines per benchmark. Here's an example of +the default output: + +``` +------------------------------------------------------------------------------ +Benchmark Time CPU Iterations UserCounters... +------------------------------------------------------------------------------ +BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m +BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2 +BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4 +BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8 +BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16 +BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32 +BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4 +BM_Factorial 26 ns 26 ns 26608979 40320 +BM_Factorial/real_time 26 ns 26 ns 26587936 40320 +BM_CalculatePiRange/1 16 ns 16 ns 45704255 0 +BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374 +BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746 +BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355 +``` + +If this doesn't suit you, you can print each counter as a table column by +passing the flag `--benchmark_counters_tabular=true` to the benchmark +application. This is best for cases in which there are a lot of counters, or +a lot of lines per individual benchmark. Note that this will trigger a +reprinting of the table header any time the counter set changes between +individual benchmarks. 
Here's an example of corresponding output when +`--benchmark_counters_tabular=true` is passed: + +``` +--------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations Bar Bat Baz Foo +--------------------------------------------------------------------------------------- +BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8 +BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1 +BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2 +BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4 +BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8 +BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16 +BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32 +BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4 +-------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------- +BM_Factorial 26 ns 26 ns 26392245 40320 +BM_Factorial/real_time 26 ns 26 ns 26494107 40320 +BM_CalculatePiRange/1 15 ns 15 ns 45571597 0 +BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374 +BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746 +BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355 +BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184 +BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162 +BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 +BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 +BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 +``` + +Note above the additional header printed when the benchmark changes from +``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does +not have the same counter set as ``BM_UserCounter``. + +<a name="multithreaded-benchmarks"/> + +## Multithreaded Benchmarks + +In a multithreaded test (benchmark invoked by multiple threads simultaneously), +it is guaranteed that none of the threads will start until all have reached +the start of the benchmark loop, and all will have finished before any thread +exits the benchmark loop. (This behavior is also provided by the `KeepRunning()` +API) As such, any global setup or teardown can be wrapped in a check against the thread +index: + +```c++ +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index() == 0) { + // Setup code here. + } + for (auto _ : state) { + // Run the test as normal. + } + if (state.thread_index() == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(2); +``` + +To run the benchmark across a range of thread counts, instead of `Threads`, use +`ThreadRange`. This takes two parameters (`min_threads` and `max_threads`) and +runs the benchmark once for values in the inclusive range. For example: + +```c++ +BENCHMARK(BM_MultiThreaded)->ThreadRange(1, 8); +``` + +will run `BM_MultiThreaded` with thread counts 1, 2, 4, and 8. + +If the benchmarked code itself uses threads and you want to compare it to +single-threaded code, you may want to use real-time ("wallclock") measurements +for latency comparisons: + +```c++ +BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); +``` + +Without `UseRealTime`, CPU time is used by default. + +<a name="cpu-timers" /> + +## CPU Timers + +By default, the CPU timer only measures the time spent by the main thread. +If the benchmark itself uses threads internally, this measurement may not +be what you are looking for. Instead, there is a way to measure the total +CPU usage of the process, by all the threads. 
+ +```c++ +void callee(int i); + +static void MyMain(int size) { +#pragma omp parallel for + for(int i = 0; i < size; i++) + callee(i); +} + +static void BM_OpenMP(benchmark::State& state) { + for (auto _ : state) + MyMain(state.range(0)); +} + +// Measure the time spent by the main thread, use it to decide for how long to +// run the benchmark loop. Depending on the internal implementation detail may +// measure to anywhere from near-zero (the overhead spent before/after work +// handoff to worker thread[s]) to the whole single-thread time. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10); + +// Measure the user-visible time, the wall clock (literally, the time that +// has passed on the clock on the wall), use it to decide for how long to +// run the benchmark loop. This will always be meaningful, and will match the +// time spent by the main thread in single-threaded case, in general decreasing +// with the number of internal threads doing the work. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime(); + +// Measure the total CPU consumption, use it to decide for how long to +// run the benchmark loop. This will always measure to no less than the +// time spent by the main thread in single-threaded case. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime(); + +// A mixture of the last two. Measure the total CPU consumption, but use the +// wall clock to decide for how long to run the benchmark loop. +BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); +``` + +### Controlling Timers + +Normally, the entire duration of the work loop (`for (auto _ : state) {}`) +is measured. But sometimes, it is necessary to do some work inside of +that loop, every iteration, but without counting that time to the benchmark time. +That is possible, although it is not recommended, since it has high overhead. + +<!-- {% raw %} --> +```c++ +static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { + std::set<int> data; + for (auto _ : state) { + state.PauseTiming(); // Stop timers. They will not count until they are resumed. + data = ConstructRandomSet(state.range(0)); // Do something that should not be measured + state.ResumeTiming(); // And resume timers. They are now counting again. + // The rest will be measured. + for (int j = 0; j < state.range(1); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); +``` +<!-- {% endraw %} --> + +<a name="manual-timing" /> + +## Manual Timing + +For benchmarking something for which neither CPU time nor real-time are +correct or accurate enough, completely manual timing is supported using +the `UseManualTime` function. + +When `UseManualTime` is used, the benchmarked code must call +`SetIterationTime` once per iteration of the benchmark loop to +report the manually measured time. + +An example use case for this is benchmarking GPU execution (e.g. OpenCL +or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot +be accurately measured using CPU time or real-time. Instead, they can be +measured accurately using a dedicated API, and these measurement results +can be reported back with `SetIterationTime`. 
+
+```c++
+static void BM_ManualTiming(benchmark::State& state) {
+  int microseconds = state.range(0);
+  std::chrono::duration<double, std::micro> sleep_duration {
+    static_cast<double>(microseconds)
+  };
+
+  for (auto _ : state) {
+    auto start = std::chrono::high_resolution_clock::now();
+    // Simulate some useful workload with a sleep
+    std::this_thread::sleep_for(sleep_duration);
+    auto end = std::chrono::high_resolution_clock::now();
+
+    auto elapsed_seconds =
+        std::chrono::duration_cast<std::chrono::duration<double>>(
+            end - start);
+
+    state.SetIterationTime(elapsed_seconds.count());
+  }
+}
+BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
+```
+
+<a name="setting-the-time-unit" />
+
+## Setting the Time Unit
+
+If a benchmark runs for a few milliseconds, it may be hard to visually compare
+the measured times, since the output is reported in nanoseconds by default.
+To change that, set the time unit explicitly:
+
+```c++
+BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
+```
+
+Additionally, the default time unit can be set globally with the
+`--benchmark_time_unit={ns|us|ms|s}` command line argument. The argument only
+affects benchmarks where the time unit is not set explicitly.
+
+<a name="preventing-optimization" />
+
+## Preventing Optimization
+
+To prevent a value or expression from being optimized away by the compiler,
+the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()`
+functions can be used.
+
+```c++
+static void BM_test(benchmark::State& state) {
+  for (auto _ : state) {
+    int x = 0;
+    for (int i = 0; i < 64; ++i) {
+      benchmark::DoNotOptimize(x += i);
+    }
+  }
+}
+```
+
+`DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either
+memory or a register. For GNU-based compilers it acts as a read/write barrier
+for global memory. More specifically, it forces the compiler to flush pending
+writes to memory and reload any other values as necessary.
+
+Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>`
+in any way. `<expr>` may even be removed entirely when the result is already
+known. For example:
+
+```c++
+  /* Example 1: `<expr>` is removed entirely. */
+  int foo(int x) { return x + 42; }
+  while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42);
+
+  /* Example 2: Result of '<expr>' is only reused */
+  int bar(int) __attribute__((const));
+  while (...) DoNotOptimize(bar(0)); // Optimized to:
+  // int __result__ = bar(0);
+  // while (...) DoNotOptimize(__result__);
+```
+
+The second tool for preventing optimizations is `ClobberMemory()`. In essence,
+`ClobberMemory()` forces the compiler to perform all pending writes to global
+memory. Memory managed by block scope objects must be "escaped" using
+`DoNotOptimize(...)` before it can be clobbered. In the example below,
+`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized
+away.
+
+```c++
+static void BM_vector_push_back(benchmark::State& state) {
+  for (auto _ : state) {
+    std::vector<int> v;
+    v.reserve(1);
+    auto data = v.data();            // Allow v.data() to be clobbered. Pass as non-const
+    benchmark::DoNotOptimize(data);  // lvalue to avoid undesired compiler optimizations.
+    v.push_back(42);
+    benchmark::ClobberMemory();      // Force 42 to be written to memory.
+  }
+}
+```
+
+Note that `ClobberMemory()` is only available for GNU or MSVC based compilers.
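+
+When a call's return value would otherwise go unused, the compiler may remove
+the call entirely. Storing the result in a local variable and passing that
+variable to `DoNotOptimize(...)` as a non-const lvalue keeps the computation
+alive. The sketch below is illustrative only: `ExpensiveCalculation()` is a
+hypothetical user-provided function, not part of the library.
+
+```c++
+int ExpensiveCalculation(int64_t n);  // Hypothetical workload, defined elsewhere.
+
+static void BM_ExpensiveCalculation(benchmark::State& state) {
+  for (auto _ : state) {
+    // Keep the result alive so the whole call cannot be optimized away.
+    auto result = ExpensiveCalculation(state.range(0));
+    benchmark::DoNotOptimize(result);
+  }
+}
+BENCHMARK(BM_ExpensiveCalculation)->Arg(1024);
+```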
+
+<a name="reporting-statistics" />
+
+## Statistics: Reporting the Mean, Median and Standard Deviation / Coefficient of Variation of Repeated Benchmarks
+
+By default each benchmark is run once, and that single result is reported.
+However, benchmarks are often noisy, and a single result may not be
+representative of the overall behavior. For this reason it's possible to
+rerun the benchmark repeatedly.
+
+The number of runs of each benchmark is specified globally by the
+`--benchmark_repetitions` flag or on a per-benchmark basis by calling
+`Repetitions` on the registered benchmark object. When a benchmark is run more
+than once, the mean, median, standard deviation and coefficient of variation
+of the runs will be reported.
+
+Additionally, the `--benchmark_report_aggregates_only={true|false}` and
+`--benchmark_display_aggregates_only={true|false}` flags, or the
+`ReportAggregatesOnly(bool)` and `DisplayAggregatesOnly(bool)` functions, can be
+used to change how repeated tests are reported. By default the result of each
+repeated run is reported. When the `report aggregates only` option is `true`,
+only the aggregates (i.e. mean, median, standard deviation and coefficient
+of variation, plus complexity measurements if they were requested) of the runs
+are reported, to both reporters: the standard output (console) and the file.
+However, when only the `display aggregates only` option is `true`,
+only the aggregates are displayed in the standard output, while the file
+output still contains everything.
+Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a
+registered benchmark object overrides the value of the appropriate flag for that
+benchmark.
+
+<a name="custom-statistics" />
+
+## Custom Statistics
+
+While having these aggregates is nice, this may not be enough for everyone.
+For example, you may want to know what the largest observation is, e.g. because
+you have some real-time constraints. This is easy. The following code will
+specify a custom statistic to be calculated, defined by a lambda function.
+
+```c++
+void BM_spin_empty(benchmark::State& state) {
+  for (auto _ : state) {
+    for (int x = 0; x < state.range(0); ++x) {
+      benchmark::DoNotOptimize(x);
+    }
+  }
+}
+
+BENCHMARK(BM_spin_empty)
+  ->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+    return *(std::max_element(std::begin(v), std::end(v)));
+  })
+  ->Arg(512);
+```
+
+While usually the statistics produce values in time units,
+you can also produce percentages:
+
+```c++
+void BM_spin_empty(benchmark::State& state) {
+  for (auto _ : state) {
+    for (int x = 0; x < state.range(0); ++x) {
+      benchmark::DoNotOptimize(x);
+    }
+  }
+}
+
+BENCHMARK(BM_spin_empty)
+  ->ComputeStatistics("ratio", [](const std::vector<double>& v) -> double {
+    // For example, the ratio between the fastest and the slowest run.
+    return *std::min_element(std::begin(v), std::end(v)) /
+           *std::max_element(std::begin(v), std::end(v));
+  }, benchmark::StatisticUnit::kPercentage)
+  ->Arg(512);
+```
+
+<a name="memory-usage" />
+
+## Memory Usage
+
+It's often useful to also track memory usage for benchmarks, alongside CPU
+performance. For this reason, the library offers the `RegisterMemoryManager`
+function, which allows a custom `MemoryManager` to be injected.
+
+If set, the `MemoryManager::Start` and `MemoryManager::Stop` methods will be
+called at the start and end of benchmark runs to allow user code to fill out
+a report on the number of allocations, bytes used, etc.
+
+This data will then be reported alongside other performance data, currently
+only when using JSON output.
+
+<a name="using-register-benchmark" />
+
+## Using RegisterBenchmark(name, fn, args...)
+
+The `RegisterBenchmark(name, func, args...)` function provides an alternative
+way to create and register benchmarks.
+`RegisterBenchmark(name, func, args...)` creates, registers, and returns a
+pointer to a new benchmark with the specified `name` that invokes
+`func(st, args...)` where `st` is a `benchmark::State` object.
+
+Unlike the `BENCHMARK` registration macros, which can only be used at global
+scope, `RegisterBenchmark` can be called anywhere. This allows for
+benchmark tests to be registered programmatically.
+
+Additionally, `RegisterBenchmark` allows any callable object to be registered
+as a benchmark, including capturing lambdas and function objects.
+
+For example:
+```c++
+auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ };
+
+int main(int argc, char** argv) {
+  for (auto& test_input : { /* ... */ })
+    benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input);
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+}
+```
+
+<a name="exiting-with-an-error" />
+
+## Exiting with an Error
+
+When errors caused by external influences, such as file I/O and network
+communication, occur within a benchmark, the
+`State::SkipWithError(const std::string& msg)` function can be used to skip that
+run of the benchmark and report the error. Note that only future iterations of
+the `KeepRunning()` loop are skipped. For the ranged-for version of the benchmark
+loop, users must explicitly exit the loop; otherwise all iterations will be
+performed. Users may explicitly return to exit the benchmark immediately.
+
+The `SkipWithError(...)` function may be used at any point within the benchmark,
+including before and after the benchmark loop. Moreover, if `SkipWithError(...)`
+has been used, it is not required to reach the benchmark loop and one may return
+from the benchmark function early.
+
+For example:
+
+```c++
+static void BM_test(benchmark::State& state) {
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    // KeepRunning() loop will not be entered.
+  }
+  while (state.KeepRunning()) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break;  // Needed to skip the rest of the iteration.
+    }
+    do_stuff(data);
+  }
+}
+
+static void BM_test_ranged_for(benchmark::State& state) {
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    return;  // Early return is allowed when SkipWithError() has been used.
+  }
+  for (auto _ : state) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break;  // REQUIRED to prevent all further iterations.
+    }
+    do_stuff(data);
+  }
+}
+```
+
+<a name="a-faster-keep-running-loop" />
+
+## A Faster KeepRunning Loop
+
+In C++11 mode, a range-based for loop should be used in preference to
+the `KeepRunning` loop for running the benchmarks. For example:
+
+```c++
+static void BM_Fast(benchmark::State& state) {
+  for (auto _ : state) {
+    FastOperation();
+  }
+}
+BENCHMARK(BM_Fast);
+```
+
+The ranged-for loop is faster than using `KeepRunning` because `KeepRunning`
+requires a memory load and store of the iteration count every iteration,
+whereas the ranged-for variant is able to keep the iteration count
+in a register.
+ +For example, an empty inner loop of using the ranged-based for method looks like: + +```asm +# Loop Init + mov rbx, qword ptr [r14 + 104] + call benchmark::State::StartKeepRunning() + test rbx, rbx + je .LoopEnd +.LoopHeader: # =>This Inner Loop Header: Depth=1 + add rbx, -1 + jne .LoopHeader +.LoopEnd: +``` + +Compared to an empty `KeepRunning` loop, which looks like: + +```asm +.LoopHeader: # in Loop: Header=BB0_3 Depth=1 + cmp byte ptr [rbx], 1 + jne .LoopInit +.LoopBody: # =>This Inner Loop Header: Depth=1 + mov rax, qword ptr [rbx + 8] + lea rcx, [rax + 1] + mov qword ptr [rbx + 8], rcx + cmp rax, qword ptr [rbx + 104] + jb .LoopHeader + jmp .LoopEnd +.LoopInit: + mov rdi, rbx + call benchmark::State::StartKeepRunning() + jmp .LoopBody +.LoopEnd: +``` + +Unless C++03 compatibility is required, the ranged-for variant of writing +the benchmark loop should be preferred. + +<a name="disabling-cpu-frequency-scaling" /> + +## Disabling CPU Frequency Scaling + +If you see this error: + +``` +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may +be noisy and will incur extra overhead. +``` + +you might want to disable the CPU frequency scaling while running the +benchmark, as well as consider other ways to stabilize the performance of +your system while benchmarking. + +See [Reducing Variance](reducing_variance.md) for more information. diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index f57e3e7..e3857e7 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -34,7 +34,7 @@ static void BM_StringCopy(benchmark::State& state) { BENCHMARK(BM_StringCopy); // Augment the main() program to invoke benchmarks if specified -// via the --benchmarks command line flag. E.g., +// via the --benchmark_filter command line flag. E.g., // my_unittest --benchmark_filter=all // my_unittest --benchmark_filter=BM_StringCreation // my_unittest --benchmark_filter=String @@ -42,6 +42,7 @@ BENCHMARK(BM_StringCopy); int main(int argc, char** argv) { benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); return 0; } @@ -139,13 +140,13 @@ thread exits the loop body. As such, any global setup or teardown you want to do can be wrapped in a check against the thread index: static void BM_MultiThreaded(benchmark::State& state) { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // Setup code here. } for (auto _ : state) { // Run the test as normal. } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // Teardown code here. } } @@ -167,19 +168,29 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_HAS_CXX11 #endif +// This _MSC_VER check should detect VS 2017 v15.3 and newer. 
+#if __cplusplus >= 201703L || \ + (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L) +#define BENCHMARK_HAS_CXX17 +#endif + #include <stdint.h> #include <algorithm> #include <cassert> #include <cstddef> #include <iosfwd> +#include <limits> #include <map> #include <set> #include <string> #include <utility> #include <vector> +#include "benchmark/export.h" + #if defined(BENCHMARK_HAS_CXX11) +#include <atomic> #include <initializer_list> #include <type_traits> #include <utility> @@ -199,42 +210,63 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); TypeName& operator=(const TypeName&) = delete #endif -#if defined(__GNUC__) +#ifdef BENCHMARK_HAS_CXX17 +#define BENCHMARK_UNUSED [[maybe_unused]] +#elif defined(__GNUC__) || defined(__clang__) #define BENCHMARK_UNUSED __attribute__((unused)) -#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#elif defined(_MSC_VER) && !defined(__clang__) +#else #define BENCHMARK_UNUSED -#define BENCHMARK_ALWAYS_INLINE __forceinline -#if _MSC_VER >= 1900 -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#endif + +// Used to annotate functions, methods and classes so they +// are not optimized by the compiler. Useful for tests +// where you expect loops to stay in place churning cycles +#if defined(__clang__) +#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone)) +#elif defined(__GNUC__) || defined(__GNUG__) +#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0))) #else -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) +// MSVC & Intel do not have a no-optimize attribute, only line pragmas +#define BENCHMARK_DONT_OPTIMIZE #endif + +#if defined(__GNUC__) || defined(__clang__) +#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) && !defined(__clang__) +#define BENCHMARK_ALWAYS_INLINE __forceinline #define __func__ __FUNCTION__ #else -#define BENCHMARK_UNUSED #define BENCHMARK_ALWAYS_INLINE -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) #endif #define BENCHMARK_INTERNAL_TOSTRING2(x) #x #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) -#if defined(__GNUC__) || defined(__clang__) +// clang-format off +#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__) #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop") +#elif defined(__NVCOMPILER) +#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) +#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING \ + _Pragma("diagnostic push") \ + _Pragma("diag_suppress deprecated_entity_with_custom_message") +#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop") #else #define BENCHMARK_BUILTIN_EXPECT(x, y) x #define BENCHMARK_DEPRECATED_MSG(msg) #define BENCHMARK_WARNING_MSG(msg) \ __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \ __LINE__) ") : warning note: " msg)) +#define BENCHMARK_DISABLE_DEPRECATED_WARNING +#define BENCHMARK_RESTORE_DEPRECATED_WARNING #endif +// clang-format on #if defined(__GNUC__) && !defined(__clang__) #define BENCHMARK_GCC_VERSION (__GNUC__ * 
100 + __GNUC_MINOR__) @@ -252,21 +284,60 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #define BENCHMARK_UNREACHABLE() ((void)0) #endif +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_OVERRIDE override +#else +#define BENCHMARK_OVERRIDE +#endif + +#if defined(_MSC_VER) +#pragma warning(push) +// C4251: <symbol> needs to have dll-interface to be used by clients of class +#pragma warning(disable : 4251) +#endif + namespace benchmark { class BenchmarkReporter; -class MemoryManager; -void Initialize(int* argc, char** argv); +// Default number of minimum benchmark running time in seconds. +const char kDefaultMinTimeStr[] = "0.5s"; + +BENCHMARK_EXPORT void PrintDefaultHelp(); + +BENCHMARK_EXPORT void Initialize(int* argc, char** argv, + void (*HelperPrinterf)() = PrintDefaultHelp); +BENCHMARK_EXPORT void Shutdown(); // Report to stdout all arguments in 'argv' as unrecognized except the first. // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). -bool ReportUnrecognizedArguments(int argc, char** argv); +BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv); + +// Returns the current value of --benchmark_filter. +BENCHMARK_EXPORT std::string GetBenchmarkFilter(); + +// Sets a new value to --benchmark_filter. (This will override this flag's +// current value). +// Should be called after `benchmark::Initialize()`, as +// `benchmark::Initialize()` will override the flag's value. +BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value); + +// Returns the current value of --v (command line value for verbosity). +BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity(); + +// Creates a default display reporter. Used by the library when no display +// reporter is provided, but also made available for external use in case a +// custom reporter should respect the `--benchmark_format` flag as a fallback +BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter(); // Generate a list of benchmarks matching the specified --benchmark_filter flag // and if --benchmark_list_tests is specified return after printing the name // of each matching benchmark. Otherwise run each matching benchmark and // report the results. // +// spec : Specify the benchmarks to run. If users do not specify this arg, +// then the value of FLAGS_benchmark_filter +// will be used. +// // The second and third overload use the specified 'display_reporter' and // 'file_reporter' respectively. 'file_reporter' will write to the file // specified @@ -274,28 +345,94 @@ bool ReportUnrecognizedArguments(int argc, char** argv); // 'file_reporter' is ignored. // // RETURNS: The number of matching benchmarks. -size_t RunSpecifiedBenchmarks(); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter); +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(); +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec); + +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec); + +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks( + BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter); +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, std::string spec); + +// TimeUnit is passed to a benchmark in order to specify the order of magnitude +// for the measured time. 
+enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; + +BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit(); + +// Sets the default time unit the benchmarks use +// Has to be called before the benchmark loop to take effect +BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit); + +// If a MemoryManager is registered (via RegisterMemoryManager()), +// it can be used to collect and report allocation metrics for a run of the +// benchmark. +class MemoryManager { + public: + static const int64_t TombstoneValue; + + struct Result { + Result() + : num_allocs(0), + max_bytes_used(0), + total_allocated_bytes(TombstoneValue), + net_heap_growth(TombstoneValue) {} + + // The number of allocations made in total between Start and Stop. + int64_t num_allocs; + + // The peak memory use between Start and Stop. + int64_t max_bytes_used; + + // The total memory allocated, in bytes, between Start and Stop. + // Init'ed to TombstoneValue if metric not available. + int64_t total_allocated_bytes; + + // The net changes in memory, in bytes, between Start and Stop. + // ie., total_allocated_bytes - total_deallocated_bytes. + // Init'ed to TombstoneValue if metric not available. + int64_t net_heap_growth; + }; + + virtual ~MemoryManager() {} + + // Implement this to start recording allocation information. + virtual void Start() = 0; + + // Implement this to stop recording and fill out the given Result structure. + virtual void Stop(Result& result) = 0; +}; // Register a MemoryManager instance that will be used to collect and report // allocation measurements for benchmark runs. +BENCHMARK_EXPORT void RegisterMemoryManager(MemoryManager* memory_manager); +// Add a key-value pair to output as part of the context stanza in the report. +BENCHMARK_EXPORT +void AddCustomContext(const std::string& key, const std::string& value); + namespace internal { class Benchmark; class BenchmarkImp; class BenchmarkFamilies; +BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext(); + +BENCHMARK_EXPORT void UseCharPointer(char const volatile*); // Take ownership of the pointer and register the benchmark. Return the // registered benchmark. -Benchmark* RegisterBenchmarkInternal(Benchmark*); +BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(Benchmark*); // Ensure that the standard streams are properly initialized in every TU. -int InitializeStreams(); +BENCHMARK_EXPORT int InitializeStreams(); BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); } // namespace internal @@ -305,12 +442,24 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY #endif +// Force the compiler to flush pending writes to global memory. Acts as an +// effective read/write barrier +#ifdef BENCHMARK_HAS_CXX11 +inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { + std::atomic_signal_fence(std::memory_order_acq_rel); +} +#endif + // The DoNotOptimize(...) function can be used to prevent a value or // expression from being optimized away by the compiler. This function is // intended to add little to no overhead. 
// See: https://youtu.be/nXaxk27zwlk?t=2441 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY +#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER) template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { asm volatile("" : : "r,m"(value) : "memory"); } @@ -324,25 +473,125 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { #endif } -// Force the compiler to flush pending writes to global memory. Acts as an -// effective read/write barrier +#ifdef BENCHMARK_HAS_CXX11 +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { +#if defined(__clang__) + asm volatile("" : "+r,m"(value) : : "memory"); +#else + asm volatile("" : "+m,r"(value) : : "memory"); +#endif +} +#endif +#elif defined(BENCHMARK_HAS_CXX11) && (__GNUC__ >= 5) +// Workaround for a bug with full argument copy overhead with GCC. +// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519 +template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if<std::is_trivially_copyable<Tp>::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp const& value) { + asm volatile("" : : "r,m"(value) : "memory"); +} + +template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if<!std::is_trivially_copyable<Tp>::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp const& value) { + asm volatile("" : : "m"(value) : "memory"); +} + +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if<std::is_trivially_copyable<Tp>::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp& value) { + asm volatile("" : "+m,r"(value) : : "memory"); +} + +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if<!std::is_trivially_copyable<Tp>::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp& value) { + asm volatile("" : "+m"(value) : : "memory"); +} + +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if<std::is_trivially_copyable<Tp>::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp&& value) { + asm volatile("" : "+m,r"(value) : : "memory"); +} + +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if<!std::is_trivially_copyable<Tp>::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp&& value) { + asm volatile("" : "+m"(value) : : "memory"); +} + +#else +// Fallback for GCC < 5. Can add some overhead because the compiler is forced +// to use memory operations instead of operations with registers. +// TODO: Remove if GCC < 5 will be unsupported. 
+template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : : "m"(value) : "memory"); +} + +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { + asm volatile("" : "+m"(value) : : "memory"); +} + +#ifdef BENCHMARK_HAS_CXX11 +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { + asm volatile("" : "+m"(value) : : "memory"); +} +#endif +#endif + +#ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { asm volatile("" : : : "memory"); } +#endif #elif defined(_MSC_VER) template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); _ReadWriteBarrier(); } +#ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } +#endif #else template <class Tp> +BENCHMARK_DEPRECATED_MSG( + "The const-ref version of this method can permit " + "undesired compiler optimizations in benchmarks") inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); } -// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers +// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11. #endif // This class is used for user-defined counters. @@ -352,27 +601,27 @@ class Counter { kDefaults = 0, // Mark the counter as a rate. It will be presented divided // by the duration of the benchmark. - kIsRate = 1U << 0U, + kIsRate = 1 << 0, // Mark the counter as a thread-average quantity. It will be // presented divided by the number of threads. - kAvgThreads = 1U << 1U, + kAvgThreads = 1 << 1, // Mark the counter as a thread-average rate. See above. kAvgThreadsRate = kIsRate | kAvgThreads, // Mark the counter as a constant value, valid/same for *every* iteration. // When reporting, it will be *multiplied* by the iteration count. - kIsIterationInvariant = 1U << 2U, + kIsIterationInvariant = 1 << 2, // Mark the counter as a constant rate. // When reporting, it will be *multiplied* by the iteration count // and then divided by the duration of the benchmark. kIsIterationInvariantRate = kIsRate | kIsIterationInvariant, // Mark the counter as a iteration-average quantity. // It will be presented divided by the number of iterations. - kAvgIterations = 1U << 3U, + kAvgIterations = 1 << 3, // Mark the counter as a iteration-average rate. See above. kAvgIterationsRate = kIsRate | kAvgIterations, // In the end, invert the result. This is always done last! - kInvert = 1U << 31U + kInvert = 1 << 31 }; enum OneK { @@ -390,7 +639,7 @@ class Counter { Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000) : value(v), flags(f), oneK(k) {} - BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; } + BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; } BENCHMARK_ALWAYS_INLINE operator double&() { return value; } }; @@ -405,17 +654,15 @@ Counter::Flags inline operator|(const Counter::Flags& LHS, // This is the container for the user-defined counters. 
typedef std::map<std::string, Counter> UserCounters; -// TimeUnit is passed to a benchmark in order to specify the order of magnitude -// for the measured time. -enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; - // BigO is passed to a benchmark in order to specify the asymptotic // computational // complexity for the benchmark. In case oAuto is selected, complexity will be // calculated automatically to the best fit. enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; -typedef uint64_t IterationCount; +typedef int64_t IterationCount; + +enum StatisticUnit { kTime, kPercentage }; // BigOFunc is passed to a benchmark in order to specify the asymptotic // computational complexity for the benchmark. @@ -429,14 +676,17 @@ namespace internal { struct Statistics { std::string name_; StatisticsFunc* compute_; + StatisticUnit unit_; - Statistics(const std::string& name, StatisticsFunc* compute) - : name_(name), compute_(compute) {} + Statistics(const std::string& name, StatisticsFunc* compute, + StatisticUnit unit = kTime) + : name_(name), compute_(compute), unit_(unit) {} }; -struct BenchmarkInstance; +class BenchmarkInstance; class ThreadTimer; class ThreadManager; +class PerfCountersMeasurement; enum AggregationReportMode #if defined(BENCHMARK_HAS_CXX11) @@ -458,11 +708,21 @@ enum AggregationReportMode ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly }; +enum Skipped +#if defined(BENCHMARK_HAS_CXX11) + : unsigned +#endif +{ + NotSkipped = 0, + SkippedWithMessage, + SkippedWithError +}; + } // namespace internal // State is passed to a running Benchmark and contains state for the // benchmark to use. -class State { +class BENCHMARK_EXPORT State { public: struct StateIterator; friend struct StateIterator; @@ -494,8 +754,8 @@ class State { // } bool KeepRunningBatch(IterationCount n); - // REQUIRES: timer is running and 'SkipWithError(...)' has not been called - // by the current thread. + // REQUIRES: timer is running and 'SkipWithMessage(...)' or + // 'SkipWithError(...)' has not been called by the current thread. // Stop the benchmark timer. If not called, the timer will be // automatically stopped after the last iteration of the benchmark loop. // @@ -510,8 +770,8 @@ class State { // within each benchmark iteration, if possible. void PauseTiming(); - // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called - // by the current thread. + // REQUIRES: timer is not running and 'SkipWithMessage(...)' or + // 'SkipWithError(...)' has not been called by the current thread. // Start the benchmark timer. The timer is NOT running on entrance to the // benchmark function. It begins running after control flow enters the // benchmark loop. @@ -521,8 +781,30 @@ class State { // within each benchmark iteration, if possible. void ResumeTiming(); - // REQUIRES: 'SkipWithError(...)' has not been called previously by the - // current thread. + // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been + // called previously by the current thread. + // Report the benchmark as resulting in being skipped with the specified + // 'msg'. + // After this call the user may explicitly 'return' from the benchmark. + // + // If the ranged-for style of benchmark loop is used, the user must explicitly + // break from the loop, otherwise all future iterations will be run. + // If the 'KeepRunning()' loop is used the current thread will automatically + // exit the loop at the end of the current iteration. 
+ // + // For threaded benchmarks only the current thread stops executing and future + // calls to `KeepRunning()` will block until all threads have completed + // the `KeepRunning()` loop. If multiple threads report being skipped only the + // first skip message is used. + // + // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit + // the current scope immediately. If the function is called from within + // the 'KeepRunning()' loop the current iteration will finish. It is the users + // responsibility to exit the scope as needed. + void SkipWithMessage(const std::string& msg); + + // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been + // called previously by the current thread. // Report the benchmark as resulting in an error with the specified 'msg'. // After this call the user may explicitly 'return' from the benchmark. // @@ -540,10 +822,13 @@ class State { // the current scope immediately. If the function is called from within // the 'KeepRunning()' loop the current iteration will finish. It is the users // responsibility to exit the scope as needed. - void SkipWithError(const char* msg); + void SkipWithError(const std::string& msg); + + // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called. + bool skipped() const { return internal::NotSkipped != skipped_; } // Returns true if an error has been reported with 'SkipWithError(...)'. - bool error_occurred() const { return error_occurred_; } + bool error_occurred() const { return internal::SkippedWithError == skipped_; } // REQUIRES: called exactly once per iteration of the benchmarking loop. // Set the manually measured time for this benchmark iteration, which @@ -614,11 +899,7 @@ class State { // BM_Compress 50 50 14115038 compress:27.3% // // REQUIRES: a benchmark has exited its benchmarking loop. - void SetLabel(const char* label); - - void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) { - this->SetLabel(str.c_str()); - } + void SetLabel(const std::string& label); // Range arguments for this run. CHECKs if the argument has been set. BENCHMARK_ALWAYS_INLINE @@ -633,6 +914,14 @@ class State { BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") int64_t range_y() const { return range(1); } + // Number of threads concurrently executing the benchmark. + BENCHMARK_ALWAYS_INLINE + int threads() const { return threads_; } + + // Index of the executing thread. Values from [0, threads). + BENCHMARK_ALWAYS_INLINE + int thread_index() const { return thread_index_; } + BENCHMARK_ALWAYS_INLINE IterationCount iterations() const { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { @@ -641,8 +930,11 @@ class State { return max_iterations - total_iterations_ + batch_leftover_; } - private - : // items we expect on the first cache line (ie 64 bytes of the struct) + BENCHMARK_ALWAYS_INLINE + std::string name() const { return name_; } + + private: + // items we expect on the first cache line (ie 64 bytes of the struct) // When total_iterations_ is 0, KeepRunning() and friends will return false. // May be larger than max_iterations. IterationCount total_iterations_; @@ -658,9 +950,9 @@ class State { private: bool started_; bool finished_; - bool error_occurred_; + internal::Skipped skipped_; - private: // items we don't need on the first cache line + // items we don't need on the first cache line std::vector<int64_t> range_; int64_t complexity_n_; @@ -668,25 +960,28 @@ class State { public: // Container for user-defined counters. 
UserCounters counters; - // Index of the executing thread. Values from [0, threads). - const int thread_index; - // Number of threads concurrently executing the benchmark. - const int threads; private: - State(IterationCount max_iters, const std::vector<int64_t>& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager); + State(std::string name, IterationCount max_iters, + const std::vector<int64_t>& ranges, int thread_i, int n_threads, + internal::ThreadTimer* timer, internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement); void StartKeepRunning(); // Implementation of KeepRunning() and KeepRunningBatch(). // is_batch must be true unless n is 1. bool KeepRunningInternal(IterationCount n, bool is_batch); void FinishKeepRunning(); - internal::ThreadTimer* timer_; - internal::ThreadManager* manager_; - friend struct internal::BenchmarkInstance; + const std::string name_; + const int thread_index_; + const int threads_; + + internal::ThreadTimer* const timer_; + internal::ThreadManager* const manager_; + internal::PerfCountersMeasurement* const perf_counters_measurement_; + + friend class internal::BenchmarkInstance; }; inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { @@ -710,7 +1005,7 @@ inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n, } if (!started_) { StartKeepRunning(); - if (!error_occurred_ && total_iterations_ >= n) { + if (!skipped() && total_iterations_ >= n) { total_iterations_ -= n; return true; } @@ -740,7 +1035,7 @@ struct State::StateIterator { BENCHMARK_ALWAYS_INLINE explicit StateIterator(State* st) - : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {} + : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {} public: BENCHMARK_ALWAYS_INLINE @@ -783,13 +1078,16 @@ typedef void(Function)(State&); // be called on this object to change the properties of the benchmark. // Each method returns "this" so that multiple method calls can // chained into one expression. -class Benchmark { +class BENCHMARK_EXPORT Benchmark { public: virtual ~Benchmark(); // Note: the following methods all return "this" so that multiple // method calls can be chained together in one expression. + // Specify the name of the benchmark + Benchmark* Name(const std::string& name); + // Run this benchmark once with "x" as the extra argument passed // to the function. // REQUIRES: The function passed to the constructor must accept an arg1. @@ -850,6 +1148,23 @@ class Benchmark { return Ranges(ranges); } + // Have "setup" and/or "teardown" invoked once for every benchmark run. + // If the benchmark is multi-threaded (will run in k threads concurrently), + // the setup callback will be be invoked exactly once (not k times) before + // each run with k threads. Time allowing (e.g. for a short benchmark), there + // may be multiple such runs per benchmark, each run with its own + // "setup"/"teardown". + // + // If the benchmark uses different size groups of threads (e.g. via + // ThreadRange), the above will be true for each size group. + // + // The callback will be passed a State object, which includes the number + // of threads, thread-index, benchmark arguments, etc. + // + // The callback must not be NULL or self-deleting. 
+ Benchmark* Setup(void (*setup)(const benchmark::State&)); + Benchmark* Teardown(void (*teardown)(const benchmark::State&)); + // Pass this benchmark object to *func, which can customize // the benchmark by calling various methods like Arg, Args, // Threads, etc. @@ -864,12 +1179,19 @@ class Benchmark { // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + // Set the minimum amount of time to run the benchmark before taking runtimes + // of this benchmark into account. This + // option overrides the `benchmark_min_warmup_time` flag. + // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark. + Benchmark* MinWarmUpTime(double t); + // Specify the amount of iterations that should be run by this benchmark. + // This option overrides the `benchmark_min_time` flag. // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark. // // NOTE: This function should only be used when *exact* iteration control is // needed and never to control or limit how long a benchmark runs, where - // `--benchmark_min_time=N` or `MinTime(...)` should be used instead. + // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead. Benchmark* Iterations(IterationCount n); // Specify the amount of times to repeat this benchmark. This option overrides @@ -889,7 +1211,7 @@ class Benchmark { // By default, the CPU time is measured only for the main thread, which may // be unrepresentative if the benchmark uses threads internally. If called, // the total CPU time spent by all the threads will be measured instead. - // By default, the only the main thread CPU time will be measured. + // By default, only the main thread CPU time will be measured. Benchmark* MeasureProcessCPUTime(); // If a particular benchmark should use the Wall clock instead of the CPU time @@ -918,7 +1240,9 @@ class Benchmark { Benchmark* Complexity(BigOFunc* complexity); // Add this statistics to be computed over all the values of benchmark run - Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics); + Benchmark* ComputeStatistics(const std::string& name, + StatisticsFunc* statistics, + StatisticUnit unit = kTime); // Support for running multiple copies of the same benchmark concurrently // in multiple threads. 
This may be useful when measuring the scaling @@ -952,23 +1276,32 @@ class Benchmark { virtual void Run(State& state) = 0; + TimeUnit GetTimeUnit() const; + protected: - explicit Benchmark(const char* name); - Benchmark(Benchmark const&); - void SetName(const char* name); + explicit Benchmark(const std::string& name); + void SetName(const std::string& name); + public: + const char* GetName() const; int ArgsCnt() const; + const char* GetArgName(int arg) const; private: friend class BenchmarkFamilies; + friend class BenchmarkInstance; std::string name_; AggregationReportMode aggregation_report_mode_; std::vector<std::string> arg_names_; // Args for all benchmark runs std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs + TimeUnit time_unit_; + bool use_default_time_unit_; + int range_multiplier_; double min_time_; + double min_warmup_time_; IterationCount iterations_; int repetitions_; bool measure_process_cpu_time_; @@ -979,7 +1312,21 @@ class Benchmark { std::vector<Statistics> statistics_; std::vector<int> thread_counts_; - Benchmark& operator=(Benchmark const&); + typedef void (*callback_function)(const benchmark::State&); + callback_function setup_; + callback_function teardown_; + + Benchmark(Benchmark const&) +#if defined(BENCHMARK_HAS_CXX11) + = delete +#endif + ; + + Benchmark& operator=(Benchmark const&) +#if defined(BENCHMARK_HAS_CXX11) + = delete +#endif + ; }; } // namespace internal @@ -988,27 +1335,27 @@ class Benchmark { // the specified functor 'fn'. // // RETURNS: A pointer to the registered benchmark. -internal::Benchmark* RegisterBenchmark(const char* name, +internal::Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn); #if defined(BENCHMARK_HAS_CXX11) template <class Lambda> -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn); +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn); #endif // Remove all registered benchmarks. All pointers to previously registered // benchmarks are invalidated. -void ClearRegisteredBenchmarks(); +BENCHMARK_EXPORT void ClearRegisteredBenchmarks(); namespace internal { // The class used to hold all Benchmarks created from static function. // (ie those created using the BENCHMARK(...) macros. 
-class FunctionBenchmark : public Benchmark { +class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark { public: - FunctionBenchmark(const char* name, Function* func) + FunctionBenchmark(const std::string& name, Function* func) : Benchmark(name), func_(func) {} - virtual void Run(State& st); + void Run(State& st) BENCHMARK_OVERRIDE; private: Function* func_; @@ -1018,36 +1365,38 @@ class FunctionBenchmark : public Benchmark { template <class Lambda> class LambdaBenchmark : public Benchmark { public: - virtual void Run(State& st) { lambda_(st); } + void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } private: template <class OLambda> - LambdaBenchmark(const char* name, OLambda&& lam) + LambdaBenchmark(const std::string& name, OLambda&& lam) : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {} LambdaBenchmark(LambdaBenchmark const&) = delete; - private: - template <class Lam> - friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); + template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration) + friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&); Lambda lambda_; }; #endif - } // namespace internal -inline internal::Benchmark* RegisterBenchmark(const char* name, +inline internal::Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn) { + // FIXME: this should be a `std::make_unique<>()` but we don't have C++14. + // codechecker_intentional [cplusplus.NewDeleteLeaks] return internal::RegisterBenchmarkInternal( ::new internal::FunctionBenchmark(name, fn)); } #ifdef BENCHMARK_HAS_CXX11 template <class Lambda> -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) { using BenchType = internal::LambdaBenchmark<typename std::decay<Lambda>::type>; + // FIXME: this should be a `std::make_unique<>()` but we don't have C++14. + // codechecker_intentional [cplusplus.NewDeleteLeaks] return internal::RegisterBenchmarkInternal( ::new BenchType(name, std::forward<Lambda>(fn))); } @@ -1056,7 +1405,7 @@ internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { #if defined(BENCHMARK_HAS_CXX11) && \ (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409) template <class Lambda, class... Args> -internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn, +internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn, Args&&... args) { return benchmark::RegisterBenchmark( name, [=](benchmark::State& st) { fn(st, args...); }); @@ -1070,7 +1419,7 @@ class Fixture : public internal::Benchmark { public: Fixture() : internal::Benchmark("") {} - virtual void Run(State& st) { + void Run(State& st) BENCHMARK_OVERRIDE { this->SetUp(st); this->BenchmarkCase(st); this->TearDown(st); @@ -1086,7 +1435,6 @@ class Fixture : public internal::Benchmark { protected: virtual void BenchmarkCase(State&) = 0; }; - } // namespace benchmark // ------------------------------------------------------ @@ -1102,22 +1450,37 @@ class Fixture : public internal::Benchmark { #endif // Helpers for generating unique variable names +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK_PRIVATE_NAME(...) 
\ + BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \ + __VA_ARGS__) +#else #define BENCHMARK_PRIVATE_NAME(n) \ - BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) + BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n) +#endif // BENCHMARK_HAS_CXX11 + #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c // Helper for concatenation with macro name expansion #define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \ - BaseClass##_##Method##_Benchmark + BaseClass##_##Method##_Benchmark #define BENCHMARK_PRIVATE_DECLARE(n) \ static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ BENCHMARK_UNUSED +#ifdef BENCHMARK_HAS_CXX11 +#define BENCHMARK(...) \ + BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \ + __VA_ARGS__))) +#else #define BENCHMARK(n) \ BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ new ::benchmark::internal::FunctionBenchmark(#n, n))) +#endif // BENCHMARK_HAS_CXX11 // Old-style macros #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) @@ -1178,49 +1541,49 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) #endif -#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - class BaseClass##_##Method##_Benchmark : public BaseClass { \ - public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "/" #Method); \ - } \ - \ - protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() { \ + this->SetName(#BaseClass "/" #Method); \ + } \ + \ + protected: \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass<a>() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #a ">/" #Method); \ } \ \ protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ } \ \ protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #ifdef BENCHMARK_HAS_CXX11 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) 
\ class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \ public: \ - BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \ + BaseClass##_##Method##_Benchmark() { \ this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \ } \ \ protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ + void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ }; #else #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ @@ -1282,11 +1645,20 @@ class Fixture : public internal::Benchmark { #endif // Helper macro to create a main routine in a test that runs the benchmarks +// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL. #define BENCHMARK_MAIN() \ int main(int argc, char** argv) { \ + char arg0_default[] = "benchmark"; \ + char* args_default = arg0_default; \ + if (!argv) { \ + argc = 1; \ + argv = &args_default; \ + } \ ::benchmark::Initialize(&argc, argv); \ if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ ::benchmark::RunSpecifiedBenchmarks(); \ + ::benchmark::Shutdown(); \ + return 0; \ } \ int main(int, char**) @@ -1295,7 +1667,7 @@ class Fixture : public internal::Benchmark { namespace benchmark { -struct CPUInfo { +struct BENCHMARK_EXPORT CPUInfo { struct CacheInfo { std::string type; int level; @@ -1303,16 +1675,12 @@ struct CPUInfo { int num_sharing; }; - enum Scaling { - UNKNOWN, - ENABLED, - DISABLED - }; + enum Scaling { UNKNOWN, ENABLED, DISABLED }; int num_cpus; + Scaling scaling; double cycles_per_second; std::vector<CacheInfo> caches; - Scaling scaling; std::vector<double> load_avg; static const CPUInfo& Get(); @@ -1323,7 +1691,7 @@ struct CPUInfo { }; // Adding Struct for System Information -struct SystemInfo { +struct BENCHMARK_EXPORT SystemInfo { std::string name; static const SystemInfo& Get(); @@ -1335,10 +1703,11 @@ struct SystemInfo { // BenchmarkName contains the components of the Benchmark's name // which allows individual fields to be modified or cleared before // building the final name using 'str()'. -struct BenchmarkName { +struct BENCHMARK_EXPORT BenchmarkName { std::string function_name; std::string args; std::string min_time; + std::string min_warmup_time; std::string iterations; std::string repetitions; std::string time_type; @@ -1354,7 +1723,7 @@ struct BenchmarkName { // can control the destination of the reports by calling // RunSpecifiedBenchmarks and passing it a custom reporter object. // The reporter object must implement the following interface. 
-class BenchmarkReporter { +class BENCHMARK_EXPORT BenchmarkReporter { public: struct Context { CPUInfo const& cpu_info; @@ -1365,16 +1734,17 @@ class BenchmarkReporter { Context(); }; - struct Run { + struct BENCHMARK_EXPORT Run { static const int64_t no_repetition_index = -1; enum RunType { RT_Iteration, RT_Aggregate }; Run() : run_type(RT_Iteration), - error_occurred(false), + aggregate_unit(kTime), + skipped(internal::NotSkipped), iterations(1), threads(1), - time_unit(kNanosecond), + time_unit(GetDefaultTimeUnit()), real_accumulated_time(0), cpu_accumulated_time(0), max_heapbytes_used(0), @@ -1383,18 +1753,19 @@ class BenchmarkReporter { complexity_n(0), report_big_o(false), report_rms(false), - counters(), - has_memory_result(false), - allocs_per_iter(0.0), - max_bytes_used(0) {} + memory_result(NULL), + allocs_per_iter(0.0) {} std::string benchmark_name() const; BenchmarkName run_name; + int64_t family_index; + int64_t per_family_instance_index; RunType run_type; std::string aggregate_name; + StatisticUnit aggregate_unit; std::string report_label; // Empty if not set by benchmark. - bool error_occurred; - std::string error_message; + internal::Skipped skipped; + std::string skip_message; IterationCount iterations; int64_t threads; @@ -1434,9 +1805,21 @@ class BenchmarkReporter { UserCounters counters; // Memory metrics. - bool has_memory_result; + const MemoryManager::Result* memory_result; double allocs_per_iter; - int64_t max_bytes_used; + }; + + struct PerFamilyRunReports { + PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {} + + // How many runs will all instances of this benchmark perform? + int num_runs_total; + + // How many runs have happened already? + int num_runs_done; + + // The reports about (non-errneous!) runs of this family. + std::vector<BenchmarkReporter::Run> Runs; }; // Construct a BenchmarkReporter with the output stream set to 'std::cout' @@ -1452,6 +1835,12 @@ class BenchmarkReporter { virtual bool ReportContext(const Context& context) = 0; // Called once for each group of benchmark runs, gives information about + // the configurations of the runs. + virtual void ReportRunsConfig(double /*min_time*/, + bool /*has_explicit_iters*/, + IterationCount /*iters*/) {} + + // Called once for each group of benchmark runs, gives information about // cpu-time and heap memory usage during the benchmark run. If the group // of runs contained more than two entries then 'report' contains additional // elements representing the mean and standard deviation of those runs. @@ -1496,7 +1885,7 @@ class BenchmarkReporter { // Simple reporter that outputs benchmark data to the console. This is the // default reporter used by RunSpecifiedBenchmarks(). 
-class ConsoleReporter : public BenchmarkReporter { +class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter { public: enum OutputOptions { OO_None = 0, @@ -1506,13 +1895,10 @@ class ConsoleReporter : public BenchmarkReporter { OO_Defaults = OO_ColorTabular }; explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults) - : output_options_(opts_), - name_field_width_(0), - prev_counters_(), - printed_header_(false) {} + : output_options_(opts_), name_field_width_(0), printed_header_(false) {} - virtual bool ReportContext(const Context& context); - virtual void ReportRuns(const std::vector<Run>& reports); + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; protected: virtual void PrintRunData(const Run& report); @@ -1524,12 +1910,12 @@ class ConsoleReporter : public BenchmarkReporter { bool printed_header_; }; -class JSONReporter : public BenchmarkReporter { +class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter { public: JSONReporter() : first_report_(true) {} - virtual bool ReportContext(const Context& context); - virtual void ReportRuns(const std::vector<Run>& reports); - virtual void Finalize(); + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; + void Finalize() BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1537,13 +1923,13 @@ class JSONReporter : public BenchmarkReporter { bool first_report_; }; -class BENCHMARK_DEPRECATED_MSG( +class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG( "The CSV Reporter will be removed in a future release") CSVReporter : public BenchmarkReporter { public: CSVReporter() : printed_header_(false) {} - virtual bool ReportContext(const Context& context); - virtual void ReportRuns(const std::vector<Run>& reports); + bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; + void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE; private: void PrintRunData(const Run& report); @@ -1552,29 +1938,6 @@ class BENCHMARK_DEPRECATED_MSG( std::set<std::string> user_counter_names_; }; -// If a MemoryManager is registered, it can be used to collect and report -// allocation metrics for a run of the benchmark. -class MemoryManager { - public: - struct Result { - Result() : num_allocs(0), max_bytes_used(0) {} - - // The number of allocations made in total between Start and Stop. - int64_t num_allocs; - - // The peak memory use between Start and Stop. - int64_t max_bytes_used; - }; - - virtual ~MemoryManager() {} - - // Implement this to start recording allocation information. - virtual void Start() = 0; - - // Implement this to stop recording and fill out the given Result structure. - virtual void Stop(Result* result) = 0; -}; - inline const char* GetTimeUnitString(TimeUnit unit) { switch (unit) { case kSecond: @@ -1603,6 +1966,26 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) { BENCHMARK_UNREACHABLE(); } +// Creates a list of integer values for the given range and multiplier. +// This can be used together with ArgsProduct() to allow multiple ranges +// with different multipliers. +// Example: +// ArgsProduct({ +// CreateRange(0, 1024, /*multi=*/32), +// CreateRange(0, 100, /*multi=*/4), +// CreateDenseRange(0, 4, /*step=*/1), +// }); +BENCHMARK_EXPORT +std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi); + +// Creates a list of integer values for the given range and step. 
+BENCHMARK_EXPORT +std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step); + } // namespace benchmark +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #endif // BENCHMARK_BENCHMARK_H_ diff --git a/include/benchmark/export.h b/include/benchmark/export.h new file mode 100644 index 0000000..f96f859 --- /dev/null +++ b/include/benchmark/export.h @@ -0,0 +1,47 @@ +#ifndef BENCHMARK_EXPORT_H +#define BENCHMARK_EXPORT_H + +#if defined(_WIN32) +#define EXPORT_ATTR __declspec(dllexport) +#define IMPORT_ATTR __declspec(dllimport) +#define NO_EXPORT_ATTR +#define DEPRECATED_ATTR __declspec(deprecated) +#else // _WIN32 +#define EXPORT_ATTR __attribute__((visibility("default"))) +#define IMPORT_ATTR __attribute__((visibility("default"))) +#define NO_EXPORT_ATTR __attribute__((visibility("hidden"))) +#define DEPRECATE_ATTR __attribute__((__deprecated__)) +#endif // _WIN32 + +#ifdef BENCHMARK_STATIC_DEFINE +#define BENCHMARK_EXPORT +#define BENCHMARK_NO_EXPORT +#else // BENCHMARK_STATIC_DEFINE +#ifndef BENCHMARK_EXPORT +#ifdef benchmark_EXPORTS +/* We are building this library */ +#define BENCHMARK_EXPORT EXPORT_ATTR +#else // benchmark_EXPORTS +/* We are using this library */ +#define BENCHMARK_EXPORT IMPORT_ATTR +#endif // benchmark_EXPORTS +#endif // !BENCHMARK_EXPORT + +#ifndef BENCHMARK_NO_EXPORT +#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR +#endif // !BENCHMARK_NO_EXPORT +#endif // BENCHMARK_STATIC_DEFINE + +#ifndef BENCHMARK_DEPRECATED +#define BENCHMARK_DEPRECATED DEPRECATE_ATTR +#endif // BENCHMARK_DEPRECATED + +#ifndef BENCHMARK_DEPRECATED_EXPORT +#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED +#endif // BENCHMARK_DEPRECATED_EXPORT + +#ifndef BENCHMARK_DEPRECATED_NO_EXPORT +#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED +#endif // BENCHMARK_DEPRECATED_EXPORT + +#endif /* BENCHMARK_EXPORT_H */ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..fe8770b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,50 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "google_benchmark" +description = "A library to benchmark code snippets." 
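The CreateRange/CreateDenseRange helpers declared at the top of this hunk are meant to feed ArgsProduct, as the in-header comment above sketches. A usage example; BM_Lookup and the chosen ranges are made up:

#include <benchmark/benchmark.h>

static void BM_Lookup(benchmark::State& state) {
  const int64_t table_size = state.range(0);
  const int64_t probe_width = state.range(1);
  for (auto _ : state) {
    benchmark::DoNotOptimize(table_size + probe_width);  // stand-in for real work
  }
}
// Runs every combination of {8, 64, 512, 4096} x {0, 1, 2, 3, 4}.
BENCHMARK(BM_Lookup)->ArgsProduct({
    benchmark::CreateRange(8, 4096, /*multi=*/8),
    benchmark::CreateDenseRange(0, 4, /*step=*/1),
});
BENCHMARK_MAIN();

The new export.h in the same hunk also implies that consumers of a static build need BENCHMARK_STATIC_DEFINE; the CMake changes later in this patch add it as a PUBLIC compile definition when BUILD_SHARED_LIBS is off.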
+requires-python = ">=3.8" +license = {file = "LICENSE"} +keywords = ["benchmark"] + +authors = [ + {name = "Google", email = "benchmark-discuss@googlegroups.com"}, +] + +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Software Development :: Testing", + "Topic :: System :: Benchmark", +] + +dynamic = ["readme", "version"] + +dependencies = [ + "absl-py>=0.7.1", +] + +[project.urls] +Homepage = "https://github.com/google/benchmark" +Documentation = "https://github.com/google/benchmark/tree/main/docs" +Repository = "https://github.com/google/benchmark.git" +Discord = "https://discord.gg/cz7UX7wKC2" + +[tool.setuptools] +package-dir = {"" = "bindings/python"} +zip-safe = false + +[tool.setuptools.packages.find] +where = ["bindings/python"] + +[tool.setuptools.dynamic] +version = { attr = "google_benchmark.__version__" } +readme = { file = "README.md", content-type = "text/markdown" } diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 85e8986..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy == 1.19.4 -scipy == 1.5.4 @@ -1,55 +1,50 @@ +import contextlib import os -import posixpath -import re +import platform import shutil -import sys +import sysconfig +from pathlib import Path -from distutils import sysconfig import setuptools from setuptools.command import build_ext -HERE = os.path.dirname(os.path.abspath(__file__)) +PYTHON_INCLUDE_PATH_PLACEHOLDER = "<PYTHON_INCLUDE_PATH>" +IS_WINDOWS = platform.system() == "Windows" +IS_MAC = platform.system() == "Darwin" -IS_WINDOWS = sys.platform.startswith("win") - -def _get_version(): - """Parse the version string from __init__.py.""" - with open( - os.path.join(HERE, "bindings", "python", "google_benchmark", "__init__.py") - ) as init_file: +@contextlib.contextmanager +def temp_fill_include_path(fp: str): + """Temporarily set the Python include path in a file.""" + with open(fp, "r+") as f: try: - version_line = next( - line for line in init_file if line.startswith("__version__") + content = f.read() + replaced = content.replace( + PYTHON_INCLUDE_PATH_PLACEHOLDER, + Path(sysconfig.get_paths()['include']).as_posix(), ) - except StopIteration: - raise ValueError("__version__ not defined in __init__.py") - else: - namespace = {} - exec(version_line, namespace) # pylint: disable=exec-used - return namespace["__version__"] - - -def _parse_requirements(path): - with open(os.path.join(HERE, path)) as requirements: - return [ - line.rstrip() - for line in requirements - if not (line.isspace() or line.startswith("#")) - ] + f.seek(0) + f.write(replaced) + f.truncate() + yield + finally: + # revert to the original content after exit + f.seek(0) + f.write(content) + f.truncate() class BazelExtension(setuptools.Extension): """A C/C++ extension that is defined as a Bazel BUILD target.""" - def __init__(self, name, bazel_target): + def __init__(self, name: str, bazel_target: str): + super().__init__(name=name, sources=[]) + self.bazel_target = bazel_target - self.relpath, self.target_name = posixpath.relpath(bazel_target, "//").split( - ":" - ) - setuptools.Extension.__init__(self, name, sources=[]) + stripped_target = bazel_target.split("//")[-1] + self.relpath, self.target_name = 
stripped_target.split(":") class BuildBazelExtension(build_ext.build_ext): @@ -60,81 +55,59 @@ class BuildBazelExtension(build_ext.build_ext): self.bazel_build(ext) build_ext.build_ext.run(self) - def bazel_build(self, ext): + def bazel_build(self, ext: BazelExtension): """Runs the bazel build to create the package.""" - with open("WORKSPACE", "r") as workspace: - workspace_contents = workspace.read() - - with open("WORKSPACE", "w") as workspace: - workspace.write( - re.sub( - r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)', - sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep), - workspace_contents, - ) - ) - - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - - bazel_argv = [ - "bazel", - "build", - ext.bazel_target, - "--symlink_prefix=" + os.path.join(self.build_temp, "bazel-"), - "--compilation_mode=" + ("dbg" if self.debug else "opt"), - ] - - if IS_WINDOWS: - # Link with python*.lib. - for library_dir in self.library_dirs: - bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) - - self.spawn(bazel_argv) - - shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' - ext_bazel_bin_path = os.path.join( - self.build_temp, 'bazel-bin', - ext.relpath, ext.target_name + shared_lib_suffix) - - ext_dest_path = self.get_ext_fullpath(ext.name) - ext_dest_dir = os.path.dirname(ext_dest_path) - if not os.path.exists(ext_dest_dir): - os.makedirs(ext_dest_dir) - shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + with temp_fill_include_path("WORKSPACE"): + temp_path = Path(self.build_temp) + + bazel_argv = [ + "bazel", + "build", + ext.bazel_target, + f"--symlink_prefix={temp_path / 'bazel-'}", + f"--compilation_mode={'dbg' if self.debug else 'opt'}", + # C++17 is required by nanobind + f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}", + ] + + if IS_WINDOWS: + # Link with python*.lib. + for library_dir in self.library_dirs: + bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) + elif IS_MAC: + if platform.machine() == "x86_64": + # C++17 needs macOS 10.14 at minimum + bazel_argv.append("--macos_minimum_os=10.14") + + # cross-compilation for Mac ARM64 on GitHub Mac x86 runners. + # ARCHFLAGS is set by cibuildwheel before macOS wheel builds. + archflags = os.getenv("ARCHFLAGS", "") + if "arm64" in archflags: + bazel_argv.append("--cpu=darwin_arm64") + bazel_argv.append("--macos_cpus=arm64") + + elif platform.machine() == "arm64": + bazel_argv.append("--macos_minimum_os=11.0") + + self.spawn(bazel_argv) + + shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' + ext_name = ext.target_name + shared_lib_suffix + ext_bazel_bin_path = temp_path / 'bazel-bin' / ext.relpath / ext_name + + ext_dest_path = Path(self.get_ext_fullpath(ext.name)) + shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + + # explicitly call `bazel shutdown` for graceful exit + self.spawn(["bazel", "shutdown"]) setuptools.setup( - name="google_benchmark", - version=_get_version(), - url="https://github.com/google/benchmark", - description="A library to benchmark code snippets.", - author="Google", - author_email="benchmark-py@google.com", - # Contained modules and scripts. 
- package_dir={"": "bindings/python"}, - packages=setuptools.find_packages("bindings/python"), - install_requires=_parse_requirements("bindings/python/requirements.txt"), cmdclass=dict(build_ext=BuildBazelExtension), ext_modules=[ BazelExtension( - "google_benchmark._benchmark", - "//bindings/python/google_benchmark:_benchmark", + name="google_benchmark._benchmark", + bazel_target="//bindings/python/google_benchmark:_benchmark", ) ], - zip_safe=False, - # PyPI package information. - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Topic :: Software Development :: Testing", - "Topic :: System :: Benchmark", - ], - license="Apache 2.0", - keywords="benchmark", ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 35d559e..daf82fb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,32 +25,42 @@ set_target_properties(benchmark PROPERTIES SOVERSION ${GENERIC_LIB_SOVERSION} ) target_include_directories(benchmark PUBLIC - $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> - ) + $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include> +) -# Link threads. -target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) -find_library(LIBRT rt) -if(LIBRT) - target_link_libraries(benchmark ${LIBRT}) +# libpfm, if available +if (PFM_FOUND) + target_link_libraries(benchmark PRIVATE PFM::libpfm) + target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM) endif() -if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) -endif() -if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*") - message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.") - target_link_libraries(benchmark -pthread) +# pthread affinity, if available +if(HAVE_PTHREAD_AFFINITY) + target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY) endif() +# Link threads. 
+target_link_libraries(benchmark PRIVATE Threads::Threads) + +target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES}) + +if(HAVE_LIB_RT) + target_link_libraries(benchmark PRIVATE rt) +endif(HAVE_LIB_RT) + + # We need extra libraries on Windows if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") - target_link_libraries(benchmark shlwapi) + target_link_libraries(benchmark PRIVATE shlwapi) endif() # We need extra libraries on Solaris if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") - target_link_libraries(benchmark kstat) + target_link_libraries(benchmark PRIVATE kstat) +endif() + +if (NOT BUILD_SHARED_LIBS) + target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE) endif() # Benchmark main library @@ -60,34 +70,45 @@ set_target_properties(benchmark_main PROPERTIES OUTPUT_NAME "benchmark_main" VERSION ${GENERIC_LIB_VERSION} SOVERSION ${GENERIC_LIB_SOVERSION} + DEFINE_SYMBOL benchmark_EXPORTS ) -target_include_directories(benchmark PUBLIC - $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> - ) -target_link_libraries(benchmark_main benchmark::benchmark) - +target_link_libraries(benchmark_main PUBLIC benchmark::benchmark) -set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") +set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") +set(targets_to_export benchmark benchmark_main) set(targets_export_name "${PROJECT_NAME}Targets") set(namespace "${PROJECT_NAME}::") include(CMakePackageConfigHelpers) + +configure_package_config_file ( + ${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in + ${project_config} + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) write_basic_package_version_file( "${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion ) -configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY) configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) +export ( + TARGETS ${targets_to_export} + NAMESPACE "${namespace}" + FILE ${generated_dir}/${targets_export_name}.cmake +) + if (BENCHMARK_ENABLE_INSTALL) # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) install( - TARGETS benchmark benchmark_main + TARGETS ${targets_to_export} EXPORT ${targets_export_name} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} @@ -96,6 +117,7 @@ if (BENCHMARK_ENABLE_INSTALL) install( DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" + "${PROJECT_BINARY_DIR}/include/benchmark" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.*h") @@ -112,3 +134,37 @@ if (BENCHMARK_ENABLE_INSTALL) NAMESPACE "${namespace}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") endif() + +if (BENCHMARK_ENABLE_DOXYGEN) + find_package(Doxygen REQUIRED) + set(DOXYGEN_QUIET YES) + set(DOXYGEN_RECURSIVE YES) + set(DOXYGEN_GENERATE_HTML YES) + set(DOXYGEN_GENERATE_MAN NO) + set(DOXYGEN_MARKDOWN_SUPPORT YES) + set(DOXYGEN_BUILTIN_STL_SUPPORT YES) + set(DOXYGEN_EXTRACT_PACKAGE YES) + set(DOXYGEN_EXTRACT_STATIC YES) + set(DOXYGEN_SHOW_INCLUDE_FILES YES) + set(DOXYGEN_BINARY_TOC YES) + set(DOXYGEN_TOC_EXPAND YES) + set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md") + doxygen_add_docs(benchmark_doxygen + docs + include + src + ALL + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT "Building 
documentation with Doxygen.") + if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS) + install( + DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/" + DESTINATION ${CMAKE_INSTALL_DOCDIR}) + endif() +else() + if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS) + install( + DIRECTORY "${PROJECT_SOURCE_DIR}/docs/" + DESTINATION ${CMAKE_INSTALL_DOCDIR}) + endif() +endif() diff --git a/src/benchmark.cc b/src/benchmark.cc index 1c049f2..6139e59 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -13,12 +13,13 @@ // limitations under the License. #include "benchmark/benchmark.h" + #include "benchmark_api_internal.h" #include "benchmark_runner.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include <sys/resource.h> #endif #include <sys/time.h> @@ -32,7 +33,10 @@ #include <cstdlib> #include <fstream> #include <iostream> +#include <limits> +#include <map> #include <memory> +#include <random> #include <string> #include <thread> #include <utility> @@ -45,94 +49,146 @@ #include "internal_macros.h" #include "log.h" #include "mutex.h" +#include "perf_counters.h" #include "re.h" #include "statistics.h" #include "string_util.h" #include "thread_manager.h" #include "thread_timer.h" +namespace benchmark { // Print a list of benchmarks. This option overrides all other options. -DEFINE_bool(benchmark_list_tests, false); +BM_DEFINE_bool(benchmark_list_tests, false); // A regular expression that specifies the set of benchmarks to execute. If // this flag is empty, or if this flag is the string \"all\", all benchmarks // linked into the binary are run. -DEFINE_string(benchmark_filter, "."); +BM_DEFINE_string(benchmark_filter, ""); -// Minimum number of seconds we should run benchmark before results are -// considered significant. For cpu-time based tests, this is the lower bound +// Specification of how long to run the benchmark. +// +// It can be either an exact number of iterations (specified as `<integer>x`), +// or a minimum number of seconds (specified as `<float>s`). If the latter +// format (ie., min seconds) is used, the system may run the benchmark longer +// until the results are considered significant. +// +// For backward compatibility, the `s` suffix may be omitted, in which case, +// the specified number is interpreted as the number of seconds. +// +// For cpu-time based tests, this is the lower bound // on the total cpu time used by all threads that make up the test. For // real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. -DEFINE_double(benchmark_min_time, 0.5); +BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); + +// Minimum number of seconds a benchmark should be run before results should be +// taken into account. This e.g can be necessary for benchmarks of code which +// needs to fill some form of cache before performance is of interest. +// Note: results gathered within this period are discarded and not used for +// reported result. +BM_DEFINE_double(benchmark_min_warmup_time, 0.0); // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. -DEFINE_int32(benchmark_repetitions, 1); +BM_DEFINE_int32(benchmark_repetitions, 1); + +// If set, enable random interleaving of repetitions of all benchmarks. +// See http://github.com/google/benchmark/issues/1051 for details. 
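The flag changes above turn --benchmark_min_time into a string that accepts either an exact iteration count (`<integer>x`) or a duration (`<float>s`), and add --benchmark_min_warmup_time. The same limits can also be pinned per benchmark in code; a sketch, with BM_Parse as a placeholder workload:

#include <benchmark/benchmark.h>

static void BM_Parse(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::ClobberMemory();  // stand-in for the code under test
  }
}
// Roughly the per-benchmark equivalent of
//   --benchmark_min_time=2s --benchmark_min_warmup_time=0.5 --benchmark_repetitions=3
BENCHMARK(BM_Parse)->MinTime(2.0)->MinWarmUpTime(0.5)->Repetitions(3);

// Pin an exact iteration count instead, like --benchmark_min_time=1000x.
BENCHMARK(BM_Parse)->Name("BM_Parse_fixed_iters")->Iterations(1000);
BENCHMARK_MAIN();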
+BM_DEFINE_bool(benchmark_enable_random_interleaving, false); // Report the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are reported for // repeated benchmarks. Affects all reporters. -DEFINE_bool(benchmark_report_aggregates_only, false); +BM_DEFINE_bool(benchmark_report_aggregates_only, false); // Display the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are displayed for // repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects // the display reporter, but *NOT* file reporter, which will still contain // all the output. -DEFINE_bool(benchmark_display_aggregates_only, false); +BM_DEFINE_bool(benchmark_display_aggregates_only, false); // The format to use for console output. // Valid values are 'console', 'json', or 'csv'. -DEFINE_string(benchmark_format, "console"); +BM_DEFINE_string(benchmark_format, "console"); // The format to use for file output. // Valid values are 'console', 'json', or 'csv'. -DEFINE_string(benchmark_out_format, "json"); +BM_DEFINE_string(benchmark_out_format, "json"); // The file to write additional output to. -DEFINE_string(benchmark_out, ""); +BM_DEFINE_string(benchmark_out, ""); // Whether to use colors in the output. Valid values: // 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if // the output is being sent to a terminal and the TERM environment variable is // set to a terminal type that supports colors. -DEFINE_string(benchmark_color, "auto"); +BM_DEFINE_string(benchmark_color, "auto"); // Whether to use tabular format when printing user counters to the console. // Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. -DEFINE_bool(benchmark_counters_tabular, false); +BM_DEFINE_bool(benchmark_counters_tabular, false); -// The level of verbose logging to output -DEFINE_int32(v, 0); +// List of additional perf counters to collect, in libpfm format. For more +// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html +BM_DEFINE_string(benchmark_perf_counters, ""); -namespace benchmark { +// Extra context to include in the output formatted as comma-separated key-value +// pairs. Kept internal as it's only used for parsing from env/command line. +BM_DEFINE_kvpairs(benchmark_context, {}); + +// Set the default time unit to use for reports +// Valid values are 'ns', 'us', 'ms' or 's' +BM_DEFINE_string(benchmark_time_unit, ""); + +// The level of verbose logging to output +BM_DEFINE_int32(v, 0); namespace internal { +std::map<std::string, std::string>* global_context = nullptr; + +BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() { + return global_context; +} + // FIXME: wouldn't LTO mess this up? 
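Several of the new flags above have programmatic counterparts added elsewhere in this patch (AddCustomContext for --benchmark_context, SetDefaultTimeUnit for --benchmark_time_unit, SetBenchmarkFilter for --benchmark_filter), and Initialize gains a hook for the usage text. A sketch of a custom main wiring them together; the key/value strings, filter and help text are arbitrary examples:

#include <benchmark/benchmark.h>

#include <cstdio>

static void PrintHelp() {
  // Printed instead of the default usage text when an invalid flag is seen.
  std::printf("mybench [--benchmark_filter=<regex>] [--benchmark_format=<console|json|csv>]\n");
}

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv, PrintHelp);
  if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;

  benchmark::AddCustomContext("build", "release");         // like --benchmark_context=build=release
  benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);  // like --benchmark_time_unit=ms
  benchmark::SetBenchmarkFilter("BM_Parse.*");             // like --benchmark_filter=BM_Parse.*

  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();  // releases the global context map introduced above
  return 0;
}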
void UseCharPointer(char const volatile*) {} } // namespace internal -State::State(IterationCount max_iters, const std::vector<int64_t>& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, - internal::ThreadManager* manager) +State::State(std::string name, IterationCount max_iters, + const std::vector<int64_t>& ranges, int thread_i, int n_threads, + internal::ThreadTimer* timer, internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) : total_iterations_(0), batch_leftover_(0), max_iterations(max_iters), started_(false), finished_(false), - error_occurred_(false), + skipped_(internal::NotSkipped), range_(ranges), complexity_n_(0), - counters(), - thread_index(thread_i), - threads(n_threads), + name_(std::move(name)), + thread_index_(thread_i), + threads_(n_threads), timer_(timer), - manager_(manager) { - CHECK(max_iterations != 0) << "At least one iteration must be run"; - CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + manager_(manager), + perf_counters_measurement_(perf_counters_measurement) { + BM_CHECK(max_iterations != 0) << "At least one iteration must be run"; + BM_CHECK_LT(thread_index_, threads_) + << "thread_index must be less than threads"; + + // Add counters with correct flag now. If added with `counters[name]` in + // `PauseTiming`, a new `Counter` will be inserted the first time, which + // won't have the flag. Inserting them now also reduces the allocations + // during the benchmark. + if (perf_counters_measurement_) { + for (const std::string& counter_name : + perf_counters_measurement_->names()) { + counters[counter_name] = Counter(0.0, Counter::kAvgIterations); + } + } // Note: The use of offsetof below is technically undefined until C++17 // because State is not a standard layout type. However, all compilers @@ -147,37 +203,78 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges, #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winvalid-offsetof" #endif +#if defined(__NVCC__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 1427 +#endif +#if defined(__NVCOMPILER) +#pragma diagnostic push +#pragma diag_suppress offset_in_non_POD_nonstandard +#endif // Offset tests to ensure commonly accessed data is on the first cache line. const int cache_line_size = 64; - static_assert(offsetof(State, error_occurred_) <= - (cache_line_size - sizeof(error_occurred_)), - ""); + static_assert( + offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), ""); #if defined(__INTEL_COMPILER) #pragma warning pop #elif defined(__GNUC__) #pragma GCC diagnostic pop #endif +#if defined(__NVCC__) +#pragma nv_diagnostic pop +#endif +#if defined(__NVCOMPILER) +#pragma diagnostic pop +#endif } void State::PauseTiming() { // Add in time accumulated so far - CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !skipped()); timer_->StopTimer(); + if (perf_counters_measurement_) { + std::vector<std::pair<std::string, double>> measurements; + if (!perf_counters_measurement_->Stop(measurements)) { + BM_CHECK(false) << "Perf counters read the value failed."; + } + for (const auto& name_and_measurement : measurements) { + const std::string& name = name_and_measurement.first; + const double measurement = name_and_measurement.second; + // Counter was inserted with `kAvgIterations` flag by the constructor. 
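The State changes above pre-register perf-counter entries with Counter::kAvgIterations and fold the measured values in on PauseTiming; from user code, pausing, resuming and user counters keep their existing shape. A sketch, where BM_BatchInsert and the batch size are illustrative:

#include <benchmark/benchmark.h>

#include <vector>

static void BM_BatchInsert(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();  // per-iteration setup, excluded from the measurement
    std::vector<int> batch;
    batch.reserve(1024);
    state.ResumeTiming();

    for (int i = 0; i < 1024; ++i) batch.push_back(i);
    benchmark::DoNotOptimize(batch.data());
  }
  // Reported as a rate (items per second), alongside any perf-counter entries.
  state.counters["items"] = benchmark::Counter(
      1024.0 * state.iterations(), benchmark::Counter::kIsRate);
}
BENCHMARK(BM_BatchInsert);
BENCHMARK_MAIN();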
+ assert(counters.find(name) != counters.end()); + counters[name].value += measurement; + } + } } void State::ResumeTiming() { - CHECK(started_ && !finished_ && !error_occurred_); + BM_CHECK(started_ && !finished_ && !skipped()); timer_->StartTimer(); + if (perf_counters_measurement_) { + perf_counters_measurement_->Start(); + } } -void State::SkipWithError(const char* msg) { - CHECK(msg); - error_occurred_ = true; +void State::SkipWithMessage(const std::string& msg) { + skipped_ = internal::SkippedWithMessage; { MutexLock l(manager_->GetBenchmarkMutex()); - if (manager_->results.has_error_ == false) { - manager_->results.error_message_ = msg; - manager_->results.has_error_ = true; + if (internal::NotSkipped == manager_->results.skipped_) { + manager_->results.skip_message_ = msg; + manager_->results.skipped_ = skipped_; + } + } + total_iterations_ = 0; + if (timer_->running()) timer_->StopTimer(); +} + +void State::SkipWithError(const std::string& msg) { + skipped_ = internal::SkippedWithError; + { + MutexLock l(manager_->GetBenchmarkMutex()); + if (internal::NotSkipped == manager_->results.skipped_) { + manager_->results.skip_message_ = msg; + manager_->results.skipped_ = skipped_; } } total_iterations_ = 0; @@ -188,22 +285,22 @@ void State::SetIterationTime(double seconds) { timer_->SetIterationTime(seconds); } -void State::SetLabel(const char* label) { +void State::SetLabel(const std::string& label) { MutexLock l(manager_->GetBenchmarkMutex()); manager_->results.report_label_ = label; } void State::StartKeepRunning() { - CHECK(!started_ && !finished_); + BM_CHECK(!started_ && !finished_); started_ = true; - total_iterations_ = error_occurred_ ? 0 : max_iterations; + total_iterations_ = skipped() ? 0 : max_iterations; manager_->StartStopBarrier(); - if (!error_occurred_) ResumeTiming(); + if (!skipped()) ResumeTiming(); } void State::FinishKeepRunning() { - CHECK(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { + BM_CHECK(started_ && (!finished_ || skipped())); + if (!skipped()) { PauseTiming(); } // Total iterations has now wrapped around past 0. Fix this. @@ -215,11 +312,42 @@ void State::FinishKeepRunning() { namespace internal { namespace { +// Flushes streams after invoking reporter methods that write to them. This +// ensures users get timely updates even when streams are not line-buffered. +void FlushStreams(BenchmarkReporter* reporter) { + if (!reporter) return; + std::flush(reporter->GetOutputStream()); + std::flush(reporter->GetErrorStream()); +} + +// Reports in both display and file reporters. +void Report(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, const RunResults& run_results) { + auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only, + const RunResults& results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + aggregates_only &= !results.aggregates_only.empty(); + if (!aggregates_only) reporter->ReportRuns(results.non_aggregates); + if (!results.aggregates_only.empty()) + reporter->ReportRuns(results.aggregates_only); + }; + + report_one(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report_one(file_reporter, run_results.file_report_aggregates_only, + run_results); + + FlushStreams(display_reporter); + FlushStreams(file_reporter); +} + void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { // Note the file_reporter can be null. 
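SkipWithError now takes a std::string and gains a sibling, SkipWithMessage, which records a skip without flagging the run as an error; SetLabel likewise takes a std::string. A sketch of how a benchmark might use them; the probe and helper below are assumptions for illustration:

#include <benchmark/benchmark.h>

static bool HardwareCountersAvailable() { return false; }  // assumed probe
static bool DoMeasuredWork() { return true; }               // assumed workload

static void BM_WithCounters(benchmark::State& state) {
  if (!HardwareCountersAvailable()) {
    // Shown as "skipped", not as an error, in the reports.
    state.SkipWithMessage("hardware counters unavailable on this machine");
    return;
  }
  for (auto _ : state) {
    if (!DoMeasuredWork()) {
      state.SkipWithError("measured work failed");  // recorded as an errored run
      break;
    }
  }
  state.SetLabel("counters:on");  // free-form text attached to the result
}
BENCHMARK(BM_WithCounters);
BENCHMARK_MAIN();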
- CHECK(display_reporter != nullptr); + BM_CHECK(display_reporter != nullptr); // Determine the width of the name field using a minimum width of 10. bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; @@ -227,10 +355,10 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, size_t stat_field_width = 0; for (const BenchmarkInstance& benchmark : benchmarks) { name_field_width = - std::max<size_t>(name_field_width, benchmark.name.str().size()); - might_have_aggregates |= benchmark.repetitions > 1; + std::max<size_t>(name_field_width, benchmark.name().str().size()); + might_have_aggregates |= benchmark.repetitions() > 1; - for (const auto& Stat : *benchmark.statistics) + for (const auto& Stat : benchmark.statistics()) stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size()); } if (might_have_aggregates) name_field_width += 1 + stat_field_width; @@ -239,75 +367,129 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, BenchmarkReporter::Context context; context.name_field_width = name_field_width; - // Keep track of running times of all instances of current benchmark - std::vector<BenchmarkReporter::Run> complexity_reports; - - // We flush streams after invoking reporter methods that write to them. This - // ensures users get timely updates even when streams are not line-buffered. - auto flushStreams = [](BenchmarkReporter* reporter) { - if (!reporter) return; - std::flush(reporter->GetOutputStream()); - std::flush(reporter->GetErrorStream()); - }; + // Keep track of running times of all instances of each benchmark family. + std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports> + per_family_reports; if (display_reporter->ReportContext(context) && (!file_reporter || file_reporter->ReportContext(context))) { - flushStreams(display_reporter); - flushStreams(file_reporter); - - for (const auto& benchmark : benchmarks) { - RunResults run_results = RunBenchmark(benchmark, &complexity_reports); - - auto report = [&run_results](BenchmarkReporter* reporter, - bool report_aggregates_only) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); - }; - - report(display_reporter, run_results.display_report_aggregates_only); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only); + FlushStreams(display_reporter); + FlushStreams(file_reporter); + + size_t num_repetitions_total = 0; + + // This perfcounters object needs to be created before the runners vector + // below so it outlasts their lifetime. + PerfCountersMeasurement perfcounters( + StrSplit(FLAGS_benchmark_perf_counters, ',')); + + // Vector of benchmarks to run + std::vector<internal::BenchmarkRunner> runners; + runners.reserve(benchmarks.size()); + + // Count the number of benchmarks with threads to warn the user in case + // performance counters are used. 
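The loop above now sizes the name column from benchmark.statistics(), since each extra statistic widens the printed aggregate names. Statistics remain extensible per benchmark; a sketch adding a max over ten repetitions, with an arbitrary workload and lambda:

#include <benchmark/benchmark.h>

#include <algorithm>
#include <vector>

static void BM_Sort(benchmark::State& state) {
  std::vector<int> v(1000, 42);  // stand-in workload
  for (auto _ : state) {
    std::sort(v.begin(), v.end());
    benchmark::DoNotOptimize(v.data());
  }
}
// Adds a "..._max" aggregate next to the built-in mean/median/stddev
// (and the "cv" statistic this patch registers by default).
BENCHMARK(BM_Sort)->Repetitions(10)->ComputeStatistics(
    "max", [](const std::vector<double>& values) -> double {
      return *std::max_element(values.begin(), values.end());
    });
BENCHMARK_MAIN();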
+ int benchmarks_with_threads = 0; + + // Loop through all benchmarks + for (const BenchmarkInstance& benchmark : benchmarks) { + BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; + if (benchmark.complexity() != oNone) + reports_for_family = &per_family_reports[benchmark.family_index()]; + benchmarks_with_threads += (benchmark.threads() > 1); + runners.emplace_back(benchmark, &perfcounters, reports_for_family); + int num_repeats_of_this_instance = runners.back().GetNumRepeats(); + num_repetitions_total += num_repeats_of_this_instance; + if (reports_for_family) + reports_for_family->num_runs_total += num_repeats_of_this_instance; + } + assert(runners.size() == benchmarks.size() && "Unexpected runner count."); + + // The use of performance counters with threads would be unintuitive for + // the average user so we need to warn them about this case + if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) { + GetErrorLogInstance() + << "***WARNING*** There are " << benchmarks_with_threads + << " benchmarks with threads and " << perfcounters.num_counters() + << " performance counters were requested. Beware counters will " + "reflect the combined usage across all " + "threads.\n"; + } + + std::vector<size_t> repetition_indices; + repetition_indices.reserve(num_repetitions_total); + for (size_t runner_index = 0, num_runners = runners.size(); + runner_index != num_runners; ++runner_index) { + const internal::BenchmarkRunner& runner = runners[runner_index]; + std::fill_n(std::back_inserter(repetition_indices), + runner.GetNumRepeats(), runner_index); + } + assert(repetition_indices.size() == num_repetitions_total && + "Unexpected number of repetition indexes."); + + if (FLAGS_benchmark_enable_random_interleaving) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(repetition_indices.begin(), repetition_indices.end(), g); + } + + for (size_t repetition_index : repetition_indices) { + internal::BenchmarkRunner& runner = runners[repetition_index]; + runner.DoOneRepetition(); + if (runner.HasRepeatsRemaining()) continue; + // FIXME: report each repetition separately, not all of them in bulk. 
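The runner setup above counts benchmarks that use more than one thread so it can warn that any requested perf counters will reflect combined usage across threads. Thread counts themselves are configured on the benchmark; a small sketch, with BM_SharedCounter as an illustrative workload:

#include <benchmark/benchmark.h>

#include <atomic>

static std::atomic<int> hits{0};

static void BM_SharedCounter(benchmark::State& state) {
  for (auto _ : state) {
    hits.fetch_add(1, std::memory_order_relaxed);
  }
}
// Runs the benchmark with 1, 2, 4 and 8 concurrent threads; combined with
// --benchmark_perf_counters=<event> the warning above applies, because the
// counter values cover all threads together.
BENCHMARK(BM_SharedCounter)->ThreadRange(1, 8)->UseRealTime();
BENCHMARK_MAIN();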
- flushStreams(display_reporter); - flushStreams(file_reporter); + display_reporter->ReportRunsConfig( + runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); + if (file_reporter) + file_reporter->ReportRunsConfig( + runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters()); + + RunResults run_results = runner.GetResults(); + + // Maybe calculate complexity report + if (const auto* reports_for_family = runner.GetReportsForFamily()) { + if (reports_for_family->num_runs_done == + reports_for_family->num_runs_total) { + auto additional_run_stats = ComputeBigO(reports_for_family->Runs); + run_results.aggregates_only.insert(run_results.aggregates_only.end(), + additional_run_stats.begin(), + additional_run_stats.end()); + per_family_reports.erase( + static_cast<int>(reports_for_family->Runs.front().family_index)); + } + } + + Report(display_reporter, file_reporter, run_results); } } display_reporter->Finalize(); if (file_reporter) file_reporter->Finalize(); - flushStreams(display_reporter); - flushStreams(file_reporter); + FlushStreams(display_reporter); + FlushStreams(file_reporter); } // Disable deprecated warnings temporarily because we need to reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif +BENCHMARK_DISABLE_DEPRECATED_WARNING std::unique_ptr<BenchmarkReporter> CreateReporter( std::string const& name, ConsoleReporter::OutputOptions output_opts) { typedef std::unique_ptr<BenchmarkReporter> PtrType; if (name == "console") { return PtrType(new ConsoleReporter(output_opts)); - } else if (name == "json") { - return PtrType(new JSONReporter); - } else if (name == "csv") { - return PtrType(new CSVReporter); - } else { - std::cerr << "Unexpected format: '" << name << "'\n"; - std::exit(1); } + if (name == "json") { + return PtrType(new JSONReporter()); + } + if (name == "csv") { + return PtrType(new CSVReporter()); + } + std::cerr << "Unexpected format: '" << name << "'\n"; + std::exit(1); } -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif +BENCHMARK_RESTORE_DEPRECATED_WARNING } // end namespace @@ -341,17 +523,41 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { } // end namespace internal +BenchmarkReporter* CreateDefaultDisplayReporter() { + static auto default_display_reporter = + internal::CreateReporter(FLAGS_benchmark_format, + internal::GetOutputOptions()) + .release(); + return default_display_reporter; +} + size_t RunSpecifiedBenchmarks() { - return RunSpecifiedBenchmarks(nullptr, nullptr); + return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(std::string spec) { + return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) { - return RunSpecifiedBenchmarks(display_reporter, nullptr); + return RunSpecifiedBenchmarks(display_reporter, nullptr, + FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + std::string spec) { + return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec)); } size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { - std::string spec = FLAGS_benchmark_filter; + return RunSpecifiedBenchmarks(display_reporter, file_reporter, + FLAGS_benchmark_filter); +} + +size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + 
BenchmarkReporter* file_reporter, + std::string spec) { if (spec.empty() || spec == "all") spec = "."; // Regexp that matches all benchmarks @@ -360,8 +566,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::unique_ptr<BenchmarkReporter> default_display_reporter; std::unique_ptr<BenchmarkReporter> default_file_reporter; if (!display_reporter) { - default_display_reporter = internal::CreateReporter( - FLAGS_benchmark_format, internal::GetOutputOptions()); + default_display_reporter.reset(CreateDefaultDisplayReporter()); display_reporter = default_display_reporter.get(); } auto& Out = display_reporter->GetOutputStream(); @@ -377,12 +582,14 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, if (!fname.empty()) { output_file.open(fname); if (!output_file.is_open()) { - Err << "invalid file name: '" << fname << std::endl; + Err << "invalid file name: '" << fname << "'" << std::endl; std::exit(1); } if (!file_reporter) { default_file_reporter = internal::CreateReporter( - FLAGS_benchmark_out_format, ConsoleReporter::OO_None); + FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular + ? ConsoleReporter::OO_Tabular + : ConsoleReporter::OO_None); file_reporter = default_file_reporter.get(); } file_reporter->SetOutputStream(&output_file); @@ -399,7 +606,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, if (FLAGS_benchmark_list_tests) { for (auto const& benchmark : benchmarks) - Out << benchmark.name.str() << "\n"; + Out << benchmark.name().str() << "\n"; } else { internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); } @@ -407,30 +614,64 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, return benchmarks.size(); } +namespace { +// stores the time unit benchmarks use by default +TimeUnit default_time_unit = kNanosecond; +} // namespace + +TimeUnit GetDefaultTimeUnit() { return default_time_unit; } + +void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; } + +std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; } + +void SetBenchmarkFilter(std::string value) { + FLAGS_benchmark_filter = std::move(value); +} + +int32_t GetBenchmarkVerbosity() { return FLAGS_v; } + void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } +void AddCustomContext(const std::string& key, const std::string& value) { + if (internal::global_context == nullptr) { + internal::global_context = new std::map<std::string, std::string>(); + } + if (!internal::global_context->emplace(key, value).second) { + std::cerr << "Failed to add custom context \"" << key << "\" as it already " + << "exists with value \"" << value << "\"\n"; + } +} + namespace internal { +void (*HelperPrintf)(); + void PrintUsageAndExit() { - fprintf(stdout, - "benchmark" - " [--benchmark_list_tests={true|false}]\n" - " [--benchmark_filter=<regex>]\n" - " [--benchmark_min_time=<min_time>]\n" - " [--benchmark_repetitions=<num_repetitions>]\n" - " [--benchmark_report_aggregates_only={true|false}]\n" - " [--benchmark_display_aggregates_only={true|false}]\n" - " [--benchmark_format=<console|json|csv>]\n" - " [--benchmark_out=<filename>]\n" - " [--benchmark_out_format=<json|console|csv>]\n" - " [--benchmark_color={auto|true|false}]\n" - " [--benchmark_counters_tabular={true|false}]\n" - " [--v=<verbosity>]\n"); + HelperPrintf(); exit(0); } +void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) { + if (time_unit_flag == "s") { + return SetDefaultTimeUnit(kSecond); + } + if (time_unit_flag == 
"ms") { + return SetDefaultTimeUnit(kMillisecond); + } + if (time_unit_flag == "us") { + return SetDefaultTimeUnit(kMicrosecond); + } + if (time_unit_flag == "ns") { + return SetDefaultTimeUnit(kNanosecond); + } + if (!time_unit_flag.empty()) { + PrintUsageAndExit(); + } +} + void ParseCommandLineFlags(int* argc, char** argv) { using namespace benchmark; BenchmarkReporter::Context::executable_name = @@ -439,10 +680,14 @@ void ParseCommandLineFlags(int* argc, char** argv) { if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseDoubleFlag(argv[i], "benchmark_min_time", + ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || + ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", + &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || + ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", + &FLAGS_benchmark_enable_random_interleaving) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", &FLAGS_benchmark_report_aggregates_only) || ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", @@ -452,11 +697,14 @@ void ParseCommandLineFlags(int* argc, char** argv) { ParseStringFlag(argv[i], "benchmark_out_format", &FLAGS_benchmark_out_format) || ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || - // "color_print" is the deprecated name for "benchmark_color". - // TODO: Remove this. - ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || ParseBoolFlag(argv[i], "benchmark_counters_tabular", &FLAGS_benchmark_counters_tabular) || + ParseStringFlag(argv[i], "benchmark_perf_counters", + &FLAGS_benchmark_perf_counters) || + ParseKeyValueFlag(argv[i], "benchmark_context", + &FLAGS_benchmark_context) || + ParseStringFlag(argv[i], "benchmark_time_unit", + &FLAGS_benchmark_time_unit) || ParseInt32Flag(argv[i], "v", &FLAGS_v)) { for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; @@ -467,13 +715,18 @@ void ParseCommandLineFlags(int* argc, char** argv) { } } for (auto const* flag : - {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) + {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) { if (*flag != "console" && *flag != "json" && *flag != "csv") { PrintUsageAndExit(); } + } + SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit); if (FLAGS_benchmark_color.empty()) { PrintUsageAndExit(); } + for (const auto& kv : FLAGS_benchmark_context) { + AddCustomContext(kv.first, kv.second); + } } int InitializeStreams() { @@ -483,11 +736,38 @@ int InitializeStreams() { } // end namespace internal -void Initialize(int* argc, char** argv) { +void PrintDefaultHelp() { + fprintf(stdout, + "benchmark" + " [--benchmark_list_tests={true|false}]\n" + " [--benchmark_filter=<regex>]\n" + " [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n" + " [--benchmark_min_warmup_time=<min_warmup_time>]\n" + " [--benchmark_repetitions=<num_repetitions>]\n" + " [--benchmark_enable_random_interleaving={true|false}]\n" + " [--benchmark_report_aggregates_only={true|false}]\n" + " [--benchmark_display_aggregates_only={true|false}]\n" + " [--benchmark_format=<console|json|csv>]\n" + " [--benchmark_out=<filename>]\n" + " [--benchmark_out_format=<json|console|csv>]\n" + " [--benchmark_color={auto|true|false}]\n" + " [--benchmark_counters_tabular={true|false}]\n" +#if defined HAVE_LIBPFM + " [--benchmark_perf_counters=<counter>,...]\n" +#endif + " 
[--benchmark_context=<key>=<value>,...]\n" + " [--benchmark_time_unit={ns|us|ms|s}]\n" + " [--v=<verbosity>]\n"); +} + +void Initialize(int* argc, char** argv, void (*HelperPrintf)()) { + internal::HelperPrintf = HelperPrintf; internal::ParseCommandLineFlags(argc, argv); internal::LogLevel() = FLAGS_v; } +void Shutdown() { delete internal::global_context; } + bool ReportUnrecognizedArguments(int argc, char** argv) { for (int i = 1; i < argc; ++i) { fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index d468a25..286f986 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -1,15 +1,118 @@ #include "benchmark_api_internal.h" +#include <cinttypes> + +#include "string_util.h" + namespace benchmark { namespace internal { -State BenchmarkInstance::Run(IterationCount iters, int thread_id, - internal::ThreadTimer* timer, - internal::ThreadManager* manager) const { - State st(iters, arg, thread_id, threads, timer, manager); - benchmark->Run(st); +BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, + int per_family_instance_idx, + const std::vector<int64_t>& args, + int thread_count) + : benchmark_(*benchmark), + family_index_(family_idx), + per_family_instance_index_(per_family_instance_idx), + aggregation_report_mode_(benchmark_.aggregation_report_mode_), + args_(args), + time_unit_(benchmark_.GetTimeUnit()), + measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), + use_real_time_(benchmark_.use_real_time_), + use_manual_time_(benchmark_.use_manual_time_), + complexity_(benchmark_.complexity_), + complexity_lambda_(benchmark_.complexity_lambda_), + statistics_(benchmark_.statistics_), + repetitions_(benchmark_.repetitions_), + min_time_(benchmark_.min_time_), + min_warmup_time_(benchmark_.min_warmup_time_), + iterations_(benchmark_.iterations_), + threads_(thread_count) { + name_.function_name = benchmark_.name_; + + size_t arg_i = 0; + for (const auto& arg : args) { + if (!name_.args.empty()) { + name_.args += '/'; + } + + if (arg_i < benchmark->arg_names_.size()) { + const auto& arg_name = benchmark_.arg_names_[arg_i]; + if (!arg_name.empty()) { + name_.args += StrFormat("%s:", arg_name.c_str()); + } + } + + name_.args += StrFormat("%" PRId64, arg); + ++arg_i; + } + + if (!IsZero(benchmark->min_time_)) { + name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); + } + + if (!IsZero(benchmark->min_warmup_time_)) { + name_.min_warmup_time = + StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_); + } + + if (benchmark_.iterations_ != 0) { + name_.iterations = StrFormat( + "iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_)); + } + + if (benchmark_.repetitions_ != 0) { + name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_); + } + + if (benchmark_.measure_process_cpu_time_) { + name_.time_type = "process_time"; + } + + if (benchmark_.use_manual_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "manual_time"; + } else if (benchmark_.use_real_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "real_time"; + } + + if (!benchmark_.thread_counts_.empty()) { + name_.threads = StrFormat("threads:%d", threads_); + } + + setup_ = benchmark_.setup_; + teardown_ = benchmark_.teardown_; +} + +State BenchmarkInstance::Run( + IterationCount iters, int thread_id, internal::ThreadTimer* timer, + 
internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) const { + State st(name_.function_name, iters, args_, thread_id, threads_, timer, + manager, perf_counters_measurement); + benchmark_.Run(st); return st; } -} // internal -} // benchmark +void BenchmarkInstance::Setup() const { + if (setup_) { + State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, + nullptr, nullptr, nullptr); + setup_(st); + } +} + +void BenchmarkInstance::Teardown() const { + if (teardown_) { + State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_, + nullptr, nullptr, nullptr); + teardown_(st); + } +} +} // namespace internal +} // namespace benchmark diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 264eff9..94f5165 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -1,9 +1,6 @@ #ifndef BENCHMARK_API_INTERNAL_H #define BENCHMARK_API_INTERNAL_H -#include "benchmark/benchmark.h" -#include "commandlineflags.h" - #include <cmath> #include <iosfwd> #include <limits> @@ -11,32 +8,68 @@ #include <string> #include <vector> +#include "benchmark/benchmark.h" +#include "commandlineflags.h" + namespace benchmark { namespace internal { // Information kept per benchmark we may want to run -struct BenchmarkInstance { - BenchmarkName name; - Benchmark* benchmark; - AggregationReportMode aggregation_report_mode; - std::vector<int64_t> arg; - TimeUnit time_unit; - int range_multiplier; - bool measure_process_cpu_time; - bool use_real_time; - bool use_manual_time; - BigO complexity; - BigOFunc* complexity_lambda; - UserCounters counters; - const std::vector<Statistics>* statistics; - bool last_benchmark_instance; - int repetitions; - double min_time; - IterationCount iterations; - int threads; // Number of concurrent threads to us +class BenchmarkInstance { + public: + BenchmarkInstance(Benchmark* benchmark, int family_index, + int per_family_instance_index, + const std::vector<int64_t>& args, int threads); + + const BenchmarkName& name() const { return name_; } + int family_index() const { return family_index_; } + int per_family_instance_index() const { return per_family_instance_index_; } + AggregationReportMode aggregation_report_mode() const { + return aggregation_report_mode_; + } + TimeUnit time_unit() const { return time_unit_; } + bool measure_process_cpu_time() const { return measure_process_cpu_time_; } + bool use_real_time() const { return use_real_time_; } + bool use_manual_time() const { return use_manual_time_; } + BigO complexity() const { return complexity_; } + BigOFunc* complexity_lambda() const { return complexity_lambda_; } + const std::vector<Statistics>& statistics() const { return statistics_; } + int repetitions() const { return repetitions_; } + double min_time() const { return min_time_; } + double min_warmup_time() const { return min_warmup_time_; } + IterationCount iterations() const { return iterations_; } + int threads() const { return threads_; } + void Setup() const; + void Teardown() const; State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, - internal::ThreadManager* manager) const; + internal::ThreadManager* manager, + internal::PerfCountersMeasurement* perf_counters_measurement) const; + + private: + BenchmarkName name_; + Benchmark& benchmark_; + const int family_index_; + const int per_family_instance_index_; + AggregationReportMode aggregation_report_mode_; + const std::vector<int64_t>& args_; + TimeUnit time_unit_; + bool 
measure_process_cpu_time_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + UserCounters counters_; + const std::vector<Statistics>& statistics_; + int repetitions_; + double min_time_; + double min_warmup_time_; + IterationCount iterations_; + int threads_; // Number of concurrent threads to us + + typedef void (*callback_function)(const benchmark::State&); + callback_function setup_ = nullptr; + callback_function teardown_ = nullptr; }; bool FindBenchmarksInternal(const std::string& re, @@ -45,6 +78,7 @@ bool FindBenchmarksInternal(const std::string& re, bool IsZero(double n); +BENCHMARK_EXPORT ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); } // end namespace internal diff --git a/src/benchmark_main.cc b/src/benchmark_main.cc index b3b2478..cd61cd2 100644 --- a/src/benchmark_main.cc +++ b/src/benchmark_main.cc @@ -14,4 +14,5 @@ #include "benchmark/benchmark.h" +BENCHMARK_EXPORT int main(int, char**); BENCHMARK_MAIN(); diff --git a/src/benchmark_name.cc b/src/benchmark_name.cc index 2a17ebc..01676bb 100644 --- a/src/benchmark_name.cc +++ b/src/benchmark_name.cc @@ -51,8 +51,9 @@ std::string join(char delimiter, const Ts&... ts) { } } // namespace +BENCHMARK_EXPORT std::string BenchmarkName::str() const { - return join('/', function_name, args, min_time, iterations, repetitions, - time_type, threads); + return join('/', function_name, args, min_time, min_warmup_time, iterations, + repetitions, time_type, threads); } } // namespace benchmark diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 65d9944..e447c9a 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -15,7 +15,7 @@ #include "benchmark_register.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include <sys/resource.h> #endif #include <sys/time.h> @@ -24,6 +24,7 @@ #include <algorithm> #include <atomic> +#include <cinttypes> #include <condition_variable> #include <cstdio> #include <cstdlib> @@ -35,11 +36,6 @@ #include <sstream> #include <thread> -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif -#include <inttypes.h> - #include "benchmark/benchmark.h" #include "benchmark_api_internal.h" #include "check.h" @@ -57,10 +53,13 @@ namespace benchmark { namespace { // For non-dense Range, intermediate values are powers of kRangeMultiplier. -static const int kRangeMultiplier = 8; +static constexpr int kRangeMultiplier = 8; + // The size of a benchmark family determines is the number of inputs to repeat // the benchmark on. If this is "large" then warn the user during configuration. 
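BenchmarkInstance now carries setup_/teardown_ callbacks of type void(const benchmark::State&); the BenchmarkInstance::Setup()/Teardown() definitions just above construct a one-iteration State and invoke them around the runs of an instance. They are registered on the Benchmark object; a sketch with assumed fixture helpers:

#include <benchmark/benchmark.h>

#include <cstdio>

static void OpenFixture(const benchmark::State&) {
  std::printf("opening fixture\n");  // e.g. start a server or open a file
}

static void CloseFixture(const benchmark::State&) {
  std::printf("closing fixture\n");
}

static void BM_UsesFixture(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::ClobberMemory();  // stand-in for work against the fixture
  }
}
BENCHMARK(BM_UsesFixture)->Setup(OpenFixture)->Teardown(CloseFixture);
BENCHMARK_MAIN();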
-static const size_t kMaxFamilySize = 100; +static constexpr size_t kMaxFamilySize = 100; + +static constexpr char kDisabledPrefix[] = "DISABLED_"; } // end namespace namespace internal { @@ -115,15 +114,15 @@ void BenchmarkFamilies::ClearBenchmarks() { bool BenchmarkFamilies::FindBenchmarks( std::string spec, std::vector<BenchmarkInstance>* benchmarks, std::ostream* ErrStream) { - CHECK(ErrStream); + BM_CHECK(ErrStream); auto& Err = *ErrStream; // Make regular expression out of command-line flag std::string error_msg; Regex re; - bool isNegativeFilter = false; + bool is_negative_filter = false; if (spec[0] == '-') { spec.replace(0, 1, ""); - isNegativeFilter = true; + is_negative_filter = true; } if (!re.Init(spec, &error_msg)) { Err << "Could not compile benchmark re: " << error_msg << std::endl; @@ -133,8 +132,13 @@ bool BenchmarkFamilies::FindBenchmarks( // Special list of thread counts to use when none are specified const std::vector<int> one_thread = {1}; + int next_family_index = 0; + MutexLock l(mutex_); for (std::unique_ptr<Benchmark>& family : families_) { + int family_index = next_family_index; + int per_family_instance_index = 0; + // Family was deleted or benchmark doesn't match if (!family) continue; @@ -153,85 +157,27 @@ bool BenchmarkFamilies::FindBenchmarks( << " will be repeated at least " << family_size << " times.\n"; } // reserve in the special case the regex ".", since we know the final - // family size. - if (spec == ".") benchmarks->reserve(family_size); + // family size. this doesn't take into account any disabled benchmarks + // so worst case we reserve more than we need. + if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); for (auto const& args : family->args_) { for (int num_threads : *thread_counts) { - BenchmarkInstance instance; - instance.name.function_name = family->name_; - instance.benchmark = family.get(); - instance.aggregation_report_mode = family->aggregation_report_mode_; - instance.arg = args; - instance.time_unit = family->time_unit_; - instance.range_multiplier = family->range_multiplier_; - instance.min_time = family->min_time_; - instance.iterations = family->iterations_; - instance.repetitions = family->repetitions_; - instance.measure_process_cpu_time = family->measure_process_cpu_time_; - instance.use_real_time = family->use_real_time_; - instance.use_manual_time = family->use_manual_time_; - instance.complexity = family->complexity_; - instance.complexity_lambda = family->complexity_lambda_; - instance.statistics = &family->statistics_; - instance.threads = num_threads; - - // Add arguments to instance name - size_t arg_i = 0; - for (auto const& arg : args) { - if (!instance.name.args.empty()) { - instance.name.args += '/'; - } - - if (arg_i < family->arg_names_.size()) { - const auto& arg_name = family->arg_names_[arg_i]; - if (!arg_name.empty()) { - instance.name.args += StrFormat("%s:", arg_name.c_str()); - } - } - - instance.name.args += StrFormat("%" PRId64, arg); - ++arg_i; - } - - if (!IsZero(family->min_time_)) - instance.name.min_time = - StrFormat("min_time:%0.3f", family->min_time_); - if (family->iterations_ != 0) { - instance.name.iterations = - StrFormat("iterations:%lu", - static_cast<unsigned long>(family->iterations_)); - } - if (family->repetitions_ != 0) - instance.name.repetitions = - StrFormat("repeats:%d", family->repetitions_); - - if (family->measure_process_cpu_time_) { - instance.name.time_type = "process_time"; - } + BenchmarkInstance instance(family.get(), family_index, + 
per_family_instance_index, args, + num_threads); + + const auto full_name = instance.name().str(); + if (full_name.rfind(kDisabledPrefix, 0) != 0 && + ((re.Match(full_name) && !is_negative_filter) || + (!re.Match(full_name) && is_negative_filter))) { + benchmarks->push_back(std::move(instance)); - if (family->use_manual_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "manual_time"; - } else if (family->use_real_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "real_time"; - } + ++per_family_instance_index; - // Add the number of threads used to the name - if (!family->thread_counts_.empty()) { - instance.name.threads = StrFormat("threads:%d", instance.threads); - } - - const auto full_name = instance.name.str(); - if ((re.Match(full_name) && !isNegativeFilter) || - (!re.Match(full_name) && isNegativeFilter)) { - instance.last_benchmark_instance = (&args == &family->args_.back()); - benchmarks->push_back(std::move(instance)); + // Only bump the next family index once we've estabilished that + // at least one instance of this family will be run. + if (next_family_index == family_index) ++next_family_index; } } } @@ -258,39 +204,50 @@ bool FindBenchmarksInternal(const std::string& re, // Benchmark //=============================================================================// -Benchmark::Benchmark(const char* name) +Benchmark::Benchmark(const std::string& name) : name_(name), aggregation_report_mode_(ARM_Unspecified), - time_unit_(kNanosecond), + time_unit_(GetDefaultTimeUnit()), + use_default_time_unit_(true), range_multiplier_(kRangeMultiplier), min_time_(0), + min_warmup_time_(0), iterations_(0), repetitions_(0), measure_process_cpu_time_(false), use_real_time_(false), use_manual_time_(false), complexity_(oNone), - complexity_lambda_(nullptr) { + complexity_lambda_(nullptr), + setup_(nullptr), + teardown_(nullptr) { ComputeStatistics("mean", StatisticsMean); ComputeStatistics("median", StatisticsMedian); ComputeStatistics("stddev", StatisticsStdDev); + ComputeStatistics("cv", StatisticsCV, kPercentage); } Benchmark::~Benchmark() {} +Benchmark* Benchmark::Name(const std::string& name) { + SetName(name); + return this; +} + Benchmark* Benchmark::Arg(int64_t x) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); args_.push_back({x}); return this; } Benchmark* Benchmark::Unit(TimeUnit unit) { time_unit_ = unit; + use_default_time_unit_ = false; return this; } Benchmark* Benchmark::Range(int64_t start, int64_t limit) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); std::vector<int64_t> arglist; AddRange(&arglist, start, limit, range_multiplier_); @@ -302,7 +259,7 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) { Benchmark* Benchmark::Ranges( const std::vector<std::pair<int64_t, int64_t>>& ranges) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size())); std::vector<std::vector<int64_t>> arglists(ranges.size()); for (std::size_t i = 0; i < ranges.size(); i++) { AddRange(&arglists[i], ranges[i].first, ranges[i].second, @@ -316,7 +273,7 @@ Benchmark* Benchmark::Ranges( Benchmark* Benchmark::ArgsProduct( const std::vector<std::vector<int64_t>>& arglists) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() 
== static_cast<int>(arglists.size())); std::vector<std::size_t> indices(arglists.size()); const std::size_t total = std::accumulate( @@ -343,20 +300,20 @@ Benchmark* Benchmark::ArgsProduct( } Benchmark* Benchmark::ArgName(const std::string& name) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); arg_names_ = {name}; return this; } Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size())); arg_names_ = names; return this; } Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - CHECK_LE(start, limit); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + BM_CHECK_LE(start, limit); for (int64_t arg = start; arg <= limit; arg += step) { args_.push_back({arg}); } @@ -364,7 +321,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { } Benchmark* Benchmark::Args(const std::vector<int64_t>& args) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size())); + BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size())); args_.push_back(args); return this; } @@ -374,28 +331,48 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { return this; } +Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) { + BM_CHECK(setup != nullptr); + setup_ = setup; + return this; +} + +Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) { + BM_CHECK(teardown != nullptr); + teardown_ = teardown; + return this; +} + Benchmark* Benchmark::RangeMultiplier(int multiplier) { - CHECK(multiplier > 1); + BM_CHECK(multiplier > 1); range_multiplier_ = multiplier; return this; } Benchmark* Benchmark::MinTime(double t) { - CHECK(t > 0.0); - CHECK(iterations_ == 0); + BM_CHECK(t > 0.0); + BM_CHECK(iterations_ == 0); min_time_ = t; return this; } +Benchmark* Benchmark::MinWarmUpTime(double t) { + BM_CHECK(t >= 0.0); + BM_CHECK(iterations_ == 0); + min_warmup_time_ = t; + return this; +} + Benchmark* Benchmark::Iterations(IterationCount n) { - CHECK(n > 0); - CHECK(IsZero(min_time_)); + BM_CHECK(n > 0); + BM_CHECK(IsZero(min_time_)); + BM_CHECK(IsZero(min_warmup_time_)); iterations_ = n; return this; } Benchmark* Benchmark::Repetitions(int n) { - CHECK(n > 0); + BM_CHECK(n > 0); repetitions_ = n; return this; } @@ -428,14 +405,14 @@ Benchmark* Benchmark::MeasureProcessCPUTime() { } Benchmark* Benchmark::UseRealTime() { - CHECK(!use_manual_time_) + BM_CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_real_time_ = true; return this; } Benchmark* Benchmark::UseManualTime() { - CHECK(!use_real_time_) + BM_CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_manual_time_ = true; return this; @@ -452,21 +429,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) { return this; } -Benchmark* Benchmark::ComputeStatistics(std::string name, - StatisticsFunc* statistics) { - statistics_.emplace_back(name, statistics); +Benchmark* Benchmark::ComputeStatistics(const std::string& name, + StatisticsFunc* statistics, + StatisticUnit unit) { + statistics_.emplace_back(name, statistics, unit); return this; } Benchmark* Benchmark::Threads(int t) { - CHECK_GT(t, 0); + BM_CHECK_GT(t, 0); thread_counts_.push_back(t); return this; } Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) 
{ - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); + BM_CHECK_GT(min_threads, 0); + BM_CHECK_GE(max_threads, min_threads); AddRange(&thread_counts_, min_threads, max_threads, 2); return this; @@ -474,9 +452,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, int stride) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); - CHECK_GE(stride, 1); + BM_CHECK_GT(min_threads, 0); + BM_CHECK_GE(max_threads, min_threads); + BM_CHECK_GE(stride, 1); for (auto i = min_threads; i < max_threads; i += stride) { thread_counts_.push_back(i); @@ -490,7 +468,9 @@ Benchmark* Benchmark::ThreadPerCpu() { return this; } -void Benchmark::SetName(const char* name) { name_ = name; } +void Benchmark::SetName(const std::string& name) { name_ = name; } + +const char* Benchmark::GetName() const { return name_.c_str(); } int Benchmark::ArgsCnt() const { if (args_.empty()) { @@ -500,6 +480,16 @@ int Benchmark::ArgsCnt() const { return static_cast<int>(args_.front().size()); } +const char* Benchmark::GetArgName(int arg) const { + BM_CHECK_GE(arg, 0); + BM_CHECK_LT(arg, static_cast<int>(arg_names_.size())); + return arg_names_[arg].c_str(); +} + +TimeUnit Benchmark::GetTimeUnit() const { + return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_; +} + //=============================================================================// // FunctionBenchmark //=============================================================================// @@ -512,4 +502,19 @@ void ClearRegisteredBenchmarks() { internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); } +std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) { + std::vector<int64_t> args; + internal::AddRange(&args, lo, hi, multi); + return args; +} + +std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) { + BM_CHECK_LE(start, limit); + std::vector<int64_t> args; + for (int64_t arg = start; arg <= limit; arg += step) { + args.push_back(arg); + } + return args; +} + } // end namespace benchmark diff --git a/src/benchmark_register.h b/src/benchmark_register.h index c774e6f..53367c7 100644 --- a/src/benchmark_register.h +++ b/src/benchmark_register.h @@ -1,6 +1,7 @@ #ifndef BENCHMARK_REGISTER_H #define BENCHMARK_REGISTER_H +#include <algorithm> #include <limits> #include <vector> @@ -12,18 +13,18 @@ namespace internal { // Append the powers of 'mult' in the closed interval [lo, hi]. // Returns iterator to the start of the inserted range. template <typename T> -typename std::vector<T>::iterator -AddPowers(std::vector<T>* dst, T lo, T hi, int mult) { - CHECK_GE(lo, 0); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); +typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi, + int mult) { + BM_CHECK_GE(lo, 0); + BM_CHECK_GE(hi, lo); + BM_CHECK_GE(mult, 2); const size_t start_offset = dst->size(); static const T kmax = std::numeric_limits<T>::max(); // Space out the values in multiples of "mult" - for (T i = 1; i <= hi; i *= mult) { + for (T i = static_cast<T>(1); i <= hi; i *= static_cast<T>(mult)) { if (i >= lo) { dst->push_back(i); } @@ -32,16 +33,16 @@ AddPowers(std::vector<T>* dst, T lo, T hi, int mult) { if (i > kmax / mult) break; } - return dst->begin() + start_offset; + return dst->begin() + static_cast<int>(start_offset); } template <typename T> void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) { // We negate lo and hi so we require that they cannot be equal to 'min'. 
- CHECK_GT(lo, std::numeric_limits<T>::min()); - CHECK_GT(hi, std::numeric_limits<T>::min()); - CHECK_GE(hi, lo); - CHECK_LE(hi, 0); + BM_CHECK_GT(lo, std::numeric_limits<T>::min()); + BM_CHECK_GT(hi, std::numeric_limits<T>::min()); + BM_CHECK_GE(hi, lo); + BM_CHECK_LE(hi, 0); // Add positive powers, then negate and reverse. // Casts necessary since small integers get promoted @@ -60,8 +61,8 @@ void AddRange(std::vector<T>* dst, T lo, T hi, int mult) { static_assert(std::is_integral<T>::value && std::is_signed<T>::value, "Args type must be a signed integer"); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); + BM_CHECK_GE(hi, lo); + BM_CHECK_GE(mult, 2); // Add "lo" dst->push_back(lo); diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 7bc6b63..f7ae424 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -13,12 +13,13 @@ // limitations under the License. #include "benchmark_runner.h" + #include "benchmark/benchmark.h" #include "benchmark_api_internal.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include <sys/resource.h> #endif #include <sys/time.h> @@ -27,11 +28,14 @@ #include <algorithm> #include <atomic> +#include <climits> +#include <cmath> #include <condition_variable> #include <cstdio> #include <cstdlib> #include <fstream> #include <iostream> +#include <limits> #include <memory> #include <string> #include <thread> @@ -45,6 +49,7 @@ #include "internal_macros.h" #include "log.h" #include "mutex.h" +#include "perf_counters.h" #include "re.h" #include "statistics.h" #include "string_util.h" @@ -60,64 +65,72 @@ MemoryManager* memory_manager = nullptr; namespace { static constexpr IterationCount kMaxIterations = 1000000000; +const double kDefaultMinTime = + std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr); BenchmarkReporter::Run CreateRunReport( const benchmark::internal::BenchmarkInstance& b, const internal::ThreadManager::Result& results, IterationCount memory_iterations, - const MemoryManager::Result& memory_result, double seconds, - int64_t repetition_index) { + const MemoryManager::Result* memory_result, double seconds, + int64_t repetition_index, int64_t repeats) { // Create report about this benchmark run. BenchmarkReporter::Run report; - report.run_name = b.name; - report.error_occurred = results.has_error_; - report.error_message = results.error_message_; + report.run_name = b.name(); + report.family_index = b.family_index(); + report.per_family_instance_index = b.per_family_instance_index(); + report.skipped = results.skipped_; + report.skip_message = results.skip_message_; report.report_label = results.report_label_; // This is the total iterations across all threads. 
report.iterations = results.iterations; - report.time_unit = b.time_unit; - report.threads = b.threads; + report.time_unit = b.time_unit(); + report.threads = b.threads(); report.repetition_index = repetition_index; - report.repetitions = b.repetitions; + report.repetitions = repeats; - if (!report.error_occurred) { - if (b.use_manual_time) { + if (!report.skipped) { + if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; } else { report.real_accumulated_time = results.real_time_used; } report.cpu_accumulated_time = results.cpu_time_used; report.complexity_n = results.complexity_n; - report.complexity = b.complexity; - report.complexity_lambda = b.complexity_lambda; - report.statistics = b.statistics; + report.complexity = b.complexity(); + report.complexity_lambda = b.complexity_lambda(); + report.statistics = &b.statistics(); report.counters = results.counters; if (memory_iterations > 0) { - report.has_memory_result = true; + assert(memory_result != nullptr); + report.memory_result = memory_result; report.allocs_per_iter = - memory_iterations ? static_cast<double>(memory_result.num_allocs) / + memory_iterations ? static_cast<double>(memory_result->num_allocs) / memory_iterations : 0; - report.max_bytes_used = memory_result.max_bytes_used; } - internal::Finish(&report.counters, results.iterations, seconds, b.threads); + internal::Finish(&report.counters, results.iterations, seconds, + b.threads()); } return report; } // Execute one thread of benchmark b for the specified number of iterations. -// Adds the stats collected for the thread into *total. +// Adds the stats collected for the thread into manager->results. void RunInThread(const BenchmarkInstance* b, IterationCount iters, - int thread_id, ThreadManager* manager) { + int thread_id, ThreadManager* manager, + PerfCountersMeasurement* perf_counters_measurement) { internal::ThreadTimer timer( - b->measure_process_cpu_time + b->measure_process_cpu_time() ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); - State st = b->Run(iters, thread_id, &timer, manager); - CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) + + State st = + b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); + BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(manager->GetBenchmarkMutex()); @@ -132,229 +145,351 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, manager->NotifyThreadComplete(); } -class BenchmarkRunner { - public: - BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - std::vector<BenchmarkReporter::Run>* complexity_reports_) - : b(b_), - complexity_reports(*complexity_reports_), - min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time), - repeats(b.repetitions != 0 ? b.repetitions +double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b, + const BenchTimeType& iters_or_time) { + if (!IsZero(b.min_time())) return b.min_time(); + // If the flag was used to specify number of iters, then return the default + // min_time. 
+ if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime; + + return iters_or_time.time; +} + +IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b, + const BenchTimeType& iters_or_time) { + if (b.iterations() != 0) return b.iterations(); + + // We've already concluded that this flag is currently used to pass + // iters but do a check here again anyway. + BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS); + return iters_or_time.iters; +} + +} // end namespace + +BenchTimeType ParseBenchMinTime(const std::string& value) { + BenchTimeType ret; + + if (value.empty()) { + ret.tag = BenchTimeType::TIME; + ret.time = 0.0; + return ret; + } + + if (value.back() == 'x') { + char* p_end; + // Reset errno before it's changed by strtol. + errno = 0; + IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10); + + // After a valid parse, p_end should have been set to + // point to the 'x' suffix. + BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x') + << "Malformed iters value passed to --benchmark_min_time: `" << value + << "`. Expected --benchmark_min_time=<integer>x."; + + ret.tag = BenchTimeType::ITERS; + ret.iters = num_iters; + return ret; + } + + bool has_suffix = value.back() == 's'; + if (!has_suffix) { + BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. " + "Eg., `30s` for 30-seconds."; + } + + char* p_end; + // Reset errno before it's changed by strtod. + errno = 0; + double min_time = std::strtod(value.c_str(), &p_end); + + // After a successful parse, p_end should point to the suffix 's', + // or the end of the string if the suffix was omitted. + BM_CHECK(errno == 0 && p_end != nullptr && + ((has_suffix && *p_end == 's') || *p_end == '\0')) + << "Malformed seconds value passed to --benchmark_min_time: `" << value + << "`. Expected --benchmark_min_time=<float>x."; + + ret.tag = BenchTimeType::TIME; + ret.time = min_time; + + return ret; +} + +BenchmarkRunner::BenchmarkRunner( + const benchmark::internal::BenchmarkInstance& b_, + PerfCountersMeasurement* pcm_, + BenchmarkReporter::PerFamilyRunReports* reports_for_family_) + : b(b_), + reports_for_family(reports_for_family_), + parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), + min_time(ComputeMinTime(b_, parsed_benchtime_flag)), + min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) + ? b.min_warmup_time() + : FLAGS_benchmark_min_warmup_time), + warmup_done(!(min_warmup_time > 0.0)), + repeats(b.repetitions() != 0 ? b.repetitions() : FLAGS_benchmark_repetitions), - has_explicit_iteration_count(b.iterations != 0), - pool(b.threads - 1), - iters(has_explicit_iteration_count ? b.iterations : 1) { + has_explicit_iteration_count(b.iterations() != 0 || + parsed_benchtime_flag.tag == + BenchTimeType::ITERS), + pool(b.threads() - 1), + iters(has_explicit_iteration_count + ? 
ComputeIters(b_, parsed_benchtime_flag) + : 1), + perf_counters_measurement_ptr(pcm_) { + run_results.display_report_aggregates_only = + (FLAGS_benchmark_report_aggregates_only || + FLAGS_benchmark_display_aggregates_only); + run_results.file_report_aggregates_only = + FLAGS_benchmark_report_aggregates_only; + if (b.aggregation_report_mode() != internal::ARM_Unspecified) { run_results.display_report_aggregates_only = - (FLAGS_benchmark_report_aggregates_only || - FLAGS_benchmark_display_aggregates_only); + (b.aggregation_report_mode() & + internal::ARM_DisplayReportAggregatesOnly); run_results.file_report_aggregates_only = - FLAGS_benchmark_report_aggregates_only; - if (b.aggregation_report_mode != internal::ARM_Unspecified) { - run_results.display_report_aggregates_only = - (b.aggregation_report_mode & - internal::ARM_DisplayReportAggregatesOnly); - run_results.file_report_aggregates_only = - (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); - } + (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); + BM_CHECK(FLAGS_benchmark_perf_counters.empty() || + (perf_counters_measurement_ptr->num_counters() == 0)) + << "Perf counters were requested but could not be set up."; + } +} - for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { - DoOneRepetition(repetition_num); - } +BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { + BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; - // Calculate additional statistics - run_results.aggregates_only = ComputeStats(run_results.non_aggregates); + std::unique_ptr<internal::ThreadManager> manager; + manager.reset(new internal::ThreadManager(b.threads())); - // Maybe calculate complexity report - if ((b.complexity != oNone) && b.last_benchmark_instance) { - auto additional_run_stats = ComputeBigO(complexity_reports); - run_results.aggregates_only.insert(run_results.aggregates_only.end(), - additional_run_stats.begin(), - additional_run_stats.end()); - complexity_reports.clear(); - } + // Run all but one thread in separate threads + for (std::size_t ti = 0; ti < pool.size(); ++ti) { + pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1), + manager.get(), perf_counters_measurement_ptr); } + // And run one thread here directly. + // (If we were asked to run just one thread, we don't create new threads.) + // Yes, we need to do this here *after* we start the separate threads. + RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr); - RunResults&& get_results() { return std::move(run_results); } + // The main thread has finished. Now let's wait for the other threads. + manager->WaitForAllThreads(); + for (std::thread& thread : pool) thread.join(); - private: - RunResults run_results; + IterationResults i; + // Acquire the measurements/counters from the manager, UNDER THE LOCK! + { + MutexLock l(manager->GetBenchmarkMutex()); + i.results = manager->results; + } - const benchmark::internal::BenchmarkInstance& b; - std::vector<BenchmarkReporter::Run>& complexity_reports; + // And get rid of the manager. + manager.reset(); - const double min_time; - const int repeats; - const bool has_explicit_iteration_count; + // Adjust real/manual time stats since they were reported per thread. + i.results.real_time_used /= b.threads(); + i.results.manual_time_used /= b.threads(); + // If we were measuring whole-process CPU usage, adjust the CPU time too. 
+ if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); - std::vector<std::thread> pool; + BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" + << i.results.real_time_used << "\n"; - IterationCount iters; // preserved between repetitions! - // So only the first repetition has to find/calculate it, - // the other repetitions will just use that precomputed iteration count. + // By using KeepRunningBatch a benchmark can iterate more times than + // requested, so take the iteration count from i.results. + i.iters = i.results.iterations / b.threads(); - struct IterationResults { - internal::ThreadManager::Result results; - IterationCount iters; - double seconds; - }; - IterationResults DoNIterations() { - VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n"; + // Base decisions off of real time if requested by this benchmark. + i.seconds = i.results.cpu_time_used; + if (b.use_manual_time()) { + i.seconds = i.results.manual_time_used; + } else if (b.use_real_time()) { + i.seconds = i.results.real_time_used; + } - std::unique_ptr<internal::ThreadManager> manager; - manager.reset(new internal::ThreadManager(b.threads)); + return i; +} - // Run all but one thread in separate threads - for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1), - manager.get()); - } - // And run one thread here directly. - // (If we were asked to run just one thread, we don't create new threads.) - // Yes, we need to do this here *after* we start the separate threads. - RunInThread(&b, iters, 0, manager.get()); +IterationCount BenchmarkRunner::PredictNumItersNeeded( + const IterationResults& i) const { + // See by how much the iteration count should be increased. + // Note: Avoid division by zero with max(seconds, 1ns). + double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9); + // If our last run was at least 10% of FLAGS_benchmark_min_time then we + // use the multiplier directly. + // Otherwise we use at most 10 times expansion. + // NOTE: When the last run was at least 10% of the min time the max + // expansion should be 14x. + const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1; + multiplier = is_significant ? multiplier : 10.0; + + // So what seems to be the sufficiently-large iteration count? Round up. + const IterationCount max_next_iters = static_cast<IterationCount>( + std::lround(std::max(multiplier * static_cast<double>(i.iters), + static_cast<double>(i.iters) + 1.0))); + // But we do have *some* limits though.. + const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); + + BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; + return next_iters; // round up before conversion to integer. +} - // The main thread has finished. Now let's wait for the other threads. - manager->WaitForAllThreads(); - for (std::thread& thread : pool) thread.join(); +bool BenchmarkRunner::ShouldReportIterationResults( + const IterationResults& i) const { + // Determine if this run should be reported; + // Either it has run for a sufficient amount of time + // or because an error was reported. + return i.results.skipped_ || + i.iters >= kMaxIterations || // Too many iterations already. + i.seconds >= + GetMinTimeToApply() || // The elapsed time is large enough. + // CPU time is specified but the elapsed real time greatly exceeds + // the minimum time. + // Note that user-provided timers are exempt from this test.
+ ((i.results.real_time_used >= 5 * GetMinTimeToApply()) && + !b.use_manual_time()); +} - IterationResults i; - // Acquire the measurements/counters from the manager, UNDER THE LOCK! - { - MutexLock l(manager->GetBenchmarkMutex()); - i.results = manager->results; - } +double BenchmarkRunner::GetMinTimeToApply() const { + // In order to re-use functionality to run and measure benchmarks for running + // a warmup phase of the benchmark, we need a way of telling whether to apply + // min_time or min_warmup_time. This function will figure out if we are in the + // warmup phase and therefore need to apply min_warmup_time or if we are already + // in the benchmarking phase and min_time needs to be applied. + return warmup_done ? min_time : min_warmup_time; +} - // And get rid of the manager. - manager.reset(); +void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { + warmup_done = true; + iters = i; +} - // Adjust real/manual time stats since they were reported per thread. - i.results.real_time_used /= b.threads; - i.results.manual_time_used /= b.threads; - // If we were measuring whole-process CPU usage, adjust the CPU time too. - if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads; - - VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" - << i.results.real_time_used << "\n"; - - // So for how long were we running? - i.iters = iters; - // Base decisions off of real time if requested by this benchmark. - i.seconds = i.results.cpu_time_used; - if (b.use_manual_time) { - i.seconds = i.results.manual_time_used; - } else if (b.use_real_time) { - i.seconds = i.results.real_time_used; +void BenchmarkRunner::RunWarmUp() { + // Use the same mechanisms for warming up the benchmark as used for actually + // running and measuring the benchmark. + IterationResults i_warmup; + // Don't use the iterations determined in the warmup phase for the actual + // measured benchmark phase. While this may be a good starting point for the + // benchmark and it would therefore get rid of the need to figure out how many + // iterations are needed if min_time is set again, this may also be a completely + // wrong guess since the warmup loops might be considerably slower (e.g. + // because of caching effects). + const IterationCount i_backup = iters; + + for (;;) { + b.Setup(); + i_warmup = DoNIterations(); + b.Teardown(); + + const bool finish = ShouldReportIterationResults(i_warmup); + + if (finish) { + FinishWarmUp(i_backup); + break; } - return i; + // Although we are running "only" a warmup phase where running enough + // iterations at once without measuring time isn't as important as it is for + // the benchmarking phase, we still do it the same way as otherwise it is + // very confusing for the user to know how to choose a proper value for + // min_warmup_time if a different approach to running it is used. + iters = PredictNumItersNeeded(i_warmup); + assert(iters > i_warmup.iters && + "if we did more iterations than we want to do the next time, " + "then we should have accepted the current iteration run."); } +} - IterationCount PredictNumItersNeeded(const IterationResults& i) const { - // See how much iterations should be increased by. - // Note: Avoid division by zero with max(seconds, 1ns). - double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); - // If our last run was at least 10% of FLAGS_benchmark_min_time then we - // use the multiplier directly. - // Otherwise we use at most 10 times expansion.
- // NOTE: When the last run was at least 10% of the min time the max - // expansion should be 14x. - bool is_significant = (i.seconds / min_time) > 0.1; - multiplier = is_significant ? multiplier : std::min(10.0, multiplier); - if (multiplier <= 1.0) multiplier = 2.0; - - // So what seems to be the sufficiently-large iteration count? Round up. - const IterationCount max_next_iters = static_cast<IterationCount>( - std::lround(std::max(multiplier * static_cast<double>(i.iters), - static_cast<double>(i.iters) + 1.0))); - // But we do have *some* sanity limits though.. - const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); - - VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; - return next_iters; // round up before conversion to integer. +void BenchmarkRunner::DoOneRepetition() { + assert(HasRepeatsRemaining() && "Already done all repetitions?"); + + const bool is_the_first_repetition = num_repetitions_done == 0; + + // In case a warmup phase is requested by the benchmark, run it now. + // After running the warmup phase the BenchmarkRunner should be in a state as + // if this warmup never happened, except that warmup_done is set. Every + // other manipulation of the BenchmarkRunner instance would be a bug! Please + // fix it. + if (!warmup_done) RunWarmUp(); + + IterationResults i; + // We *may* be gradually increasing the length (iteration count) + // of the benchmark until we decide the results are significant. + // And once we do, we report those last results and exit. + // Please do note that if there are repetitions, the iteration count + // is *only* calculated for the *first* repetition, and other repetitions + // simply use that precomputed iteration count. + for (;;) { + b.Setup(); + i = DoNIterations(); + b.Teardown(); + + // Do we consider the results to be significant? + // If we are doing repetitions, and the first repetition was already done, + // it has calculated the correct iteration time, so we have run that very + // iteration count just now. No need to calculate anything. Just report. + // Else, the normal rules apply. + const bool results_are_significant = !is_the_first_repetition || + has_explicit_iteration_count || + ShouldReportIterationResults(i); + + if (results_are_significant) break; // Good, let's report them! + + // Nope, bad iteration. Let's re-estimate the hopefully-sufficient + // iteration count, and run the benchmark again... + + iters = PredictNumItersNeeded(i); + assert(iters > i.iters && + "if we did more iterations than we want to do the next time, " + "then we should have accepted the current iteration run."); } - bool ShouldReportIterationResults(const IterationResults& i) const { - // Determine if this run should be reported; - // Either it has run for a sufficient amount of time - // or because an error was reported. - return i.results.has_error_ || - i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= min_time || // The elapsed time is large enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time); + // Oh, one last thing, we need to also produce the 'memory measurements'..
+ MemoryManager::Result* memory_result = nullptr; + IterationCount memory_iterations = 0; + if (memory_manager != nullptr) { + // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an + // optional so we don't have to own the Result here. + // Can't do it now due to cxx03. + memory_results.push_back(MemoryManager::Result()); + memory_result = &memory_results.back(); + // Only run a few iterations to reduce the impact of one-time + // allocations in benchmarks that are not properly managed. + memory_iterations = std::min<IterationCount>(16, iters); + memory_manager->Start(); + std::unique_ptr<internal::ThreadManager> manager; + manager.reset(new internal::ThreadManager(1)); + b.Setup(); + RunInThread(&b, memory_iterations, 0, manager.get(), + perf_counters_measurement_ptr); + manager->WaitForAllThreads(); + manager.reset(); + b.Teardown(); + memory_manager->Stop(*memory_result); } - void DoOneRepetition(int64_t repetition_index) { - const bool is_the_first_repetition = repetition_index == 0; - IterationResults i; - - // We *may* be gradually increasing the length (iteration count) - // of the benchmark until we decide the results are significant. - // And once we do, we report those last results and exit. - // Please do note that the if there are repetitions, the iteration count - // is *only* calculated for the *first* repetition, and other repetitions - // simply use that precomputed iteration count. - for (;;) { - i = DoNIterations(); - - // Do we consider the results to be significant? - // If we are doing repetitions, and the first repetition was already done, - // it has calculated the correct iteration time, so we have run that very - // iteration count just now. No need to calculate anything. Just report. - // Else, the normal rules apply. - const bool results_are_significant = !is_the_first_repetition || - has_explicit_iteration_count || - ShouldReportIterationResults(i); - - if (results_are_significant) break; // Good, let's report them! - - // Nope, bad iteration. Let's re-estimate the hopefully-sufficient - // iteration count, and run the benchmark again... - - iters = PredictNumItersNeeded(i); - assert(iters > i.iters && - "if we did more iterations than we want to do the next time, " - "then we should have accepted the current iteration run."); - } + // Ok, now actually report. + BenchmarkReporter::Run report = + CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, + num_repetitions_done, repeats); - // Oh, one last thing, we need to also produce the 'memory measurements'.. - MemoryManager::Result memory_result; - IterationCount memory_iterations = 0; - if (memory_manager != nullptr) { - // Only run a few iterations to reduce the impact of one-time - // allocations in benchmarks that are not properly managed. - memory_iterations = std::min<IterationCount>(16, iters); - memory_manager->Start(); - std::unique_ptr<internal::ThreadManager> manager; - manager.reset(new internal::ThreadManager(1)); - RunInThread(&b, memory_iterations, 0, manager.get()); - manager->WaitForAllThreads(); - manager.reset(); - - memory_manager->Stop(&memory_result); - } + if (reports_for_family) { + ++reports_for_family->num_runs_done; + if (!report.skipped) reports_for_family->Runs.push_back(report); + } - // Ok, now actualy report. 
- BenchmarkReporter::Run report = - CreateRunReport(b, i.results, memory_iterations, memory_result, - i.seconds, repetition_index); + run_results.non_aggregates.push_back(report); - if (!report.error_occurred && b.complexity != oNone) - complexity_reports.push_back(report); + ++num_repetitions_done; +} - run_results.non_aggregates.push_back(report); - } -}; +RunResults&& BenchmarkRunner::GetResults() { + assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?"); -} // end namespace + // Calculate additional statistics over the repetitions of this instance. + run_results.aggregates_only = ComputeStats(run_results.non_aggregates); -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector<BenchmarkReporter::Run>* complexity_reports) { - internal::BenchmarkRunner r(b, complexity_reports); - return r.get_results(); + return std::move(run_results); } } // end namespace internal diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 96e8282..db2fa04 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -15,19 +15,23 @@ #ifndef BENCHMARK_RUNNER_H_ #define BENCHMARK_RUNNER_H_ +#include <thread> +#include <vector> + #include "benchmark_api_internal.h" #include "internal_macros.h" - -DECLARE_double(benchmark_min_time); - -DECLARE_int32(benchmark_repetitions); - -DECLARE_bool(benchmark_report_aggregates_only); - -DECLARE_bool(benchmark_display_aggregates_only); +#include "perf_counters.h" +#include "thread_manager.h" namespace benchmark { +BM_DECLARE_string(benchmark_min_time); +BM_DECLARE_double(benchmark_min_warmup_time); +BM_DECLARE_int32(benchmark_repetitions); +BM_DECLARE_bool(benchmark_report_aggregates_only); +BM_DECLARE_bool(benchmark_display_aggregates_only); +BM_DECLARE_string(benchmark_perf_counters); + namespace internal { extern MemoryManager* memory_manager; @@ -40,9 +44,85 @@ struct RunResults { bool file_report_aggregates_only = false; }; -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector<BenchmarkReporter::Run>* complexity_reports); +struct BENCHMARK_EXPORT BenchTimeType { + enum { ITERS, TIME } tag; + union { + IterationCount iters; + double time; + }; +}; + +BENCHMARK_EXPORT +BenchTimeType ParseBenchMinTime(const std::string& value); + +class BenchmarkRunner { + public: + BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, + benchmark::internal::PerfCountersMeasurement* pmc_, + BenchmarkReporter::PerFamilyRunReports* reports_for_family); + + int GetNumRepeats() const { return repeats; } + + bool HasRepeatsRemaining() const { + return GetNumRepeats() != num_repetitions_done; + } + + void DoOneRepetition(); + + RunResults&& GetResults(); + + BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const { + return reports_for_family; + } + + double GetMinTime() const { return min_time; } + + bool HasExplicitIters() const { return has_explicit_iteration_count; } + + IterationCount GetIters() const { return iters; } + + private: + RunResults run_results; + + const benchmark::internal::BenchmarkInstance& b; + BenchmarkReporter::PerFamilyRunReports* reports_for_family; + + BenchTimeType parsed_benchtime_flag; + const double min_time; + const double min_warmup_time; + bool warmup_done; + const int repeats; + const bool has_explicit_iteration_count; + + int num_repetitions_done = 0; + + std::vector<std::thread> pool; + + std::vector<MemoryManager::Result> memory_results; + + IterationCount iters; // preserved between repetitions! 
+ // So only the first repetition has to find/calculate it, + // the other repetitions will just use that precomputed iteration count. + + PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr; + + struct IterationResults { + internal::ThreadManager::Result results; + IterationCount iters; + double seconds; + }; + IterationResults DoNIterations(); + + IterationCount PredictNumItersNeeded(const IterationResults& i) const; + + bool ShouldReportIterationResults(const IterationResults& i) const; + + double GetMinTimeToApply() const; + + void FinishWarmUp(const IterationCount& i); + + void RunWarmUp(); +}; } // namespace internal diff --git a/src/check.cc b/src/check.cc new file mode 100644 index 0000000..5f7526e --- /dev/null +++ b/src/check.cc @@ -0,0 +1,11 @@ +#include "check.h" + +namespace benchmark { +namespace internal { + +static AbortHandlerT* handler = &std::abort; + +BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; } + +} // namespace internal +} // namespace benchmark diff --git a/src/check.h b/src/check.h index f5f8253..c1cd5e8 100644 --- a/src/check.h +++ b/src/check.h @@ -5,26 +5,43 @@ #include <cstdlib> #include <ostream> +#include "benchmark/export.h" #include "internal_macros.h" #include "log.h" +#if defined(__GNUC__) || defined(__clang__) +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#elif defined(_MSC_VER) && !defined(__clang__) +#if _MSC_VER >= 1900 +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#else +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif +#define __func__ __FUNCTION__ +#else +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif + namespace benchmark { namespace internal { typedef void(AbortHandlerT)(); -inline AbortHandlerT*& GetAbortHandler() { - static AbortHandlerT* handler = &std::abort; - return handler; -} +BENCHMARK_EXPORT +AbortHandlerT*& GetAbortHandler(); BENCHMARK_NORETURN inline void CallAbortHandler() { GetAbortHandler()(); std::abort(); // fallback to enforce noreturn } -// CheckHandler is the class constructed by failing CHECK macros. CheckHandler -// will log information about the failures and abort when it is destructed. +// CheckHandler is the class constructed by failing BM_CHECK macros. +// CheckHandler will log information about the failures and abort when it is +// destructed. class CheckHandler { public: CheckHandler(const char* check, const char* file, const char* func, int line) @@ -35,10 +52,17 @@ class CheckHandler { LogType& GetLog() { return log_; } +#if defined(COMPILER_MSVC) +#pragma warning(push) +#pragma warning(disable : 4722) +#endif BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { log_ << std::endl; CallAbortHandler(); } +#if defined(COMPILER_MSVC) +#pragma warning(pop) +#endif CheckHandler& operator=(const CheckHandler&) = delete; CheckHandler(const CheckHandler&) = delete; @@ -51,32 +75,32 @@ class CheckHandler { } // end namespace internal } // end namespace benchmark -// The CHECK macro returns a std::ostream object that can have extra information -// written to it. +// The BM_CHECK macro returns a std::ostream object that can have extra +// information written to it. #ifndef NDEBUG -#define CHECK(b) \ +#define BM_CHECK(b) \ (b ? 
::benchmark::internal::GetNullLogInstance() \ : ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \ .GetLog()) #else -#define CHECK(b) ::benchmark::internal::GetNullLogInstance() +#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance() #endif // clang-format off // preserve whitespacing between operators for alignment -#define CHECK_EQ(a, b) CHECK((a) == (b)) -#define CHECK_NE(a, b) CHECK((a) != (b)) -#define CHECK_GE(a, b) CHECK((a) >= (b)) -#define CHECK_LE(a, b) CHECK((a) <= (b)) -#define CHECK_GT(a, b) CHECK((a) > (b)) -#define CHECK_LT(a, b) CHECK((a) < (b)) - -#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps)) -#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps)) -#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps)) -#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps)) -#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps)) -#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps)) +#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b)) +#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b)) +#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b)) +#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b)) +#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b)) +#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b)) + +#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps)) +#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps)) +#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps)) +#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps)) +#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps)) +#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps)) //clang-format on #endif // CHECK_H_ diff --git a/src/colorprint.cc b/src/colorprint.cc index fff6a98..0bfd670 100644 --- a/src/colorprint.cc +++ b/src/colorprint.cc @@ -25,8 +25,8 @@ #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS -#include <windows.h> #include <io.h> +#include <windows.h> #else #include <unistd.h> #endif // BENCHMARK_OS_WINDOWS @@ -94,20 +94,20 @@ std::string FormatString(const char* msg, va_list args) { va_end(args_cp); // currently there is no error handling for failure, so this is hack. - CHECK(ret >= 0); + BM_CHECK(ret >= 0); - if (ret == 0) // handle empty expansion + if (ret == 0) { // handle empty expansion return {}; - else if (static_cast<size_t>(ret) < size) + } + if (static_cast<size_t>(ret) < size) { return local_buff; - else { - // we did not provide a long enough buffer on our first attempt. - size = (size_t)ret + 1; // + 1 for the null byte - std::unique_ptr<char[]> buff(new char[size]); - ret = vsnprintf(buff.get(), size, msg, args); - CHECK(ret > 0 && ((size_t)ret) < size); - return buff.get(); } + // we did not provide a long enough buffer on our first attempt. + size = static_cast<size_t>(ret) + 1; // + 1 for the null byte + std::unique_ptr<char[]> buff(new char[size]); + ret = vsnprintf(buff.get(), size, msg, args); + BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size); + return buff.get(); } std::string FormatString(const char* msg, ...) { @@ -163,12 +163,24 @@ bool IsColorTerminal() { #else // On non-Windows platforms, we rely on the TERM variable. This list of // supported TERM values is copied from Google Test: - // <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>. + // <https://github.com/google/googletest/blob/v1.13.0/googletest/src/gtest.cc#L3225-L3259>. 
const char* const SUPPORTED_TERM_VALUES[] = { - "xterm", "xterm-color", "xterm-256color", - "screen", "screen-256color", "tmux", - "tmux-256color", "rxvt-unicode", "rxvt-unicode-256color", - "linux", "cygwin", + "xterm", + "xterm-color", + "xterm-256color", + "screen", + "screen-256color", + "tmux", + "tmux-256color", + "rxvt-unicode", + "rxvt-unicode-256color", + "linux", + "cygwin", + "xterm-kitty", + "alacritty", + "foot", + "foot-extra", + "wezterm", }; const char* const term = getenv("TERM"); diff --git a/src/commandlineflags.cc b/src/commandlineflags.cc index 0648fe3..dcb4149 100644 --- a/src/commandlineflags.cc +++ b/src/commandlineflags.cc @@ -20,6 +20,10 @@ #include <cstring> #include <iostream> #include <limits> +#include <map> +#include <utility> + +#include "../src/string_util.h" namespace benchmark { namespace { @@ -78,6 +82,30 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) { return true; } +// Parses 'str' into KV pairs. If successful, writes the result to *value and +// returns true; otherwise leaves *value unchanged and returns false. +bool ParseKvPairs(const std::string& src_text, const char* str, + std::map<std::string, std::string>* value) { + std::map<std::string, std::string> kvs; + for (const auto& kvpair : StrSplit(str, ',')) { + const auto kv = StrSplit(kvpair, '='); + if (kv.size() != 2) { + std::cerr << src_text << " is expected to be a comma-separated list of " + << "<key>=<value> strings, but actually has value \"" << str + << "\".\n"; + return false; + } + if (!kvs.emplace(kv[0], kv[1]).second) { + std::cerr << src_text << " is expected to contain unique keys but key \"" + << kv[0] << "\" was repeated.\n"; + return false; + } + } + + *value = kvs; + return true; +} + // Returns the name of the environment variable corresponding to the // given flag. For example, FlagToEnvVar("foo") will return // "BENCHMARK_FOO" in the open-source version. @@ -93,12 +121,14 @@ static std::string FlagToEnvVar(const char* flag) { } // namespace +BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str); } +BENCHMARK_EXPORT int32_t Int32FromEnv(const char* flag, int32_t default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); @@ -111,6 +141,7 @@ int32_t Int32FromEnv(const char* flag, int32_t default_val) { return value; } +BENCHMARK_EXPORT double DoubleFromEnv(const char* flag, double default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); @@ -123,12 +154,28 @@ double DoubleFromEnv(const char* flag, double default_val) { return value; } +BENCHMARK_EXPORT const char* StringFromEnv(const char* flag, const char* default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value = getenv(env_var.c_str()); return value == nullptr ? 
default_val : value; } +BENCHMARK_EXPORT +std::map<std::string, std::string> KvPairsFromEnv( + const char* flag, std::map<std::string, std::string> default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value_str = getenv(env_var.c_str()); + + if (value_str == nullptr) return default_val; + + std::map<std::string, std::string> value; + if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) { + return default_val; + } + return value; +} + // Parses a string as a command line flag. The string should have // the format "--flag=value". When def_optional is true, the "=value" // part can be omitted. @@ -159,6 +206,7 @@ const char* ParseFlagValue(const char* str, const char* flag, return flag_end + 1; } +BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, true); @@ -171,6 +219,7 @@ bool ParseBoolFlag(const char* str, const char* flag, bool* value) { return true; } +BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -183,6 +232,7 @@ bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { value); } +BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -195,6 +245,7 @@ bool ParseDoubleFlag(const char* str, const char* flag, double* value) { value); } +BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -206,23 +257,42 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) { return true; } +BENCHMARK_EXPORT +bool ParseKeyValueFlag(const char* str, const char* flag, + std::map<std::string, std::string>* value) { + const char* const value_str = ParseFlagValue(str, flag, false); + + if (value_str == nullptr) return false; + + for (const auto& kvpair : StrSplit(value_str, ',')) { + const auto kv = StrSplit(kvpair, '='); + if (kv.size() != 2) return false; + value->emplace(kv[0], kv[1]); + } + + return true; +} + +BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag) { return (ParseFlagValue(str, flag, true) != nullptr); } +BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value) { if (value.size() == 1) { char v = value[0]; return isalnum(v) && !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N'); - } else if (!value.empty()) { + } + if (!value.empty()) { std::string value_lower(value); std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(), [](char c) { return static_cast<char>(::tolower(c)); }); return !(value_lower == "false" || value_lower == "no" || value_lower == "off"); - } else - return true; + } + return true; } } // end namespace benchmark diff --git a/src/commandlineflags.h b/src/commandlineflags.h index 3a1f6a8..7882628 100644 --- a/src/commandlineflags.h +++ b/src/commandlineflags.h @@ -2,61 +2,80 @@ #define BENCHMARK_COMMANDLINEFLAGS_H_ #include <cstdint> +#include <map> #include <string> +#include "benchmark/export.h" + // Macro for referencing flags. #define FLAG(name) FLAGS_##name // Macros for declaring flags. 
-#define DECLARE_bool(name) extern bool FLAG(name) -#define DECLARE_int32(name) extern int32_t FLAG(name) -#define DECLARE_double(name) extern double FLAG(name) -#define DECLARE_string(name) extern std::string FLAG(name) +#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name) +#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name) +#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name) +#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name) +#define BM_DECLARE_kvpairs(name) \ + BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name) // Macros for defining flags. -#define DEFINE_bool(name, default_val) \ - bool FLAG(name) = \ - benchmark::BoolFromEnv(#name, default_val) -#define DEFINE_int32(name, default_val) \ - int32_t FLAG(name) = \ - benchmark::Int32FromEnv(#name, default_val) -#define DEFINE_double(name, default_val) \ - double FLAG(name) = \ - benchmark::DoubleFromEnv(#name, default_val) -#define DEFINE_string(name, default_val) \ - std::string FLAG(name) = \ - benchmark::StringFromEnv(#name, default_val) +#define BM_DEFINE_bool(name, default_val) \ + BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) +#define BM_DEFINE_int32(name, default_val) \ + BENCHMARK_EXPORT int32_t FLAG(name) = \ + benchmark::Int32FromEnv(#name, default_val) +#define BM_DEFINE_double(name, default_val) \ + BENCHMARK_EXPORT double FLAG(name) = \ + benchmark::DoubleFromEnv(#name, default_val) +#define BM_DEFINE_string(name, default_val) \ + BENCHMARK_EXPORT std::string FLAG(name) = \ + benchmark::StringFromEnv(#name, default_val) +#define BM_DEFINE_kvpairs(name, default_val) \ + BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \ + benchmark::KvPairsFromEnv(#name, default_val) namespace benchmark { -// Parses a bool from the environment variable -// corresponding to the given flag. +// Parses a bool from the environment variable corresponding to the given flag. // // If the variable exists, returns IsTruthyFlagValue() value; if not, // returns the given default value. +BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val); -// Parses an Int32 from the environment variable -// corresponding to the given flag. +// Parses an Int32 from the environment variable corresponding to the given +// flag. // // If the variable exists, returns ParseInt32() value; if not, returns // the given default value. +BENCHMARK_EXPORT int32_t Int32FromEnv(const char* flag, int32_t default_val); -// Parses an Double from the environment variable -// corresponding to the given flag. +// Parses an Double from the environment variable corresponding to the given +// flag. // // If the variable exists, returns ParseDouble(); if not, returns // the given default value. +BENCHMARK_EXPORT double DoubleFromEnv(const char* flag, double default_val); -// Parses a string from the environment variable -// corresponding to the given flag. +// Parses a string from the environment variable corresponding to the given +// flag. // // If variable exists, returns its value; if not, returns // the given default value. +BENCHMARK_EXPORT const char* StringFromEnv(const char* flag, const char* default_val); +// Parses a set of kvpairs from the environment variable corresponding to the +// given flag. +// +// If variable exists, returns its value; if not, returns +// the given default value. 
+BENCHMARK_EXPORT +std::map<std::string, std::string> KvPairsFromEnv( + const char* flag, std::map<std::string, std::string> default_val); + // Parses a string for a bool flag, in the form of either // "--flag=value" or "--flag". // @@ -66,36 +85,47 @@ const char* StringFromEnv(const char* flag, const char* default_val); // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value); -// Parses a string for an Int32 flag, in the form of -// "--flag=value". +// Parses a string for an Int32 flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); -// Parses a string for a Double flag, in the form of -// "--flag=value". +// Parses a string for a Double flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value); -// Parses a string for a string flag, in the form of -// "--flag=value". +// Parses a string for a string flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value); +// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" +// +// On success, stores the value of the flag in *value and returns true. On +// failure returns false, though *value may have been mutated. +BENCHMARK_EXPORT +bool ParseKeyValueFlag(const char* str, const char* flag, + std::map<std::string, std::string>* value); + // Returns true if the string matches the flag. +BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag); // Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or // some non-alphanumeric character. Also returns false if the value matches // one of 'no', 'false', 'off' (case-insensitive). As a special case, also // returns true if value is the empty string. 
+BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value); } // end namespace benchmark diff --git a/src/complexity.cc b/src/complexity.cc index aeed67f..825c573 100644 --- a/src/complexity.cc +++ b/src/complexity.cc @@ -15,12 +15,13 @@ // Source project : https://github.com/ismaelJimenez/cpp.leastsq // Adapted to be used with google benchmark -#include "benchmark/benchmark.h" +#include "complexity.h" #include <algorithm> #include <cmath> + +#include "benchmark/benchmark.h" #include "check.h" -#include "complexity.h" namespace benchmark { @@ -82,7 +83,6 @@ std::string GetBigOString(BigO complexity) { LeastSq MinimalLeastSq(const std::vector<int64_t>& n, const std::vector<double>& time, BigOFunc* fitting_curve) { - double sigma_gn = 0.0; double sigma_gn_squared = 0.0; double sigma_time = 0.0; double sigma_time_gn = 0.0; @@ -90,7 +90,6 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n, // Calculate least square fitting parameter for (size_t i = 0; i < n.size(); ++i) { double gn_i = fitting_curve(n[i]); - sigma_gn += gn_i; sigma_gn_squared += gn_i * gn_i; sigma_time += time[i]; sigma_time_gn += time[i] * gn_i; @@ -125,10 +124,10 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n, // fitting curve. LeastSq MinimalLeastSq(const std::vector<int64_t>& n, const std::vector<double>& time, const BigO complexity) { - CHECK_EQ(n.size(), time.size()); - CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two - // benchmark runs are given - CHECK_NE(complexity, oNone); + BM_CHECK_EQ(n.size(), time.size()); + BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two + // benchmark runs are given + BM_CHECK_NE(complexity, oNone); LeastSq best_fit; @@ -169,7 +168,8 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( // Populate the accumulators. for (const Run& run : reports) { - CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; + BM_CHECK_GT(run.complexity_n, 0) + << "Did you forget to call SetComplexityN?"; n.push_back(run.complexity_n); real_time.push_back(run.real_accumulated_time / run.iterations); cpu_time.push_back(run.cpu_accumulated_time / run.iterations); @@ -193,11 +193,14 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( // Get the data from the accumulator to BenchmarkReporter::Run's. 
Run big_o; big_o.run_name = run_name; + big_o.family_index = reports[0].family_index; + big_o.per_family_instance_index = reports[0].per_family_instance_index; big_o.run_type = BenchmarkReporter::Run::RT_Aggregate; big_o.repetitions = reports[0].repetitions; big_o.repetition_index = Run::no_repetition_index; big_o.threads = reports[0].threads; big_o.aggregate_name = "BigO"; + big_o.aggregate_unit = StatisticUnit::kTime; big_o.report_label = reports[0].report_label; big_o.iterations = 0; big_o.real_accumulated_time = result_real.coef; @@ -215,8 +218,11 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( // Only add label to mean/stddev if it is same for all runs Run rms; rms.run_name = run_name; + rms.family_index = reports[0].family_index; + rms.per_family_instance_index = reports[0].per_family_instance_index; rms.run_type = BenchmarkReporter::Run::RT_Aggregate; rms.aggregate_name = "RMS"; + rms.aggregate_unit = StatisticUnit::kPercentage; rms.report_label = big_o.report_label; rms.iterations = 0; rms.repetition_index = Run::no_repetition_index; diff --git a/src/complexity.h b/src/complexity.h index df29b48..0a0679b 100644 --- a/src/complexity.h +++ b/src/complexity.h @@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( const std::vector<BenchmarkReporter::Run>& reports); // This data structure will contain the result returned by MinimalLeastSq -// - coef : Estimated coeficient for the high-order term as +// - coef : Estimated coefficient for the high-order term as // interpolated from data. // - rms : Normalized Root Mean Squared Error. // - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability diff --git a/src/console_reporter.cc b/src/console_reporter.cc index 6fd7645..10e05e1 100644 --- a/src/console_reporter.cc +++ b/src/console_reporter.cc @@ -33,6 +33,7 @@ namespace benchmark { +BENCHMARK_EXPORT bool ConsoleReporter::ReportContext(const Context& context) { name_field_width_ = context.name_field_width; printed_header_ = false; @@ -45,19 +46,21 @@ bool ConsoleReporter::ReportContext(const Context& context) { GetErrorStream() << "Color printing is only supported for stdout on windows." " Disabling color printing\n"; - output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color); + output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color); } #endif return true; } +BENCHMARK_EXPORT void ConsoleReporter::PrintHeader(const Run& run) { - std::string str = FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_), - "Benchmark", "Time", "CPU", "Iterations"); - if(!run.counters.empty()) { - if(output_options_ & OO_Tabular) { - for(auto const& c : run.counters) { + std::string str = + FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_), + "Benchmark", "Time", "CPU", "Iterations"); + if (!run.counters.empty()) { + if (output_options_ & OO_Tabular) { + for (auto const& c : run.counters) { str += FormatString(" %10s", c.first.c_str()); } } else { @@ -68,6 +71,7 @@ void ConsoleReporter::PrintHeader(const Run& run) { GetOutputStream() << line << "\n" << str << "\n" << line << "\n"; } +BENCHMARK_EXPORT void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) { for (const auto& run : reports) { // print the header: @@ -97,8 +101,10 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt, va_end(args); } - static std::string FormatTime(double time) { + // For the time columns of the console printer 13 digits are reserved. 
One of + // them is a space and max two of them are the time unit (e.g ns). That puts + // us at 10 digits usable for the number. // Align decimal places... if (time < 1.0) { return FormatString("%10.3f", time); @@ -109,22 +115,33 @@ static std::string FormatTime(double time) { if (time < 100.0) { return FormatString("%10.1f", time); } + // Assuming the time is at max 9.9999e+99 and we have 10 digits for the + // number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print. + if (time > 9999999999 /*max 10 digit number*/) { + return FormatString("%1.4e", time); + } return FormatString("%10.0f", time); } +BENCHMARK_EXPORT void ConsoleReporter::PrintRunData(const Run& result) { typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); auto& Out = GetOutputStream(); - PrinterFn* printer = (output_options_ & OO_Color) ? - (PrinterFn*)ColorPrintf : IgnoreColorPrint; + PrinterFn* printer = (output_options_ & OO_Color) + ? static_cast<PrinterFn*>(ColorPrintf) + : IgnoreColorPrint; auto name_color = (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN; printer(Out, name_color, "%-*s ", name_field_width_, result.benchmark_name().c_str()); - if (result.error_occurred) { + if (internal::SkippedWithError == result.skipped) { printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", - result.error_message.c_str()); + result.skip_message.c_str()); + printer(Out, COLOR_DEFAULT, "\n"); + return; + } else if (internal::SkippedWithMessage == result.skipped) { + printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str()); printer(Out, COLOR_DEFAULT, "\n"); return; } @@ -134,18 +151,23 @@ void ConsoleReporter::PrintRunData(const Run& result) { const std::string real_time_str = FormatTime(real_time); const std::string cpu_time_str = FormatTime(cpu_time); - if (result.report_big_o) { std::string big_o = GetBigOString(result.complexity); - printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(), - cpu_time, big_o.c_str()); + printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, + big_o.c_str(), cpu_time, big_o.c_str()); } else if (result.report_rms) { printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%", cpu_time * 100, "%"); - } else { + } else if (result.run_type != Run::RT_Aggregate || + result.aggregate_unit == StatisticUnit::kTime) { const char* timeLabel = GetTimeUnitString(result.time_unit); - printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel, - cpu_time_str.c_str(), timeLabel); + printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), + timeLabel, cpu_time_str.c_str(), timeLabel); + } else { + assert(result.aggregate_unit == StatisticUnit::kPercentage); + printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", + (100. * result.real_accumulated_time), "%", + (100. * result.cpu_accumulated_time), "%"); } if (!result.report_big_o && !result.report_rms) { @@ -153,12 +175,19 @@ void ConsoleReporter::PrintRunData(const Run& result) { } for (auto& c : result.counters) { - const std::size_t cNameLen = std::max(std::string::size_type(10), - c.first.length()); - auto const& s = HumanReadableNumber(c.second.value, c.second.oneK); + const std::size_t cNameLen = + std::max(std::string::size_type(10), c.first.length()); + std::string s; const char* unit = ""; - if (c.second.flags & Counter::kIsRate) - unit = (c.second.flags & Counter::kInvert) ? 
"s" : "/s"; + if (result.run_type == Run::RT_Aggregate && + result.aggregate_unit == StatisticUnit::kPercentage) { + s = StrFormat("%.2f", 100. * c.second.value); + unit = "%"; + } else { + s = HumanReadableNumber(c.second.value, c.second.oneK); + if (c.second.flags & Counter::kIsRate) + unit = (c.second.flags & Counter::kInvert) ? "s" : "/s"; + } if (output_options_ & OO_Tabular) { printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(), unit); diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc index af2c18f..7b56da1 100644 --- a/src/csv_reporter.cc +++ b/src/csv_reporter.cc @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" -#include "complexity.h" - #include <algorithm> #include <cstdint> #include <iostream> @@ -22,7 +19,9 @@ #include <tuple> #include <vector> +#include "benchmark/benchmark.h" #include "check.h" +#include "complexity.h" #include "string_util.h" #include "timers.h" @@ -37,23 +36,29 @@ std::vector<std::string> elements = { "error_occurred", "error_message"}; } // namespace -std::string CsvEscape(const std::string & s) { +std::string CsvEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size() + 2); for (char c : s) { switch (c) { - case '"' : tmp += "\"\""; break; - default : tmp += c; break; + case '"': + tmp += "\"\""; + break; + default: + tmp += c; + break; } } return '"' + tmp + '"'; } +BENCHMARK_EXPORT bool CSVReporter::ReportContext(const Context& context) { PrintBasicContext(&GetErrorStream(), context); return true; } +BENCHMARK_EXPORT void CSVReporter::ReportRuns(const std::vector<Run>& reports) { std::ostream& Out = GetOutputStream(); @@ -85,7 +90,8 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) { for (const auto& cnt : run.counters) { if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") continue; - CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) + BM_CHECK(user_counter_names_.find(cnt.first) != + user_counter_names_.end()) << "All counters must be present in each run. " << "Counter named \"" << cnt.first << "\" was not in a run after being added to the header"; @@ -99,13 +105,14 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) { } } +BENCHMARK_EXPORT void CSVReporter::PrintRunData(const Run& run) { std::ostream& Out = GetOutputStream(); Out << CsvEscape(run.benchmark_name()) << ","; - if (run.error_occurred) { + if (run.skipped) { Out << std::string(elements.size() - 3, ','); - Out << "true,"; - Out << CsvEscape(run.error_message) << "\n"; + Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ","; + Out << CsvEscape(run.skip_message) << "\n"; return; } diff --git a/src/cycleclock.h b/src/cycleclock.h index 6843b69..ae1ef2d 100644 --- a/src/cycleclock.h +++ b/src/cycleclock.h @@ -36,7 +36,8 @@ // declarations of some other intrinsics, breaking compilation. // Therefore, we simply declare __rdtsc ourselves. See also // http://connect.microsoft.com/VisualStudio/feedback/details/262047 -#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) +#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \ + !defined(_M_ARM64EC) extern "C" uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) #endif @@ -114,8 +115,8 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { // when I know it will work. Otherwise, I'll use __rdtsc and hope // the code is being compiled with a non-ancient compiler. 
_asm rdtsc -#elif defined(COMPILER_MSVC) && defined(_M_ARM64) - // See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019 +#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC)) + // See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics // and https://reviews.llvm.org/D53115 int64_t virtual_timer_value; virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); @@ -132,7 +133,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { // Native Client does not provide any API to access cycle counter. // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday - // because is provides nanosecond resolution (which is noticable at + // because is provides nanosecond resolution (which is noticeable at // least for PNaCl modules running on x86 Mac & Linux). // Initialize to always return 0 if clock_gettime fails. struct timespec ts = {0, 0}; @@ -173,6 +174,10 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { struct timeval tv; gettimeofday(&tv, nullptr); return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__loongarch__) || defined(__csky__) + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__s390__) // Covers both s390 and s390x. // Return the CPU clock. uint64_t tsc; @@ -183,7 +188,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { asm("stck %0" : "=Q"(tsc) : : "cc"); #endif return tsc; -#elif defined(__riscv) // RISC-V +#elif defined(__riscv) // RISC-V // Use RDCYCLE (and RDCYCLEH on riscv32) #if __riscv_xlen == 32 uint32_t cycles_lo, cycles_hi0, cycles_hi1; @@ -204,6 +209,14 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { asm volatile("rdcycle %0" : "=r"(cycles)); return cycles; #endif +#elif defined(__e2k__) || defined(__elbrus__) + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__hexagon__) + uint64_t pcycle; + asm volatile("%0 = C15:14" : "=r"(pcycle)); + return static_cast<double>(pcycle); #else // The soft failover to a generic implementation is automatic only for ARM. // For other platforms the developer is expected to make an attempt to create diff --git a/src/internal_macros.h b/src/internal_macros.h index 91f367b..8dd7d0c 100644 --- a/src/internal_macros.h +++ b/src/internal_macros.h @@ -1,8 +1,6 @@ #ifndef BENCHMARK_INTERNAL_MACROS_H_ #define BENCHMARK_INTERNAL_MACROS_H_ -#include "benchmark/benchmark.h" - /* Needed to detect STL */ #include <cstdlib> @@ -44,6 +42,19 @@ #define BENCHMARK_OS_CYGWIN 1 #elif defined(_WIN32) #define BENCHMARK_OS_WINDOWS 1 + // WINAPI_FAMILY_PARTITION is defined in winapifamily.h. + // We include windows.h which implicitly includes winapifamily.h for compatibility. 
+ #ifndef NOMINMAX + #define NOMINMAX + #endif + #include <windows.h> + #if defined(WINAPI_FAMILY_PARTITION) + #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + #define BENCHMARK_OS_WINDOWS_WIN32 1 + #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define BENCHMARK_OS_WINDOWS_RT 1 + #endif + #endif #if defined(__MINGW32__) #define BENCHMARK_OS_MINGW 1 #endif @@ -80,6 +91,8 @@ #define BENCHMARK_OS_QNX 1 #elif defined(__MVS__) #define BENCHMARK_OS_ZOS 1 +#elif defined(__hexagon__) +#define BENCHMARK_OS_QURT 1 #endif #if defined(__ANDROID__) && defined(__GLIBCXX__) diff --git a/src/json_reporter.cc b/src/json_reporter.cc index 959d245..6559dfd 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" -#include "complexity.h" - #include <algorithm> #include <cmath> #include <cstdint> @@ -25,41 +22,61 @@ #include <tuple> #include <vector> +#include "benchmark/benchmark.h" +#include "complexity.h" #include "string_util.h" #include "timers.h" namespace benchmark { - namespace { -std::string StrEscape(const std::string & s) { +std::string StrEscape(const std::string& s) { std::string tmp; tmp.reserve(s.size()); for (char c : s) { switch (c) { - case '\b': tmp += "\\b"; break; - case '\f': tmp += "\\f"; break; - case '\n': tmp += "\\n"; break; - case '\r': tmp += "\\r"; break; - case '\t': tmp += "\\t"; break; - case '\\': tmp += "\\\\"; break; - case '"' : tmp += "\\\""; break; - default : tmp += c; break; + case '\b': + tmp += "\\b"; + break; + case '\f': + tmp += "\\f"; + break; + case '\n': + tmp += "\\n"; + break; + case '\r': + tmp += "\\r"; + break; + case '\t': + tmp += "\\t"; + break; + case '\\': + tmp += "\\\\"; + break; + case '"': + tmp += "\\\""; + break; + default: + tmp += c; + break; } } return tmp; } std::string FormatKV(std::string const& key, std::string const& value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, const char* value) { - return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); + return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); } std::string FormatKV(std::string const& key, bool value) { - return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false"); + return StrFormat("\"%s\": %s", StrEscape(key).c_str(), + value ? "true" : "false"); } std::string FormatKV(std::string const& key, int64_t value) { @@ -68,12 +85,6 @@ std::string FormatKV(std::string const& key, int64_t value) { return ss.str(); } -std::string FormatKV(std::string const& key, IterationCount value) { - std::stringstream ss; - ss << '"' << StrEscape(key) << "\": " << value; - return ss.str(); -} - std::string FormatKV(std::string const& key, double value) { std::stringstream ss; ss << '"' << StrEscape(key) << "\": "; @@ -123,7 +134,9 @@ bool JSONReporter::ReportContext(const Context& context) { RoundDouble(info.cycles_per_second / 1000000.0)) << ",\n"; if (CPUInfo::Scaling::UNKNOWN != info.scaling) { - out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? true : false) + out << indent + << FormatKV("cpu_scaling_enabled", + info.scaling == CPUInfo::Scaling::ENABLED ? 
true : false) << ",\n"; } @@ -136,8 +149,8 @@ bool JSONReporter::ReportContext(const Context& context) { out << cache_indent << FormatKV("type", CI.type) << ",\n"; out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level)) << ",\n"; - out << cache_indent - << FormatKV("size", static_cast<int64_t>(CI.size)) << ",\n"; + out << cache_indent << FormatKV("size", static_cast<int64_t>(CI.size)) + << ",\n"; out << cache_indent << FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing)) << "\n"; @@ -159,7 +172,19 @@ bool JSONReporter::ReportContext(const Context& context) { #else const char build_type[] = "debug"; #endif - out << indent << FormatKV("library_build_type", build_type) << "\n"; + out << indent << FormatKV("library_build_type", build_type); + + std::map<std::string, std::string>* global_context = + internal::GetGlobalContext(); + + if (global_context != nullptr) { + for (const auto& kv : *global_context) { + out << ",\n"; + out << indent << FormatKV(kv.first, kv.second); + } + } + out << "\n"; + // Close context block and open the list of benchmarks. out << inner_indent << "},\n"; out << inner_indent << "\"benchmarks\": [\n"; @@ -197,6 +222,10 @@ void JSONReporter::PrintRunData(Run const& run) { std::string indent(6, ' '); std::ostream& out = GetOutputStream(); out << indent << FormatKV("name", run.benchmark_name()) << ",\n"; + out << indent << FormatKV("family_index", run.family_index) << ",\n"; + out << indent + << FormatKV("per_family_instance_index", run.per_family_instance_index) + << ",\n"; out << indent << FormatKV("run_name", run.run_name.str()) << ",\n"; out << indent << FormatKV("run_type", [&run]() -> const char* { switch (run.run_type) { @@ -215,15 +244,36 @@ void JSONReporter::PrintRunData(Run const& run) { out << indent << FormatKV("threads", run.threads) << ",\n"; if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n"; + out << indent << FormatKV("aggregate_unit", [&run]() -> const char* { + switch (run.aggregate_unit) { + case StatisticUnit::kTime: + return "time"; + case StatisticUnit::kPercentage: + return "percentage"; + } + BENCHMARK_UNREACHABLE(); + }()) << ",\n"; } - if (run.error_occurred) { - out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; - out << indent << FormatKV("error_message", run.error_message) << ",\n"; + if (internal::SkippedWithError == run.skipped) { + out << indent << FormatKV("error_occurred", true) << ",\n"; + out << indent << FormatKV("error_message", run.skip_message) << ",\n"; + } else if (internal::SkippedWithMessage == run.skipped) { + out << indent << FormatKV("skipped", true) << ",\n"; + out << indent << FormatKV("skip_message", run.skip_message) << ",\n"; } if (!run.report_big_o && !run.report_rms) { out << indent << FormatKV("iterations", run.iterations) << ",\n"; - out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n"; - out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); + if (run.run_type != Run::RT_Aggregate || + run.aggregate_unit == StatisticUnit::kTime) { + out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) + << ",\n"; + out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); + } else { + assert(run.aggregate_unit == StatisticUnit::kPercentage); + out << indent << FormatKV("real_time", run.real_accumulated_time) + << ",\n"; + out << indent << FormatKV("cpu_time", run.cpu_accumulated_time); + } out << ",\n" << indent << FormatKV("time_unit", 
GetTimeUnitString(run.time_unit)); } else if (run.report_big_o) { @@ -241,9 +291,21 @@ void JSONReporter::PrintRunData(Run const& run) { out << ",\n" << indent << FormatKV(c.first, c.second); } - if (run.has_memory_result) { + if (run.memory_result) { + const MemoryManager::Result memory_result = *run.memory_result; out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter); - out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used); + out << ",\n" + << indent << FormatKV("max_bytes_used", memory_result.max_bytes_used); + + auto report_if_present = [&out, &indent](const std::string& label, + int64_t val) { + if (val != MemoryManager::TombstoneValue) + out << ",\n" << indent << FormatKV(label, val); + }; + + report_if_present("total_allocated_bytes", + memory_result.total_allocated_bytes); + report_if_present("net_heap_growth", memory_result.net_heap_growth); } if (!run.report_label.empty()) { @@ -252,4 +314,7 @@ void JSONReporter::PrintRunData(Run const& run) { out << '\n'; } +const int64_t MemoryManager::TombstoneValue = + std::numeric_limits<int64_t>::max(); + } // end namespace benchmark @@ -4,7 +4,12 @@ #include <iostream> #include <ostream> -#include "benchmark/benchmark.h" +// NOTE: this is also defined in benchmark.h but we're trying to avoid a +// dependency. +// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer. +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#define BENCHMARK_HAS_CXX11 +#endif namespace benchmark { namespace internal { @@ -23,7 +28,16 @@ class LogType { private: LogType(std::ostream* out) : out_(out) {} std::ostream* out_; - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); + + // NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have + // a dependency on benchmark.h from here. +#ifndef BENCHMARK_HAS_CXX11 + LogType(const LogType&); + LogType& operator=(const LogType&); +#else + LogType(const LogType&) = delete; + LogType& operator=(const LogType&) = delete; +#endif }; template <class Tp> @@ -47,13 +61,13 @@ inline int& LogLevel() { } inline LogType& GetNullLogInstance() { - static LogType log(nullptr); - return log; + static LogType null_log(static_cast<std::ostream*>(nullptr)); + return null_log; } inline LogType& GetErrorLogInstance() { - static LogType log(&std::clog); - return log; + static LogType error_log(&std::clog); + return error_log; } inline LogType& GetLogInstanceForLevel(int level) { @@ -67,7 +81,7 @@ inline LogType& GetLogInstanceForLevel(int level) { } // end namespace benchmark // clang-format off -#define VLOG(x) \ +#define BM_VLOG(x) \ (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ " ") // clang-format on diff --git a/src/mutex.h b/src/mutex.h index 3fac79a..bec78d9 100644 --- a/src/mutex.h +++ b/src/mutex.h @@ -9,60 +9,60 @@ // Enable thread safety attributes only with clang. // The attributes can be safely erased when compiling with other compilers. 
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) -#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) +#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) #else -#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op #endif -#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x)) +#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) -#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) +#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) -#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) -#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x)) +#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) #define ACQUIRED_BEFORE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) #define ACQUIRED_AFTER(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) #define REQUIRES(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) #define REQUIRES_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) #define ACQUIRE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) #define ACQUIRE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) #define RELEASE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) #define RELEASE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) #define TRY_ACQUIRE(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) #define TRY_ACQUIRE_SHARED(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__)) + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) -#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__)) +#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) -#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x)) +#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) #define ASSERT_SHARED_CAPABILITY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x)) + THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) -#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) +#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) #define NO_THREAD_SAFETY_ANALYSIS \ - THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) + THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) namespace benchmark { @@ -130,7 +130,7 @@ class Barrier { // entered the barrier. Returns iff this is the last thread to // enter the barrier. 
bool createBarrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, running_threads_); + BM_CHECK_LT(entered_, running_threads_); entered_++; if (entered_ < running_threads_) { // Wait for all threads to enter diff --git a/src/perf_counters.cc b/src/perf_counters.cc new file mode 100644 index 0000000..417acdb --- /dev/null +++ b/src/perf_counters.cc @@ -0,0 +1,282 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "perf_counters.h" + +#include <cstring> +#include <memory> +#include <vector> + +#if defined HAVE_LIBPFM +#include "perfmon/pfmlib.h" +#include "perfmon/pfmlib_perf_event.h" +#endif + +namespace benchmark { +namespace internal { + +constexpr size_t PerfCounterValues::kMaxCounters; + +#if defined HAVE_LIBPFM + +size_t PerfCounterValues::Read(const std::vector<int>& leaders) { + // Create a pointer for multiple reads + const size_t bufsize = values_.size() * sizeof(values_[0]); + char* ptr = reinterpret_cast<char*>(values_.data()); + size_t size = bufsize; + for (int lead : leaders) { + auto read_bytes = ::read(lead, ptr, size); + if (read_bytes >= ssize_t(sizeof(uint64_t))) { + // Actual data bytes are all bytes minus initial padding + std::size_t data_bytes = read_bytes - sizeof(uint64_t); + // This should be very cheap since it's in hot cache + std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes); + // Increment our counters + ptr += data_bytes; + size -= data_bytes; + } else { + int err = errno; + GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err + << " " << ::strerror(err) << "\n"; + return 0; + } + } + return (bufsize - size) / sizeof(uint64_t); +} + +const bool PerfCounters::kSupported = true; + +// Initializes libpfm only on the first call. Returns whether that single +// initialization was successful. +bool PerfCounters::Initialize() { + // Function-scope static gets initialized only once on first call. + static const bool success = []() { + return pfm_initialize() == PFM_SUCCESS; + }(); + return success; +} + +bool PerfCounters::IsCounterSupported(const std::string& name) { + Initialize(); + perf_event_attr_t attr; + std::memset(&attr, 0, sizeof(attr)); + pfm_perf_encode_arg_t arg; + std::memset(&arg, 0, sizeof(arg)); + arg.attr = &attr; + const int mode = PFM_PLM3; // user mode only + int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT, + &arg); + return (ret == PFM_SUCCESS); +} + +PerfCounters PerfCounters::Create( + const std::vector<std::string>& counter_names) { + if (!counter_names.empty()) { + Initialize(); + } + + // Valid counters will populate these arrays but we start empty + std::vector<std::string> valid_names; + std::vector<int> counter_ids; + std::vector<int> leader_ids; + + // Resize to the maximum possible + valid_names.reserve(counter_names.size()); + counter_ids.reserve(counter_names.size()); + + const int kCounterMode = PFM_PLM3; // user mode only + + // Group leads will be assigned on demand. 
The idea is that once we cannot + // create a counter descriptor, the reason is that this group has maxed out + // so we set the group_id again to -1 and retry - giving the algorithm a + // chance to create a new group leader to hold the next set of counters. + int group_id = -1; + + // Loop through all performance counters + for (size_t i = 0; i < counter_names.size(); ++i) { + // we are about to push into the valid names vector + // check if we did not reach the maximum + if (valid_names.size() == PerfCounterValues::kMaxCounters) { + // Log a message if we maxed out and stop adding + GetErrorLogInstance() + << counter_names.size() << " counters were requested. The maximum is " + << PerfCounterValues::kMaxCounters << " and " << valid_names.size() + << " were already added. All remaining counters will be ignored\n"; + // stop the loop and return what we have already + break; + } + + // Check if this name is empty + const auto& name = counter_names[i]; + if (name.empty()) { + GetErrorLogInstance() + << "A performance counter name was the empty string\n"; + continue; + } + + // Here first means first in group, ie the group leader + const bool is_first = (group_id < 0); + + // This struct will be populated by libpfm from the counter string + // and then fed into the syscall perf_event_open + struct perf_event_attr attr {}; + attr.size = sizeof(attr); + + // This is the input struct to libpfm. + pfm_perf_encode_arg_t arg{}; + arg.attr = &attr; + const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode, + PFM_OS_PERF_EVENT, &arg); + if (pfm_get != PFM_SUCCESS) { + GetErrorLogInstance() + << "Unknown performance counter name: " << name << "\n"; + continue; + } + + // We then proceed to populate the remaining fields in our attribute struct + // Note: the man page for perf_event_create suggests inherit = true and + // read_format = PERF_FORMAT_GROUP don't work together, but that's not the + // case. + attr.disabled = is_first; + attr.inherit = true; + attr.pinned = is_first; + attr.exclude_kernel = true; + attr.exclude_user = false; + attr.exclude_hv = true; + + // Read all counters in a group in one read. + attr.read_format = PERF_FORMAT_GROUP; + + int id = -1; + while (id < 0) { + static constexpr size_t kNrOfSyscallRetries = 5; + // Retry syscall as it was interrupted often (b/64774091). + for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; + ++num_retries) { + id = perf_event_open(&attr, 0, -1, group_id, 0); + if (id >= 0 || errno != EINTR) { + break; + } + } + if (id < 0) { + // If the file descriptor is negative we might have reached a limit + // in the current group. Set the group_id to -1 and retry + if (group_id >= 0) { + // Create a new group + group_id = -1; + } else { + // At this point we have already retried to set a new group id and + // failed. We then give up. + break; + } + } + } + + // We failed to get a new file descriptor. We might have reached a hard + // hardware limit that cannot be resolved even with group multiplexing + if (id < 0) { + GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor " + "for performance counter " + << name << ". 
Ignoring\n"; + + // We give up on this counter but try to keep going + // as the others would be fine + continue; + } + if (group_id < 0) { + // This is a leader, store and assign it to the current file descriptor + leader_ids.push_back(id); + group_id = id; + } + // This is a valid counter, add it to our descriptor's list + counter_ids.push_back(id); + valid_names.push_back(name); + } + + // Loop through all group leaders, activating them. + // There is another option of starting ALL counters in a process but + // that would be far too intrusive. If the user is using PMCs + // by themselves then this would have a side effect on them. It is + // friendlier to loop through all groups individually. + for (int lead : leader_ids) { + if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) { + // This should never happen but if it does, we give up on the + // entire batch as recovery would be a mess. + GetErrorLogInstance() << "***WARNING*** Failed to start counters. " + "Clearing out all counters.\n"; + + // Close all performance counters + for (int id : counter_ids) { + ::close(id); + } + + // Return an empty object so our internal state is still good and + // the process can continue normally without impact + return NoCounters(); + } + } + + return PerfCounters(std::move(valid_names), std::move(counter_ids), + std::move(leader_ids)); +} + +void PerfCounters::CloseCounters() const { + if (counter_ids_.empty()) { + return; + } + for (int lead : leader_ids_) { + ioctl(lead, PERF_EVENT_IOC_DISABLE); + } + for (int fd : counter_ids_) { + close(fd); + } +} +#else // defined HAVE_LIBPFM +size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; } + +const bool PerfCounters::kSupported = false; + +bool PerfCounters::Initialize() { return false; } + +bool PerfCounters::IsCounterSupported(const std::string&) { return false; } + +PerfCounters PerfCounters::Create( + const std::vector<std::string>& counter_names) { + if (!counter_names.empty()) { + GetErrorLogInstance() << "Performance counters not supported."; + } + return NoCounters(); +} + +void PerfCounters::CloseCounters() const {} +#endif // defined HAVE_LIBPFM + +PerfCountersMeasurement::PerfCountersMeasurement( + const std::vector<std::string>& counter_names) + : start_values_(counter_names.size()), end_values_(counter_names.size()) { + counters_ = PerfCounters::Create(counter_names); +} + +PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept { + if (this != &other) { + CloseCounters(); + + counter_ids_ = std::move(other.counter_ids_); + leader_ids_ = std::move(other.leader_ids_); + counter_names_ = std::move(other.counter_names_); + } + return *this; +} +} // namespace internal +} // namespace benchmark diff --git a/src/perf_counters.h b/src/perf_counters.h new file mode 100644 index 0000000..bf5eb6b --- /dev/null +++ b/src/perf_counters.h @@ -0,0 +1,200 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#ifndef BENCHMARK_PERF_COUNTERS_H +#define BENCHMARK_PERF_COUNTERS_H + +#include <array> +#include <cstdint> +#include <cstring> +#include <memory> +#include <vector> + +#include "benchmark/benchmark.h" +#include "check.h" +#include "log.h" +#include "mutex.h" + +#ifndef BENCHMARK_OS_WINDOWS +#include <unistd.h> +#endif + +#if defined(_MSC_VER) +#pragma warning(push) +// C4251: <symbol> needs to have dll-interface to be used by clients of class +#pragma warning(disable : 4251) +#endif + +namespace benchmark { +namespace internal { + +// Typically, we can only read a small number of counters. There is also a +// padding preceding counter values, when reading multiple counters with one +// syscall (which is desirable). PerfCounterValues abstracts these details. +// The implementation ensures the storage is inlined, and allows 0-based +// indexing into the counter values. +// The object is used in conjunction with a PerfCounters object, by passing it +// to Snapshot(). The Read() method relocates individual reads, discarding +// the initial padding from each group leader in the values buffer such that +// all user accesses through the [] operator are correct. +class BENCHMARK_EXPORT PerfCounterValues { + public: + explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { + BM_CHECK_LE(nr_counters_, kMaxCounters); + } + + // We are reading correctly now so the values don't need to skip padding + uint64_t operator[](size_t pos) const { return values_[pos]; } + + // Increased the maximum to 32 only since the buffer + // is std::array<> backed + static constexpr size_t kMaxCounters = 32; + + private: + friend class PerfCounters; + // Get the byte buffer in which perf counters can be captured. + // This is used by PerfCounters::Read + std::pair<char*, size_t> get_data_buffer() { + return {reinterpret_cast<char*>(values_.data()), + sizeof(uint64_t) * (kPadding + nr_counters_)}; + } + + // This reading is complex, and as the goal of this class is to + // abstract away the intricacies of the reading process, this is + // a better place for it + size_t Read(const std::vector<int>& leaders); + + // Move the padding to 2 due to the reading algorithm (1st padding plus a + // current read padding) + static constexpr size_t kPadding = 2; + std::array<uint64_t, kPadding + kMaxCounters> values_; + const size_t nr_counters_; +}; + +// Collect PMU counters. The object, once constructed, is ready to be used by +// calling read(). PMU counter collection is enabled from the time create() is +// called, to obtain the object, until the object's destructor is called. +class BENCHMARK_EXPORT PerfCounters final { + public: + // True iff this platform supports performance counters. + static const bool kSupported; + + // Returns an empty object + static PerfCounters NoCounters() { return PerfCounters(); } + + ~PerfCounters() { CloseCounters(); } + PerfCounters() = default; + PerfCounters(PerfCounters&&) = default; + PerfCounters(const PerfCounters&) = delete; + PerfCounters& operator=(PerfCounters&&) noexcept; + PerfCounters& operator=(const PerfCounters&) = delete; + + // Platform-specific implementations may choose to do some library + // initialization here. + static bool Initialize(); + + // Check if the given counter is supported, if the app wants to + // check before passing + static bool IsCounterSupported(const std::string& name); + + // Return a PerfCounters object ready to read the counters with the names + // specified. The values are user-mode only.
The counter name format is + // implementation and OS specific. + // In case of failure, this method will in the worst case return an + // empty object whose state will still be valid. + static PerfCounters Create(const std::vector<std::string>& counter_names); + + // Take a snapshot of the current value of the counters into the provided + // valid PerfCounterValues storage. The values are populated such that: + // names()[i]'s value is (*values)[i] + BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { +#ifndef BENCHMARK_OS_WINDOWS + assert(values != nullptr); + return values->Read(leader_ids_) == counter_ids_.size(); +#else + (void)values; + return false; +#endif + } + + const std::vector<std::string>& names() const { return counter_names_; } + size_t num_counters() const { return counter_names_.size(); } + + private: + PerfCounters(const std::vector<std::string>& counter_names, + std::vector<int>&& counter_ids, std::vector<int>&& leader_ids) + : counter_ids_(std::move(counter_ids)), + leader_ids_(std::move(leader_ids)), + counter_names_(counter_names) {} + + void CloseCounters() const; + + std::vector<int> counter_ids_; + std::vector<int> leader_ids_; + std::vector<std::string> counter_names_; +}; + +// Typical usage of the above primitives. +class BENCHMARK_EXPORT PerfCountersMeasurement final { + public: + PerfCountersMeasurement(const std::vector<std::string>& counter_names); + + size_t num_counters() const { return counters_.num_counters(); } + + std::vector<std::string> names() const { return counters_.names(); } + + BENCHMARK_ALWAYS_INLINE bool Start() { + if (num_counters() == 0) return true; + // Tell the compiler to not move instructions above/below where we take + // the snapshot. + ClobberMemory(); + valid_read_ &= counters_.Snapshot(&start_values_); + ClobberMemory(); + + return valid_read_; + } + + BENCHMARK_ALWAYS_INLINE bool Stop( + std::vector<std::pair<std::string, double>>& measurements) { + if (num_counters() == 0) return true; + // Tell the compiler to not move instructions above/below where we take + // the snapshot. + ClobberMemory(); + valid_read_ &= counters_.Snapshot(&end_values_); + ClobberMemory(); + + for (size_t i = 0; i < counters_.names().size(); ++i) { + double measurement = static_cast<double>(end_values_[i]) - + static_cast<double>(start_values_[i]); + measurements.push_back({counters_.names()[i], measurement}); + } + + return valid_read_; + } + + private: + PerfCounters counters_; + bool valid_read_ = true; + PerfCounterValues start_values_; + PerfCounterValues end_values_; +}; + +} // namespace internal +} // namespace benchmark + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#endif // BENCHMARK_PERF_COUNTERS_H @@ -33,7 +33,7 @@ // Prefer C regex libraries when compiling w/o exceptions so that we can // correctly report errors. #if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \ - defined(BENCHMARK_HAVE_STD_REGEX) && \ + defined(HAVE_STD_REGEX) && \ (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX)) #undef HAVE_STD_REGEX #endif @@ -126,7 +126,7 @@ inline bool Regex::Init(const std::string& spec, std::string* error) { // regerror returns the number of bytes necessary to null terminate // the string, so we move that when assigning to error. 
- CHECK_NE(needed, 0); + BM_CHECK_NE(needed, 0); error->assign(errbuf, needed - 1); delete[] errbuf; diff --git a/src/reporter.cc b/src/reporter.cc index 337575a..076bc31 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -12,17 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" -#include "timers.h" - #include <cstdlib> - #include <iostream> +#include <map> +#include <string> #include <tuple> #include <vector> +#include "benchmark/benchmark.h" #include "check.h" #include "string_util.h" +#include "timers.h" namespace benchmark { @@ -33,10 +33,14 @@ BenchmarkReporter::~BenchmarkReporter() {} void BenchmarkReporter::PrintBasicContext(std::ostream *out, Context const &context) { - CHECK(out) << "cannot be null"; + BM_CHECK(out) << "cannot be null"; auto &Out = *out; +#ifndef BENCHMARK_OS_QURT + // Date/time information is not available on QuRT. + // Attempting to get it via this call cause the binary to crash. Out << LocalDateTimeString() << "\n"; +#endif if (context.executable_name) Out << "Running " << context.executable_name << "\n"; @@ -64,6 +68,15 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, Out << "\n"; } + std::map<std::string, std::string> *global_context = + internal::GetGlobalContext(); + + if (global_context != nullptr) { + for (const auto &kv : *global_context) { + Out << kv.first << ": " << kv.second << "\n"; + } + } + if (CPUInfo::Scaling::ENABLED == info.scaling) { Out << "***WARNING*** CPU scaling is enabled, the benchmark " "real time measurements may be noisy and will incur extra " diff --git a/src/sleep.cc b/src/sleep.cc deleted file mode 100644 index 4609d54..0000000 --- a/src/sleep.cc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "sleep.h" - -#include <cerrno> -#include <cstdlib> -#include <ctime> - -#include "internal_macros.h" - -#ifdef BENCHMARK_OS_WINDOWS -#include <windows.h> -#endif - -#ifdef BENCHMARK_OS_ZOS -#include <unistd.h> -#endif - -namespace benchmark { -#ifdef BENCHMARK_OS_WINDOWS -// Window's Sleep takes milliseconds argument. -void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); } -void SleepForSeconds(double seconds) { - SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds)); -} -#else // BENCHMARK_OS_WINDOWS -void SleepForMicroseconds(int microseconds) { -#ifdef BENCHMARK_OS_ZOS - // z/OS does not support nanosleep. Instead call sleep() and then usleep() to - // sleep for the remaining microseconds because usleep() will fail if its - // argument is greater than 1000000. 
- div_t sleepTime = div(microseconds, kNumMicrosPerSecond); - int seconds = sleepTime.quot; - while (seconds != 0) - seconds = sleep(seconds); - while (usleep(sleepTime.rem) == -1 && errno == EINTR) - ; -#else - struct timespec sleep_time; - sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; - sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; - while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) - ; // Ignore signals and wait for the full interval to elapse. -#endif -} - -void SleepForMilliseconds(int milliseconds) { - SleepForMicroseconds(milliseconds * kNumMicrosPerMilli); -} - -void SleepForSeconds(double seconds) { - SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond)); -} -#endif // BENCHMARK_OS_WINDOWS -} // end namespace benchmark diff --git a/src/sleep.h b/src/sleep.h deleted file mode 100644 index f98551a..0000000 --- a/src/sleep.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef BENCHMARK_SLEEP_H_ -#define BENCHMARK_SLEEP_H_ - -namespace benchmark { -const int kNumMillisPerSecond = 1000; -const int kNumMicrosPerMilli = 1000; -const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000; -const int kNumNanosPerMicro = 1000; -const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond; - -void SleepForMilliseconds(int milliseconds); -void SleepForSeconds(double seconds); -} // end namespace benchmark - -#endif // BENCHMARK_SLEEP_H_ diff --git a/src/statistics.cc b/src/statistics.cc index bd5a3d6..844e926 100644 --- a/src/statistics.cc +++ b/src/statistics.cc @@ -13,15 +13,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" +#include "statistics.h" #include <algorithm> #include <cmath> #include <numeric> #include <string> #include <vector> + +#include "benchmark/benchmark.h" #include "check.h" -#include "statistics.h" namespace benchmark { @@ -41,13 +42,13 @@ double StatisticsMedian(const std::vector<double>& v) { auto center = copy.begin() + v.size() / 2; std::nth_element(copy.begin(), center, copy.end()); - // did we have an odd number of samples? - // if yes, then center is the median - // it no, then we are looking for the average between center and the value - // before + // Did we have an odd number of samples? If yes, then center is the median. + // If not, then we are looking for the average between center and the value + // before. Instead of resorting, we just look for the max value before it, + // which is not necessarily the element immediately preceding `center` Since + // `copy` is only partially sorted by `nth_element`. 
if (v.size() % 2 == 1) return *center; - auto center2 = copy.begin() + v.size() / 2 - 1; - std::nth_element(copy.begin(), center2, copy.end()); + auto center2 = std::max_element(copy.begin(), center); return (*center + *center2) / 2.0; } @@ -74,14 +75,22 @@ double StatisticsStdDev(const std::vector<double>& v) { return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); } +double StatisticsCV(const std::vector<double>& v) { + if (v.size() < 2) return 0.0; + + const auto stddev = StatisticsStdDev(v); + const auto mean = StatisticsMean(v); + + return stddev / mean; +} + std::vector<BenchmarkReporter::Run> ComputeStats( const std::vector<BenchmarkReporter::Run>& reports) { typedef BenchmarkReporter::Run Run; std::vector<Run> results; - auto error_count = - std::count_if(reports.begin(), reports.end(), - [](Run const& run) { return run.error_occurred; }); + auto error_count = std::count_if(reports.begin(), reports.end(), + [](Run const& run) { return run.skipped; }); if (reports.size() - error_count < 2) { // We don't report aggregated data if there was a single run. @@ -108,26 +117,28 @@ std::vector<BenchmarkReporter::Run> ComputeStats( for (auto const& cnt : r.counters) { auto it = counter_stats.find(cnt.first); if (it == counter_stats.end()) { - counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}}); - it = counter_stats.find(cnt.first); + it = counter_stats + .emplace(cnt.first, + CounterStat{cnt.second, std::vector<double>{}}) + .first; it->second.s.reserve(reports.size()); } else { - CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); + BM_CHECK_EQ(it->second.c.flags, cnt.second.flags); } } } // Populate the accumulators. for (Run const& run : reports) { - CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); - CHECK_EQ(run_iterations, run.iterations); - if (run.error_occurred) continue; + BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); + BM_CHECK_EQ(run_iterations, run.iterations); + if (run.skipped) continue; real_accumulated_time_stat.emplace_back(run.real_accumulated_time); cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); // user counters for (auto const& cnt : run.counters) { auto it = counter_stats.find(cnt.first); - CHECK_NE(it, counter_stats.end()); + BM_CHECK_NE(it, counter_stats.end()); it->second.s.emplace_back(cnt.second); } } @@ -148,11 +159,14 @@ std::vector<BenchmarkReporter::Run> ComputeStats( // Get the data from the accumulator to BenchmarkReporter::Run's. Run data; data.run_name = reports[0].run_name; + data.family_index = reports[0].family_index; + data.per_family_instance_index = reports[0].per_family_instance_index; data.run_type = BenchmarkReporter::Run::RT_Aggregate; data.threads = reports[0].threads; data.repetitions = reports[0].repetitions; data.repetition_index = Run::no_repetition_index; data.aggregate_name = Stat.name_; + data.aggregate_unit = Stat.unit_; data.report_label = report_label; // It is incorrect to say that an aggregate is computed over @@ -165,13 +179,15 @@ std::vector<BenchmarkReporter::Run> ComputeStats( data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat); data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat); - // We will divide these times by data.iterations when reporting, but the - // data.iterations is not nessesairly the scale of these measurements, - // because in each repetition, these timers are sum over all the iterations. 
- // And if we want to say that the stats are over N repetitions and not - // M iterations, we need to multiply these by (N/M). - data.real_accumulated_time *= iteration_rescale_factor; - data.cpu_accumulated_time *= iteration_rescale_factor; + if (data.aggregate_unit == StatisticUnit::kTime) { + // We will divide these times by data.iterations when reporting, but the + // data.iterations is not necessarily the scale of these measurements, + // because in each repetition, these timers are sum over all the iters. + // And if we want to say that the stats are over N repetitions and not + // M iterations, we need to multiply these by (N/M). + data.real_accumulated_time *= iteration_rescale_factor; + data.cpu_accumulated_time *= iteration_rescale_factor; + } data.time_unit = reports[0].time_unit; diff --git a/src/statistics.h b/src/statistics.h index 7eccc85..6e5560e 100644 --- a/src/statistics.h +++ b/src/statistics.h @@ -22,15 +22,22 @@ namespace benchmark { -// Return a vector containing the mean, median and standard devation information -// (and any user-specified info) for the specified list of reports. If 'reports' -// contains less than two non-errored runs an empty vector is returned +// Return a vector containing the mean, median and standard deviation +// information (and any user-specified info) for the specified list of reports. +// If 'reports' contains less than two non-errored runs an empty vector is +// returned +BENCHMARK_EXPORT std::vector<BenchmarkReporter::Run> ComputeStats( const std::vector<BenchmarkReporter::Run>& reports); +BENCHMARK_EXPORT double StatisticsMean(const std::vector<double>& v); +BENCHMARK_EXPORT double StatisticsMedian(const std::vector<double>& v); +BENCHMARK_EXPORT double StatisticsStdDev(const std::vector<double>& v); +BENCHMARK_EXPORT +double StatisticsCV(const std::vector<double>& v); } // end namespace benchmark diff --git a/src/string_util.cc b/src/string_util.cc index ac60b55..c69e40a 100644 --- a/src/string_util.cc +++ b/src/string_util.cc @@ -11,16 +11,17 @@ #include <sstream> #include "arraysize.h" +#include "benchmark/benchmark.h" namespace benchmark { namespace { - // kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. -const char kBigSIUnits[] = "kMGTPEZY"; +const char* const kBigSIUnits[] = {"k", "M", "G", "T", "P", "E", "Z", "Y"}; // Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. -const char kBigIECUnits[] = "KMGTPEZY"; +const char* const kBigIECUnits[] = {"Ki", "Mi", "Gi", "Ti", + "Pi", "Ei", "Zi", "Yi"}; // milli, micro, nano, pico, femto, atto, zepto, yocto. -const char kSmallSIUnits[] = "munpfazy"; +const char* const kSmallSIUnits[] = {"m", "u", "n", "p", "f", "a", "z", "y"}; // We require that all three arrays have the same size. static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), @@ -30,9 +31,8 @@ static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), static const int64_t kUnitsSize = arraysize(kBigSIUnits); -void ToExponentAndMantissa(double val, double thresh, int precision, - double one_k, std::string* mantissa, - int64_t* exponent) { +void ToExponentAndMantissa(double val, int precision, double one_k, + std::string* mantissa, int64_t* exponent) { std::stringstream mantissa_stream; if (val < 0) { @@ -43,8 +43,8 @@ void ToExponentAndMantissa(double val, double thresh, int precision, // Adjust threshold so that it never excludes things which can't be rendered // in 'precision' digits. 
const double adjusted_threshold = - std::max(thresh, 1.0 / std::pow(10.0, precision)); - const double big_threshold = adjusted_threshold * one_k; + std::max(1.0, 1.0 / std::pow(10.0, precision)); + const double big_threshold = (adjusted_threshold * one_k) - 1; const double small_threshold = adjusted_threshold; // Values in ]simple_threshold,small_threshold[ will be printed as-is const double simple_threshold = 0.01; @@ -92,37 +92,20 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) { const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); if (index >= kUnitsSize) return ""; - const char* array = + const char* const* array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); - if (iec) - return array[index] + std::string("i"); - else - return std::string(1, array[index]); + + return std::string(array[index]); } -std::string ToBinaryStringFullySpecified(double value, double threshold, - int precision, double one_k = 1024.0) { +std::string ToBinaryStringFullySpecified(double value, int precision, + Counter::OneK one_k) { std::string mantissa; int64_t exponent; - ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa, + ToExponentAndMantissa(value, precision, + one_k == Counter::kIs1024 ? 1024.0 : 1000.0, &mantissa, &exponent); - return mantissa + ExponentToPrefix(exponent, false); -} - -} // end namespace - -void AppendHumanReadable(int n, std::string* str) { - std::stringstream ss; - // Round down to the nearest SI prefix. - ss << ToBinaryStringFullySpecified(n, 1.0, 0); - *str += ss.str(); -} - -std::string HumanReadableNumber(double n, double one_k) { - // 1.1 means that figures up to 1.1k should be shown with the next unit down; - // this softens edge effects. - // 1 means that we should show one decimal place of precision. - return ToBinaryStringFullySpecified(n, 1.1, 1, one_k); + return mantissa + ExponentToPrefix(exponent, one_k == Counter::kIs1024); } std::string StrFormatImp(const char* msg, va_list args) { @@ -133,28 +116,34 @@ std::string StrFormatImp(const char* msg, va_list args) { // TODO(ericwf): use std::array for first attempt to avoid one memory // allocation guess what the size might be std::array<char, 256> local_buff; - std::size_t size = local_buff.size(); + // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk - auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); + auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp); va_end(args_cp); // handle empty expansion if (ret == 0) return std::string{}; - if (static_cast<std::size_t>(ret) < size) + if (static_cast<std::size_t>(ret) < local_buff.size()) return std::string(local_buff.data()); // we did not provide a long enough buffer on our first attempt. // add 1 to size to account for null-byte in size cast to prevent overflow - size = static_cast<std::size_t>(ret) + 1; + std::size_t size = static_cast<std::size_t>(ret) + 1; auto buff_ptr = std::unique_ptr<char[]>(new char[size]); // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk - ret = vsnprintf(buff_ptr.get(), size, msg, args); + vsnprintf(buff_ptr.get(), size, msg, args); return std::string(buff_ptr.get()); } +} // end namespace + +std::string HumanReadableNumber(double n, Counter::OneK one_k) { + return ToBinaryStringFullySpecified(n, 1, one_k); +} + std::string StrFormat(const char* format, ...) 
{ va_list args; va_start(args, format); @@ -163,6 +152,19 @@ std::string StrFormat(const char* format, ...) { return tmp; } +std::vector<std::string> StrSplit(const std::string& str, char delim) { + if (str.empty()) return {}; + std::vector<std::string> ret; + size_t first = 0; + size_t next = str.find(delim); + for (; next != std::string::npos; + first = next + 1, next = str.find(delim, first)) { + ret.push_back(str.substr(first, next - first)); + } + ret.push_back(str.substr(first)); + return ret; +} + #ifdef BENCHMARK_STL_ANDROID_GNUSTL /* * GNU STL in Android NDK lacks support for some C++11 functions, including @@ -185,11 +187,10 @@ unsigned long stoul(const std::string& str, size_t* pos, int base) { /* Check for errors and return */ if (strtoulErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of unsigned long"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of unsigned long"); } else if (strEnd == strStart || strtoulErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast<size_t>(strEnd - strStart); @@ -212,11 +213,10 @@ int stoi(const std::string& str, size_t* pos, int base) { /* Check for errors and return */ if (strtolErrno == ERANGE || long(int(result)) != result) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of int"); } else if (strEnd == strStart || strtolErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast<size_t>(strEnd - strStart); @@ -239,11 +239,10 @@ double stod(const std::string& str, size_t* pos) { /* Check for errors and return */ if (strtodErrno == ERANGE) { - throw std::out_of_range( - "stoul failed: " + str + " is outside of range of int"); + throw std::out_of_range("stoul failed: " + str + + " is outside of range of int"); } else if (strEnd == strStart || strtodErrno != 0) { - throw std::invalid_argument( - "stoul failed: " + str + " is not an integer"); + throw std::invalid_argument("stoul failed: " + str + " is not an integer"); } if (pos != nullptr) { *pos = static_cast<size_t>(strEnd - strStart); diff --git a/src/string_util.h b/src/string_util.h index 09d7b4b..731aa2c 100644 --- a/src/string_util.h +++ b/src/string_util.h @@ -4,14 +4,19 @@ #include <sstream> #include <string> #include <utility> +#include <vector> + +#include "benchmark/benchmark.h" +#include "benchmark/export.h" +#include "check.h" #include "internal_macros.h" namespace benchmark { -void AppendHumanReadable(int n, std::string* str); - -std::string HumanReadableNumber(double n, double one_k = 1024.0); +BENCHMARK_EXPORT +std::string HumanReadableNumber(double n, Counter::OneK one_k); +BENCHMARK_EXPORT #if defined(__MINGW32__) __attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2))) #elif defined(__GNUC__) @@ -37,6 +42,11 @@ inline std::string StrCat(Args&&... args) { return ss.str(); } +BENCHMARK_EXPORT +std::vector<std::string> StrSplit(const std::string& str, char delim); + +// Disable lint checking for this block since it re-implements C functions. 
+// NOLINTBEGIN #ifdef BENCHMARK_STL_ANDROID_GNUSTL /* * GNU STL in Android NDK lacks support for some C++11 functions, including @@ -45,14 +55,15 @@ inline std::string StrCat(Args&&... args) { * namespace, not std:: namespace. */ unsigned long stoul(const std::string& str, size_t* pos = nullptr, - int base = 10); + int base = 10); int stoi(const std::string& str, size_t* pos = nullptr, int base = 10); double stod(const std::string& str, size_t* pos = nullptr); #else -using std::stoul; -using std::stoi; -using std::stod; +using std::stod; // NOLINT(misc-unused-using-decls) +using std::stoi; // NOLINT(misc-unused-using-decls) +using std::stoul; // NOLINT(misc-unused-using-decls) #endif +// NOLINTEND } // end namespace benchmark diff --git a/src/sysinfo.cc b/src/sysinfo.cc index b30b4f8..922e83a 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -19,10 +19,11 @@ #undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA #include <versionhelpers.h> #include <windows.h> + #include <codecvt> #else #include <fcntl.h> -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include <sys/resource.h> #endif #include <sys/time.h> @@ -37,10 +38,17 @@ #endif #if defined(BENCHMARK_OS_SOLARIS) #include <kstat.h> +#include <netdb.h> #endif #if defined(BENCHMARK_OS_QNX) #include <sys/syspage.h> #endif +#if defined(BENCHMARK_OS_QURT) +#include <qurt.h> +#endif +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) +#include <pthread.h> +#endif #include <algorithm> #include <array> @@ -55,17 +63,19 @@ #include <iostream> #include <iterator> #include <limits> +#include <locale> #include <memory> +#include <random> #include <sstream> -#include <locale> #include <utility> +#include "benchmark/benchmark.h" #include "check.h" #include "cycleclock.h" #include "internal_macros.h" #include "log.h" -#include "sleep.h" #include "string_util.h" +#include "timers.h" namespace benchmark { namespace { @@ -90,67 +100,59 @@ BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { /// `sysctl` with the result type it's to be interpreted as. struct ValueUnion { union DataT { - uint32_t uint32_value; - uint64_t uint64_value; + int32_t int32_value; + int64_t int64_value; // For correct aliasing of union members from bytes. char bytes[8]; }; using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>; // The size of the data union member + its trailing array size. 
- size_t Size; - DataPtr Buff; + std::size_t size; + DataPtr buff; public: - ValueUnion() : Size(0), Buff(nullptr, &std::free) {} + ValueUnion() : size(0), buff(nullptr, &std::free) {} - explicit ValueUnion(size_t BuffSize) - : Size(sizeof(DataT) + BuffSize), - Buff(::new (std::malloc(Size)) DataT(), &std::free) {} + explicit ValueUnion(std::size_t buff_size) + : size(sizeof(DataT) + buff_size), + buff(::new (std::malloc(size)) DataT(), &std::free) {} ValueUnion(ValueUnion&& other) = default; - explicit operator bool() const { return bool(Buff); } + explicit operator bool() const { return bool(buff); } - char* data() const { return Buff->bytes; } + char* data() const { return buff->bytes; } std::string GetAsString() const { return std::string(data()); } int64_t GetAsInteger() const { - if (Size == sizeof(Buff->uint32_value)) - return static_cast<int32_t>(Buff->uint32_value); - else if (Size == sizeof(Buff->uint64_value)) - return static_cast<int64_t>(Buff->uint64_value); - BENCHMARK_UNREACHABLE(); - } - - uint64_t GetAsUnsigned() const { - if (Size == sizeof(Buff->uint32_value)) - return Buff->uint32_value; - else if (Size == sizeof(Buff->uint64_value)) - return Buff->uint64_value; + if (size == sizeof(buff->int32_value)) + return buff->int32_value; + else if (size == sizeof(buff->int64_value)) + return buff->int64_value; BENCHMARK_UNREACHABLE(); } template <class T, int N> std::array<T, N> GetAsArray() { - const int ArrSize = sizeof(T) * N; - CHECK_LE(ArrSize, Size); - std::array<T, N> Arr; - std::memcpy(Arr.data(), data(), ArrSize); - return Arr; + const int arr_size = sizeof(T) * N; + BM_CHECK_LE(arr_size, size); + std::array<T, N> arr; + std::memcpy(arr.data(), data(), arr_size); + return arr; } }; -ValueUnion GetSysctlImp(std::string const& Name) { +ValueUnion GetSysctlImp(std::string const& name) { #if defined BENCHMARK_OS_OPENBSD int mib[2]; mib[0] = CTL_HW; - if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){ + if ((name == "hw.ncpu") || (name == "hw.cpuspeed")) { ValueUnion buff(sizeof(int)); - if (Name == "hw.ncpu") { + if (name == "hw.ncpu") { mib[1] = HW_NCPU; } else { mib[1] = HW_CPUSPEED; @@ -163,41 +165,41 @@ ValueUnion GetSysctlImp(std::string const& Name) { } return ValueUnion(); #else - size_t CurBuffSize = 0; - if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1) + std::size_t cur_buff_size = 0; + if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1) return ValueUnion(); - ValueUnion buff(CurBuffSize); - if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) + ValueUnion buff(cur_buff_size); + if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0) return buff; return ValueUnion(); #endif } BENCHMARK_MAYBE_UNUSED -bool GetSysctl(std::string const& Name, std::string* Out) { - Out->clear(); - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - Out->assign(Buff.data()); +bool GetSysctl(std::string const& name, std::string* out) { + out->clear(); + auto buff = GetSysctlImp(name); + if (!buff) return false; + out->assign(buff.data()); return true; } template <class Tp, class = typename std::enable_if<std::is_integral<Tp>::value>::type> -bool GetSysctl(std::string const& Name, Tp* Out) { - *Out = 0; - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = static_cast<Tp>(Buff.GetAsUnsigned()); +bool GetSysctl(std::string const& name, Tp* out) { + *out = 0; + auto buff = GetSysctlImp(name); + if (!buff) return false; + *out = static_cast<Tp>(buff.GetAsInteger()); return true; } 
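
Note on the GetSysctl helpers just above: they wrap the usual two-call sysctlbyname idiom on BSD/macOS, first querying the required buffer size with a null output pointer and then reading the value into a buffer of that size. A minimal standalone sketch of that idiom (BSD/macOS only, independent of the ValueUnion wrapper shown in the diff; the helper name is illustrative):

    #include <sys/types.h>
    #include <sys/sysctl.h>  // sysctlbyname()
    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <vector>

    // Read an arbitrary sysctl value as raw bytes (e.g. "hw.ncpu").
    std::vector<char> ReadSysctlRaw(const std::string& name) {
      std::size_t size = 0;
      // First call: ask how many bytes the value occupies.
      if (sysctlbyname(name.c_str(), nullptr, &size, nullptr, 0) != 0) return {};
      std::vector<char> buf(size);
      // Second call: read the value into the sized buffer.
      if (sysctlbyname(name.c_str(), buf.data(), &size, nullptr, 0) != 0) return {};
      buf.resize(size);
      return buf;
    }

    int main() {
      const std::vector<char> raw = ReadSysctlRaw("hw.ncpu");
      if (raw.size() == sizeof(int)) {
        int ncpu = 0;
        // Copy out of the byte buffer, as GetAsArray does, to avoid aliasing issues.
        std::memcpy(&ncpu, raw.data(), sizeof(ncpu));
        std::printf("hw.ncpu = %d\n", ncpu);
      }
      return 0;
    }

The library's version layers ValueUnion on top of this pattern so the same bytes can be interpreted as an integer, a string, or a fixed-size array.
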
template <class Tp, size_t N> -bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) { - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = Buff.GetAsArray<Tp, N>(); +bool GetSysctl(std::string const& name, std::array<Tp, N>* out) { + auto buff = GetSysctlImp(name); + if (!buff) return false; + *out = buff.GetAsArray<Tp, N>(); return true; } #endif @@ -214,10 +216,9 @@ bool ReadFromFile(std::string const& fname, ArgT* arg) { CPUInfo::Scaling CpuScaling(int num_cpus) { // We don't have a valid CPU count, so don't even bother. if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; -#ifdef BENCHMARK_OS_QNX +#if defined(BENCHMARK_OS_QNX) return CPUInfo::Scaling::UNKNOWN; -#endif -#ifndef BENCHMARK_OS_WINDOWS +#elif !defined(BENCHMARK_OS_WINDOWS) // On Linux, the CPUfreq subsystem exposes CPU information as files on the // local file system. If reading the exported files fails, then we may not be // running on Linux, so we silently ignore all the read errors. @@ -225,28 +226,30 @@ CPUInfo::Scaling CpuScaling(int num_cpus) { for (int cpu = 0; cpu < num_cpus; ++cpu) { std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED; + if (ReadFromFile(governor_file, &res) && res != "performance") + return CPUInfo::Scaling::ENABLED; } return CPUInfo::Scaling::DISABLED; -#endif +#else return CPUInfo::Scaling::UNKNOWN; +#endif } -int CountSetBitsInCPUMap(std::string Val) { - auto CountBits = [](std::string Part) { +int CountSetBitsInCPUMap(std::string val) { + auto CountBits = [](std::string part) { using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>; - Part = "0x" + Part; - CPUMask Mask(benchmark::stoul(Part, nullptr, 16)); - return static_cast<int>(Mask.count()); + part = "0x" + part; + CPUMask mask(benchmark::stoul(part, nullptr, 16)); + return static_cast<int>(mask.count()); }; - size_t Pos; + std::size_t pos; int total = 0; - while ((Pos = Val.find(',')) != std::string::npos) { - total += CountBits(Val.substr(0, Pos)); - Val = Val.substr(Pos + 1); + while ((pos = val.find(',')) != std::string::npos) { + total += CountBits(val.substr(0, pos)); + val = val.substr(pos + 1); } - if (!Val.empty()) { - total += CountBits(Val); + if (!val.empty()) { + total += CountBits(val); } return total; } @@ -255,16 +258,16 @@ BENCHMARK_MAYBE_UNUSED std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() { std::vector<CPUInfo::CacheInfo> res; std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; - int Idx = 0; + int idx = 0; while (true) { CPUInfo::CacheInfo info; - std::string FPath = StrCat(dir, "index", Idx++, "/"); - std::ifstream f(StrCat(FPath, "size").c_str()); + std::string fpath = StrCat(dir, "index", idx++, "/"); + std::ifstream f(StrCat(fpath, "size").c_str()); if (!f.is_open()) break; std::string suffix; f >> info.size; if (f.fail()) - PrintErrorAndDie("Failed while reading file '", FPath, "size'"); + PrintErrorAndDie("Failed while reading file '", fpath, "size'"); if (f.good()) { f >> suffix; if (f.bad()) @@ -275,13 +278,13 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() { else if (suffix == "K") info.size *= 1024; } - if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) - PrintErrorAndDie("Failed to read from file ", FPath, "type"); - if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) - PrintErrorAndDie("Failed to read from file ", FPath, "level"); + if (!ReadFromFile(StrCat(fpath, "type"), &info.type)) + 
PrintErrorAndDie("Failed to read from file ", fpath, "type"); + if (!ReadFromFile(StrCat(fpath, "level"), &info.level)) + PrintErrorAndDie("Failed to read from file ", fpath, "level"); std::string map_str; - if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str)) - PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map"); + if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str)) + PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map"); info.num_sharing = CountSetBitsInCPUMap(map_str); res.push_back(info); } @@ -292,26 +295,26 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() { #ifdef BENCHMARK_OS_MACOSX std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() { std::vector<CPUInfo::CacheInfo> res; - std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}}; - GetSysctl("hw.cacheconfig", &CacheCounts); + std::array<int, 4> cache_counts{{0, 0, 0, 0}}; + GetSysctl("hw.cacheconfig", &cache_counts); struct { std::string name; std::string type; int level; - uint64_t num_sharing; - } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]}, - {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]}, - {"hw.l2cachesize", "Unified", 2, CacheCounts[2]}, - {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}}; - for (auto& C : Cases) { + int num_sharing; + } cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]}, + {"hw.l1icachesize", "Instruction", 1, cache_counts[1]}, + {"hw.l2cachesize", "Unified", 2, cache_counts[2]}, + {"hw.l3cachesize", "Unified", 3, cache_counts[3]}}; + for (auto& c : cases) { int val; - if (!GetSysctl(C.name, &val)) continue; + if (!GetSysctl(c.name, &val)) continue; CPUInfo::CacheInfo info; - info.type = C.type; - info.level = C.level; + info.type = c.type; + info.level = c.level; info.size = val; - info.num_sharing = static_cast<int>(C.num_sharing); + info.num_sharing = c.num_sharing; res.push_back(std::move(info)); } return res; @@ -325,7 +328,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() { using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>; GetLogicalProcessorInformation(nullptr, &buffer_size); - UPtr buff((PInfo*)malloc(buffer_size), &std::free); + UPtr buff(static_cast<PInfo*>(std::malloc(buffer_size)), &std::free); if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", GetLastError()); @@ -336,15 +339,16 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() { for (; it != end; ++it) { if (it->Relationship != RelationCache) continue; using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>; - BitSet B(it->ProcessorMask); + BitSet b(it->ProcessorMask); // To prevent duplicates, only consider caches where CPU 0 is specified - if (!B.test(0)) continue; - CInfo* Cache = &it->Cache; + if (!b.test(0)) continue; + const CInfo& cache = it->Cache; CPUInfo::CacheInfo C; - C.num_sharing = static_cast<int>(B.count()); - C.level = Cache->Level; - C.size = Cache->Size; - switch (Cache->Type) { + C.num_sharing = static_cast<int>(b.count()); + C.level = cache.Level; + C.size = cache.Size; + C.type = "Unknown"; + switch (cache.Type) { case CacheUnified: C.type = "Unified"; break; @@ -357,9 +361,6 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() { case CacheTrace: C.type = "Trace"; break; - default: - C.type = "Unknown"; - break; } res.push_back(C); } @@ -368,29 +369,29 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() { #elif BENCHMARK_OS_QNX std::vector<CPUInfo::CacheInfo> GetCacheSizesQNX() { std::vector<CPUInfo::CacheInfo> 
res; - struct cacheattr_entry *cache = SYSPAGE_ENTRY(cacheattr); + struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr); uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); - int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize ; - for(int i = 0; i < num; ++i ) { + int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize; + for (int i = 0; i < num; ++i) { CPUInfo::CacheInfo info; - switch (cache->flags){ - case CACHE_FLAG_INSTR : + switch (cache->flags) { + case CACHE_FLAG_INSTR: info.type = "Instruction"; info.level = 1; break; - case CACHE_FLAG_DATA : + case CACHE_FLAG_DATA: info.type = "Data"; info.level = 1; break; - case CACHE_FLAG_UNIFIED : + case CACHE_FLAG_UNIFIED: info.type = "Unified"; info.level = 2; break; - case CACHE_FLAG_SHARED : + case CACHE_FLAG_SHARED: info.type = "Shared"; info.level = 3; break; - default : + default: continue; break; } @@ -410,6 +411,8 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizes() { return GetCacheSizesWindows(); #elif defined(BENCHMARK_OS_QNX) return GetCacheSizesQNX(); +#elif defined(BENCHMARK_OS_QURT) + return std::vector<CPUInfo::CacheInfo>(); #else return GetCacheSizesFromKVFS(); #endif @@ -418,24 +421,32 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizes() { std::string GetSystemName() { #if defined(BENCHMARK_OS_WINDOWS) std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH+1; - TCHAR hostname[COUNT] = {'\0'}; + static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1; + TCHAR hostname[COUNT] = {'\0'}; DWORD DWCOUNT = COUNT; - if (!GetComputerName(hostname, &DWCOUNT)) - return std::string(""); + if (!GetComputerName(hostname, &DWCOUNT)) return std::string(""); #ifndef UNICODE str = std::string(hostname, DWCOUNT); #else - //Using wstring_convert, Is deprecated in C++17 - using convert_type = std::codecvt_utf8<wchar_t>; - std::wstring_convert<convert_type, wchar_t> converter; - std::wstring wStr(hostname, DWCOUNT); - str = converter.to_bytes(wStr); + // `WideCharToMultiByte` returns `0` when conversion fails. + int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, + DWCOUNT, NULL, 0, NULL, NULL); + str.resize(len); + WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0], + str.size(), NULL, NULL); #endif return str; -#else // defined(BENCHMARK_OS_WINDOWS) +#elif defined(BENCHMARK_OS_QURT) + std::string str = "Hexagon DSP"; + qurt_arch_version_t arch_version_struct; + if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) { + str += " v"; + str += std::to_string(arch_version_struct.arch_version); + } + return str; +#else #ifndef HOST_NAME_MAX -#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined +#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_NACL) #define HOST_NAME_MAX 64 @@ -443,22 +454,24 @@ std::string GetSystemName() { #define HOST_NAME_MAX 154 #elif defined(BENCHMARK_OS_RTEMS) #define HOST_NAME_MAX 256 +#elif defined(BENCHMARK_OS_SOLARIS) +#define HOST_NAME_MAX MAXHOSTNAMELEN #else -#warning "HOST_NAME_MAX not defined. using 64" +#pragma message("HOST_NAME_MAX not defined. using 64") #define HOST_NAME_MAX 64 #endif -#endif // def HOST_NAME_MAX +#endif // def HOST_NAME_MAX char hostname[HOST_NAME_MAX]; int retVal = gethostname(hostname, HOST_NAME_MAX); if (retVal != 0) return std::string(""); return std::string(hostname); -#endif // Catch-all POSIX block. +#endif // Catch-all POSIX block. 
} int GetNumCPUs() { #ifdef BENCHMARK_HAS_SYSCTL - int NumCPU = -1; - if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; + int num_cpu = -1; + if (GetSysctl("hw.ncpu", &num_cpu)) return num_cpu; fprintf(stderr, "Err: %s\n", strerror(errno)); std::exit(EXIT_FAILURE); #elif defined(BENCHMARK_OS_WINDOWS) @@ -472,18 +485,23 @@ int GetNumCPUs() { // group #elif defined(BENCHMARK_OS_SOLARIS) // Returns -1 in case of a failure. - int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); - if (NumCPU < 0) { - fprintf(stderr, - "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", + long num_cpu = sysconf(_SC_NPROCESSORS_ONLN); + if (num_cpu < 0) { + fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", strerror(errno)); } - return NumCPU; + return (int)num_cpu; #elif defined(BENCHMARK_OS_QNX) return static_cast<int>(_syspage_ptr->num_cpu); +#elif defined(BENCHMARK_OS_QURT) + qurt_sysenv_max_hthreads_t hardware_threads; + if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) { + hardware_threads.max_hthreads = 1; + } + return hardware_threads.max_hthreads; #else - int NumCPUs = 0; - int MaxID = -1; + int num_cpus = 0; + int max_id = -1; std::ifstream f("/proc/cpuinfo"); if (!f.is_open()) { std::cerr << "failed to open /proc/cpuinfo\n"; @@ -493,20 +511,21 @@ int GetNumCPUs() { std::string ln; while (std::getline(f, ln)) { if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); + std::size_t split_idx = ln.find(':'); std::string value; #if defined(__s390__) // s390 has another format in /proc/cpuinfo // it needs to be parsed differently - if (SplitIdx != std::string::npos) value = ln.substr(Key.size()+1,SplitIdx-Key.size()-1); + if (split_idx != std::string::npos) + value = ln.substr(Key.size() + 1, split_idx - Key.size() - 1); #else - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (split_idx != std::string::npos) value = ln.substr(split_idx + 1); #endif if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { - NumCPUs++; + num_cpus++; if (!value.empty()) { - int CurID = benchmark::stoi(value); - MaxID = std::max(CurID, MaxID); + const int cur_id = benchmark::stoi(value); + max_id = std::max(cur_id, max_id); } } } @@ -520,17 +539,95 @@ int GetNumCPUs() { } f.close(); - if ((MaxID + 1) != NumCPUs) { + if ((max_id + 1) != num_cpus) { fprintf(stderr, "CPU ID assignments in /proc/cpuinfo seem messed up." " This is usually caused by a bad BIOS.\n"); } - return NumCPUs; + return num_cpus; #endif BENCHMARK_UNREACHABLE(); } -double GetCPUCyclesPerSecond() { +class ThreadAffinityGuard final { + public: + ThreadAffinityGuard() : reset_affinity(SetAffinity()) { + if (!reset_affinity) + std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU " + "frequency may be incorrect." 
+ << std::endl; + } + + ~ThreadAffinityGuard() { + if (!reset_affinity) return; + +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + int ret = pthread_setaffinity_np(self, sizeof(previous_affinity), + &previous_affinity); + if (ret == 0) return; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity); + if (ret != 0) return; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + PrintErrorAndDie("Failed to reset thread affinity"); + } + + ThreadAffinityGuard(ThreadAffinityGuard&&) = delete; + ThreadAffinityGuard(const ThreadAffinityGuard&) = delete; + ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete; + ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete; + + private: + bool SetAffinity() { +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + int ret; + self = pthread_self(); + ret = pthread_getaffinity_np(self, sizeof(previous_affinity), + &previous_affinity); + if (ret != 0) return false; + + cpu_set_t affinity; + memcpy(&affinity, &previous_affinity, sizeof(affinity)); + + bool is_first_cpu = true; + + for (int i = 0; i < CPU_SETSIZE; ++i) + if (CPU_ISSET(i, &affinity)) { + if (is_first_cpu) + is_first_cpu = false; + else + CPU_CLR(i, &affinity); + } + + if (is_first_cpu) return false; + + ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity); + return ret == 0; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + self = GetCurrentThread(); + DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber(); + previous_affinity = SetThreadAffinityMask(self, mask); + return previous_affinity != 0; +#else + return false; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + } + +#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) + pthread_t self; + cpu_set_t previous_affinity; +#elif defined(BENCHMARK_OS_WINDOWS_WIN32) + HANDLE self; + DWORD_PTR previous_affinity; +#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY + bool reset_affinity; +}; + +double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { + // Currently, scaling is only used on linux path here, + // suppress diagnostics about it being unused on other paths. + (void)scaling; + #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN long freq; @@ -541,8 +638,15 @@ double GetCPUCyclesPerSecond() { // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as // well. if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) - // If CPU scaling is in effect, we want to use the *maximum* frequency, - // not whatever CPU speed some random processor happens to be using now. + // If CPU scaling is disabled, use the *current* frequency. + // Note that we specifically don't want to read cpuinfo_cur_freq, + // because it is only readable by root. + || (scaling == CPUInfo::Scaling::DISABLED && + ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", + &freq)) + // Otherwise, if CPU scaling may be in effect, we want to use + // the *maximum* frequency, not whatever CPU speed some random processor + // happens to be using now. || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", &freq)) { // The value is in kHz (as the file name suggests). 
For example, on a @@ -559,7 +663,7 @@ double GetCPUCyclesPerSecond() { return error_value; } - auto startsWithKey = [](std::string const& Value, std::string const& Key) { + auto StartsWithKey = [](std::string const& Value, std::string const& Key) { if (Key.size() > Value.size()) return false; auto Cmp = [&](char X, char Y) { return std::tolower(X) == std::tolower(Y); @@ -570,18 +674,18 @@ double GetCPUCyclesPerSecond() { std::string ln; while (std::getline(f, ln)) { if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); + std::size_t split_idx = ln.find(':'); std::string value; - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (split_idx != std::string::npos) value = ln.substr(split_idx + 1); // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // accept positive values. Some environments (virtual machines) report zero, // which would cause infinite looping in WallTime_Init. - if (startsWithKey(ln, "cpu MHz")) { + if (StartsWithKey(ln, "cpu MHz")) { if (!value.empty()) { double cycles_per_second = benchmark::stod(value) * 1000000.0; if (cycles_per_second > 0) return cycles_per_second; } - } else if (startsWithKey(ln, "bogomips")) { + } else if (StartsWithKey(ln, "bogomips")) { if (!value.empty()) { bogo_clock = benchmark::stod(value) * 1000000.0; if (bogo_clock < 0.0) bogo_clock = error_value; @@ -603,7 +707,7 @@ double GetCPUCyclesPerSecond() { if (bogo_clock >= 0.0) return bogo_clock; #elif defined BENCHMARK_HAS_SYSCTL - constexpr auto* FreqStr = + constexpr auto* freqStr = #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) "machdep.tsc_freq"; #elif defined BENCHMARK_OS_OPENBSD @@ -615,14 +719,17 @@ double GetCPUCyclesPerSecond() { #endif unsigned long long hz = 0; #if defined BENCHMARK_OS_OPENBSD - if (GetSysctl(FreqStr, &hz)) return hz * 1000000; + if (GetSysctl(freqStr, &hz)) return hz * 1000000; #else - if (GetSysctl(FreqStr, &hz)) return hz; + if (GetSysctl(freqStr, &hz)) return hz; #endif fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", - FreqStr, strerror(errno)); + freqStr, strerror(errno)); + fprintf(stderr, + "This does not affect benchmark measurements, only the " + "metadata output.\n"); -#elif defined BENCHMARK_OS_WINDOWS +#elif defined BENCHMARK_OS_WINDOWS_WIN32 // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. 
DWORD data, data_size = sizeof(data); @@ -631,15 +738,16 @@ double GetCPUCyclesPerSecond() { SHGetValueA(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "~MHz", nullptr, &data, &data_size))) - return static_cast<double>((int64_t)data * - (int64_t)(1000 * 1000)); // was mhz -#elif defined (BENCHMARK_OS_SOLARIS) - kstat_ctl_t *kc = kstat_open(); + return static_cast<double>(static_cast<int64_t>(data) * + static_cast<int64_t>(1000 * 1000)); // was mhz +#elif defined(BENCHMARK_OS_SOLARIS) + kstat_ctl_t* kc = kstat_open(); if (!kc) { std::cerr << "failed to open /dev/kstat\n"; return -1; } - kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); + kstat_t* ksp = kstat_lookup(kc, const_cast<char*>("cpu_info"), -1, + const_cast<char*>("cpu_info0")); if (!ksp) { std::cerr << "failed to lookup in /dev/kstat\n"; return -1; @@ -648,8 +756,8 @@ double GetCPUCyclesPerSecond() { std::cerr << "failed to read from /dev/kstat\n"; return -1; } - kstat_named_t *knp = - (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); + kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup( + ksp, const_cast<char*>("current_clock_Hz")); if (!knp) { std::cerr << "failed to lookup data in /dev/kstat\n"; return -1; @@ -662,23 +770,55 @@ double GetCPUCyclesPerSecond() { double clock_hz = knp->value.ui64; kstat_close(kc); return clock_hz; -#elif defined (BENCHMARK_OS_QNX) +#elif defined(BENCHMARK_OS_QNX) return static_cast<double>((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * (int64_t)(1000 * 1000)); +#elif defined(BENCHMARK_OS_QURT) + // QuRT doesn't provide any API to query Hexagon frequency. + return 1000000000; #endif // If we've fallen through, attempt to roughly estimate the CPU clock rate. - const int estimate_time_ms = 1000; + + // Make sure to use the same cycle counter when starting and stopping the + // cycle timer. We just pin the current thread to a cpu in the previous + // affinity set. + ThreadAffinityGuard affinity_guard; + + static constexpr double estimate_time_s = 1.0; + const double start_time = ChronoClockNow(); const auto start_ticks = cycleclock::Now(); - SleepForMilliseconds(estimate_time_ms); - return static_cast<double>(cycleclock::Now() - start_ticks); + + // Impose load instead of calling sleep() to make sure the cycle counter + // works. + using PRNG = std::minstd_rand; + using Result = PRNG::result_type; + PRNG rng(static_cast<Result>(start_ticks)); + + Result state = 0; + + do { + static constexpr size_t batch_size = 10000; + rng.discard(batch_size); + state += rng(); + + } while (ChronoClockNow() - start_time < estimate_time_s); + + DoNotOptimize(state); + + const auto end_ticks = cycleclock::Now(); + const double end_time = ChronoClockNow(); + + return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time); + // Reset the affinity of current thread when the lifetime of affinity_guard + // ends. 
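
The fallback branch above replaces the old sleep-based estimate: it pins the thread with ThreadAffinityGuard, spins a std::minstd_rand load for roughly one second, and divides the elapsed cycle-counter ticks by the elapsed wall time. A rough standalone sketch of that estimate, using an x86 TSC read (__rdtsc) and std::chrono as stand-ins for the library's cycleclock and ChronoClockNow helpers, and without the affinity pinning:

    #include <chrono>
    #include <random>
    #include <x86intrin.h>  // __rdtsc(); x86-only stand-in for cycleclock::Now()

    double EstimateCyclesPerSecond(double estimate_time_s = 1.0) {
      const auto start_time = std::chrono::steady_clock::now();
      const unsigned long long start_ticks = __rdtsc();

      // Impose load instead of sleeping so the cycle counter keeps advancing
      // at the active clock rate.
      std::minstd_rand rng(static_cast<std::minstd_rand::result_type>(start_ticks));
      volatile std::minstd_rand::result_type sink = 0;
      std::chrono::duration<double> elapsed(0);
      do {
        rng.discard(10000);
        sink = sink + rng();  // volatile sink stands in for DoNotOptimize()
        elapsed = std::chrono::steady_clock::now() - start_time;
      } while (elapsed.count() < estimate_time_s);

      const unsigned long long end_ticks = __rdtsc();
      return static_cast<double>(end_ticks - start_ticks) / elapsed.count();
    }
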
} std::vector<double> GetLoadAvg() { #if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ - !defined(__ANDROID__) - constexpr int kMaxSamples = 3; + !(defined(__ANDROID__) && __ANDROID_API__ < 29) + static constexpr int kMaxSamples = 3; std::vector<double> res(kMaxSamples, 0.0); const int nelem = getloadavg(res.data(), kMaxSamples); if (nelem < 1) { @@ -701,12 +841,11 @@ const CPUInfo& CPUInfo::Get() { CPUInfo::CPUInfo() : num_cpus(GetNumCPUs()), - cycles_per_second(GetCPUCyclesPerSecond()), - caches(GetCacheSizes()), scaling(CpuScaling(num_cpus)), + cycles_per_second(GetCPUCyclesPerSecond(scaling)), + caches(GetCacheSizes()), load_avg(GetLoadAvg()) {} - const SystemInfo& SystemInfo::Get() { static const SystemInfo* info = new SystemInfo(); return *info; diff --git a/src/thread_manager.h b/src/thread_manager.h index 28e2dd5..819b3c4 100644 --- a/src/thread_manager.h +++ b/src/thread_manager.h @@ -36,7 +36,6 @@ class ThreadManager { [this]() { return alive_threads_ == 0; }); } - public: struct Result { IterationCount iterations = 0; double real_time_used = 0; @@ -44,8 +43,8 @@ class ThreadManager { double manual_time_used = 0; int64_t complexity_n = 0; std::string report_label_; - std::string error_message_; - bool has_error_ = false; + std::string skip_message_; + internal::Skipped skipped_ = internal::NotSkipped; UserCounters counters; }; GUARDED_BY(GetBenchmarkMutex()) Result results; diff --git a/src/thread_timer.h b/src/thread_timer.h index 1703ca0..eb23f59 100644 --- a/src/thread_timer.h +++ b/src/thread_timer.h @@ -28,7 +28,7 @@ class ThreadTimer { // Called by each thread void StopTimer() { - CHECK(running_); + BM_CHECK(running_); running_ = false; real_time_used_ += ChronoClockNow() - start_real_time_; // Floating point error can result in the subtraction producing a negative @@ -44,19 +44,19 @@ class ThreadTimer { // REQUIRES: timer is not running double real_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running double cpu_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running double manual_time_used() const { - CHECK(!running_); + BM_CHECK(!running_); return manual_time_used_; } diff --git a/src/timers.cc b/src/timers.cc index 1d3ab9a..b23feea 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "timers.h" + #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS @@ -22,7 +23,7 @@ #include <windows.h> #else #include <fcntl.h> -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include <sys/resource.h> #endif #include <sys/time.h> @@ -37,6 +38,9 @@ #include <mach/mach_port.h> #include <mach/thread_act.h> #endif +#if defined(BENCHMARK_OS_QURT) +#include <qurt.h> +#endif #endif #ifdef BENCHMARK_OS_EMSCRIPTEN @@ -55,7 +59,6 @@ #include "check.h" #include "log.h" -#include "sleep.h" #include "string_util.h" namespace benchmark { @@ -64,6 +67,9 @@ namespace benchmark { #if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wunused-function" #endif +#if defined(__NVCOMPILER) +#pragma diag_suppress declared_but_not_referenced +#endif namespace { #if defined(BENCHMARK_OS_WINDOWS) @@ -78,7 +84,7 @@ double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { static_cast<double>(user.QuadPart)) * 1e-7; } -#elif !defined(BENCHMARK_OS_FUCHSIA) +#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) double MakeTime(struct rusage const& ru) { return (static_cast<double>(ru.ru_utime.tv_sec) + static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 + @@ -118,15 +124,19 @@ double ProcessCPUUsage() { &user_time)) return MakeTime(kernel_time, user_time); DiagnoseAndExit("GetProccessTimes() failed"); +#elif defined(BENCHMARK_OS_QURT) + return static_cast<double>( + qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + 1.0e-6; #elif defined(BENCHMARK_OS_EMSCRIPTEN) // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten. // Use Emscripten-specific API. Reported CPU time would be exactly the // same as total time, but this is ok because there aren't long-latency - // syncronous system calls in Emscripten. + // synchronous system calls in Emscripten. return emscripten_get_now() * 1e-3; #elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. + // See https://github.com/google/benchmark/pull/292 struct timespec spec; if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) return MakeTime(spec); @@ -148,14 +158,19 @@ double ThreadCPUUsage() { GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); return MakeTime(kernel_time, user_time); +#elif defined(BENCHMARK_OS_QURT) + return static_cast<double>( + qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + 1.0e-6; #elif defined(BENCHMARK_OS_MACOSX) - // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See - // https://github.com/google/benchmark/pull/292 + // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. 
+ // See https://github.com/google/benchmark/pull/292 mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; mach_port_t thread = pthread_mach_thread_np(pthread_self()); - if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) == - KERN_SUCCESS) { + if (thread_info(thread, THREAD_BASIC_INFO, + reinterpret_cast<thread_info_t>(&info), + &count) == KERN_SUCCESS) { return MakeTime(info); } DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); @@ -190,15 +205,26 @@ std::string LocalDateTimeString() { std::size_t timestamp_len; long int offset_minutes; char tz_offset_sign = '+'; - // Long enough buffers to avoid format-overflow warnings - char tz_offset[128]; + // tz_offset is set in one of three ways: + // * strftime with %z - This either returns empty or the ISO 8601 time. The + // maximum length an + // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero). + // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to + // 19 for %02li, + // one for :, up to 19 %02li, plus trailing zero). + // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus + // trailing zero). + // + // Thus, the maximum size this needs to be is 41. + char tz_offset[41]; + // Long enough buffer to avoid format-overflow warnings char storage[128]; #if defined(BENCHMARK_OS_WINDOWS) - std::tm *timeinfo_p = ::localtime(&now); + std::tm* timeinfo_p = ::localtime(&now); #else std::tm timeinfo; - std::tm *timeinfo_p = &timeinfo; + std::tm* timeinfo_p = &timeinfo; ::localtime_r(&now, &timeinfo); #endif @@ -215,10 +241,11 @@ std::string LocalDateTimeString() { tz_offset_sign = '-'; } - tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", - tz_offset_sign, offset_minutes / 100, offset_minutes % 100); - CHECK(tz_len == kTzOffsetLen); - ((void)tz_len); // Prevent unused variable warning in optimized build. + tz_len = + ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", + tz_offset_sign, offset_minutes / 100, offset_minutes % 100); + BM_CHECK(tz_len == kTzOffsetLen); + ((void)tz_len); // Prevent unused variable warning in optimized build. } else { // Unknown offset. RFC3339 specifies that unknown local offsets should be // written as UTC time with -00:00 timezone. @@ -232,9 +259,9 @@ std::string LocalDateTimeString() { strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); } - timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", - timeinfo_p); - CHECK(timestamp_len == kTimestampLen); + timestamp_len = + std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", timeinfo_p); + BM_CHECK(timestamp_len == kTimestampLen); // Prevent unused variable warning in optimized build. ((void)kTimestampLen); diff --git a/test/AssemblyTests.cmake b/test/AssemblyTests.cmake index 3d07858..c43c711 100644 --- a/test/AssemblyTests.cmake +++ b/test/AssemblyTests.cmake @@ -1,3 +1,23 @@ +set(CLANG_SUPPORTED_VERSION "5.0.0") +set(GCC_SUPPORTED_VERSION "5.5.0") + +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${CLANG_SUPPORTED_VERSION}) + message (WARNING + "Unsupported Clang version " ${CMAKE_CXX_COMPILER_VERSION} + ". Expected is " ${CLANG_SUPPORTED_VERSION} + ". Assembly tests may be broken.") + endif() +elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${GCC_SUPPORTED_VERSION}) + message (WARNING + "Unsupported GCC version " ${CMAKE_CXX_COMPILER_VERSION} + ". Expected is " ${GCC_SUPPORTED_VERSION} + ". 
Assembly tests may be broken.") + endif() +else() + message (WARNING "Unsupported compiler. Assembly tests may be broken.") +endif() include(split_list) @@ -23,6 +43,7 @@ string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" ASM_TEST_COMPILER) macro(add_filecheck_test name) cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV}) add_library(${name} OBJECT ${name}.cc) + target_link_libraries(${name} PRIVATE benchmark::benchmark) set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}") set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s") add_custom_target(copy_${name} ALL @@ -1,8 +1,18 @@ +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + +platform( + name = "windows", + constraint_values = [ + "@platforms//os:windows", + ], +) + TEST_COPTS = [ "-pedantic", "-pedantic-errors", "-std=c++11", "-Wall", + "-Wconversion", "-Wextra", "-Wshadow", # "-Wshorten-64-to-32", @@ -10,64 +20,108 @@ TEST_COPTS = [ "-fstrict-aliasing", ] -PER_SRC_COPTS = ({ - "cxx03_test.cc": ["-std=c++03"], - # Some of the issues with DoNotOptimize only occur when optimization is enabled +# Some of the issues with DoNotOptimize only occur when optimization is enabled +PER_SRC_COPTS = { "donotoptimize_test.cc": ["-O3"], -}) +} -TEST_ARGS = ["--benchmark_min_time=0.01"] +TEST_ARGS = ["--benchmark_min_time=0.01s"] -PER_SRC_TEST_ARGS = ({ +PER_SRC_TEST_ARGS = { "user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"], -}) - -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + "repetitions_test.cc": [" --benchmark_repetitions=3"], + "spec_arg_test.cc": ["--benchmark_filter=BM_NotChosen"], + "spec_arg_verbosity_test.cc": ["--v=42"], +} cc_library( name = "output_test_helper", testonly = 1, srcs = ["output_test_helper.cc"], hdrs = ["output_test.h"], - copts = TEST_COPTS, + copts = select({ + "//:windows": [], + "//conditions:default": TEST_COPTS, + }), deps = [ "//:benchmark", "//:benchmark_internal_headers", ], ) +# Tests that use gtest. These rely on `gtest_main`. +[ + cc_test( + name = test_src[:-len(".cc")], + size = "small", + srcs = [test_src], + copts = select({ + "//:windows": [], + "//conditions:default": TEST_COPTS, + }) + PER_SRC_COPTS.get(test_src, []), + deps = [ + "//:benchmark", + "//:benchmark_internal_headers", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], + ) + for test_src in glob(["*_gtest.cc"]) +] + +# Tests that do not use gtest. These have their own `main` defined. [ cc_test( name = test_src[:-len(".cc")], size = "small", srcs = [test_src], args = TEST_ARGS + PER_SRC_TEST_ARGS.get(test_src, []), - copts = TEST_COPTS + PER_SRC_COPTS.get(test_src, []), + copts = select({ + "//:windows": [], + "//conditions:default": TEST_COPTS, + }) + PER_SRC_COPTS.get(test_src, []), deps = [ ":output_test_helper", "//:benchmark", "//:benchmark_internal_headers", - "@com_google_googletest//:gtest", - ] + ( - ["@com_google_googletest//:gtest_main"] if (test_src[-len("gtest.cc"):] == "gtest.cc") else [] - ), + ], # FIXME: Add support for assembly tests to bazel. 
# See Issue #556 # https://github.com/google/benchmark/issues/556 ) for test_src in glob( - ["*test.cc"], + ["*_test.cc"], exclude = [ "*_assembly_test.cc", + "cxx03_test.cc", "link_main_test.cc", ], ) ] cc_test( + name = "cxx03_test", + size = "small", + srcs = ["cxx03_test.cc"], + copts = TEST_COPTS + ["-std=c++03"], + target_compatible_with = select({ + "//:windows": ["@platforms//:incompatible"], + "//conditions:default": [], + }), + deps = [ + ":output_test_helper", + "//:benchmark", + "//:benchmark_internal_headers", + ], +) + +cc_test( name = "link_main_test", size = "small", srcs = ["link_main_test.cc"], - copts = TEST_COPTS, + copts = select({ + "//:windows": [], + "//conditions:default": TEST_COPTS, + }), deps = ["//:benchmark_main"], ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c1a3a3f..fd88131 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,7 @@ # Enable the tests +set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) include(CheckCXXCompilerFlag) @@ -22,6 +24,10 @@ if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) endforeach() endif() +if (NOT BUILD_SHARED_LIBS) + add_definitions(-DBENCHMARK_STATIC_DEFINE) +endif() + check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) set(BENCHMARK_O3_FLAG "") if (BENCHMARK_HAS_O3_FLAG) @@ -35,10 +41,14 @@ if (DEFINED BENCHMARK_CXX_LINKER_FLAGS) endif() add_library(output_test_helper STATIC output_test_helper.cc output_test.h) +target_link_libraries(output_test_helper PRIVATE benchmark::benchmark) macro(compile_benchmark_test name) add_executable(${name} "${name}.cc") target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT}) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC") + target_compile_options( ${name} PRIVATE --diag_suppress partial_override ) + endif() endmacro(compile_benchmark_test) macro(compile_benchmark_test_with_main name) @@ -48,20 +58,35 @@ endmacro(compile_benchmark_test_with_main) macro(compile_output_test name) add_executable(${name} "${name}.cc" output_test.h) - target_link_libraries(${name} output_test_helper benchmark::benchmark + target_link_libraries(${name} output_test_helper benchmark::benchmark_main ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_output_test) # Demonstration executable compile_benchmark_test(benchmark_test) -add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01) +add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s) + +compile_benchmark_test(spec_arg_test) +add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen) + +compile_benchmark_test(spec_arg_verbosity_test) +add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42) + +compile_benchmark_test(benchmark_setup_teardown_test) +add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) compile_benchmark_test(filter_test) macro(add_filter_test name filter expect) - add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) + add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect}) add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) endmacro(add_filter_test) +compile_benchmark_test(benchmark_min_time_flag_time_test) +add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test) + +compile_benchmark_test(benchmark_min_time_flag_iters_test) +add_test(NAME min_time_flag_iters COMMAND 
benchmark_min_time_flag_iters_test) + add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) add_filter_test(filter_suffix "BM_.*" 4) @@ -82,72 +107,83 @@ add_filter_test(filter_regex_end ".*Ba$" 1) add_filter_test(filter_regex_end_negative "-.*Ba$" 4) compile_benchmark_test(options_test) -add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01) +add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s) compile_benchmark_test(basic_test) -add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01) +add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s) + +compile_output_test(repetitions_test) +add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3) compile_benchmark_test(diagnostics_test) -add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01) +add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s) compile_benchmark_test(skip_with_error_test) -add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01) +add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s) compile_benchmark_test(donotoptimize_test) +# Enable errors for deprecated deprecations (DoNotOptimize(Tp const& value)). +check_cxx_compiler_flag(-Werror=deprecated-declarations BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG) +if (BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG) + target_compile_options (donotoptimize_test PRIVATE "-Werror=deprecated-declarations") +endif() # Some of the issues with DoNotOptimize only occur when optimization is enabled check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) if (BENCHMARK_HAS_O3_FLAG) set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3") endif() -add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01) +add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s) compile_benchmark_test(fixture_test) -add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01) +add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s) compile_benchmark_test(register_benchmark_test) -add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01) +add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s) compile_benchmark_test(map_test) -add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01) +add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s) compile_benchmark_test(multiple_ranges_test) -add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01) +add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s) compile_benchmark_test(args_product_test) -add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01) +add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s) compile_benchmark_test_with_main(link_main_test) -add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01) +add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s) compile_output_test(reporter_output_test) -add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01) +add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s) 
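
Several of the test binaries registered above target features new in this snapshot: benchmark_setup_teardown_test exercises per-benchmark Setup/Teardown hooks, the spec_arg tests exercise flag filtering, and the min_time tests exercise the suffixed --benchmark_min_time values used throughout this file. A minimal sketch of the Setup/Teardown registration that the first of those tests targets (callback and benchmark names are illustrative):

    #include <benchmark/benchmark.h>
    #include <vector>

    static std::vector<int>* g_data = nullptr;

    // Hooks run outside the timed region, around each run of the benchmark.
    static void DoSetup(const benchmark::State&) {
      g_data = new std::vector<int>(1 << 20, 1);
    }
    static void DoTeardown(const benchmark::State&) {
      delete g_data;
      g_data = nullptr;
    }

    static void BM_SumVector(benchmark::State& state) {
      for (auto _ : state) {
        long sum = 0;
        for (int v : *g_data) sum += v;
        benchmark::DoNotOptimize(sum);
      }
    }
    BENCHMARK(BM_SumVector)->Setup(DoSetup)->Teardown(DoTeardown);
    BENCHMARK_MAIN();
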
compile_output_test(templated_fixture_test) -add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01) +add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s) compile_output_test(user_counters_test) -add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01) +add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s) + +compile_output_test(perf_counters_test) +add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,BRANCHES) compile_output_test(internal_threading_test) -add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01) +add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s) compile_output_test(report_aggregates_only_test) -add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01) +add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(display_aggregates_only_test) -add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01) +add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(user_counters_tabular_test) -add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01) +add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01s) compile_output_test(user_counters_thousands_test) -add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01) +add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s) compile_output_test(memory_manager_test) -add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01) +add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s) -check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG) -if (BENCHMARK_HAS_CXX03_FLAG) +# MSVC does not allow to set the language standard to C++98/03. +if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") compile_benchmark_test(cxx03_test) set_target_properties(cxx03_test PROPERTIES @@ -158,19 +194,25 @@ if (BENCHMARK_HAS_CXX03_FLAG) # causing the test to fail to compile. To prevent this we explicitly disable # the warning. 
check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR) - if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR) - set_target_properties(cxx03_test - PROPERTIES - LINK_FLAGS "-Wno-odr") + check_cxx_compiler_flag(-Wno-lto-type-mismatch BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH) + # Cannot set_target_properties multiple times here because the warnings will + # be overwritten on each call + set (DISABLE_LTO_WARNINGS "") + if (BENCHMARK_HAS_WNO_ODR) + set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-odr") + endif() + if (BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH) + set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch") endif() - add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01) + set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}") + add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s) endif() # Attempt to work around flaky test failures when running on Appveyor servers. if (DEFINED ENV{APPVEYOR}) - set(COMPLEXITY_MIN_TIME "0.5") + set(COMPLEXITY_MIN_TIME "0.5s") else() - set(COMPLEXITY_MIN_TIME "0.01") + set(COMPLEXITY_MIN_TIME "0.01s") endif() compile_output_test(complexity_test) add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME}) @@ -193,9 +235,13 @@ if (BENCHMARK_ENABLE_GTEST_TESTS) add_gtest(benchmark_gtest) add_gtest(benchmark_name_gtest) + add_gtest(benchmark_random_interleaving_gtest) add_gtest(commandlineflags_gtest) add_gtest(statistics_gtest) add_gtest(string_util_gtest) + add_gtest(perf_counters_gtest) + add_gtest(time_unit_gtest) + add_gtest(min_time_parse_gtest) endif(BENCHMARK_ENABLE_GTEST_TESTS) ############################################################################### diff --git a/test/args_product_test.cc b/test/args_product_test.cc index 8a859f8..63b8b71 100644 --- a/test/args_product_test.cc +++ b/test/args_product_test.cc @@ -1,10 +1,10 @@ -#include "benchmark/benchmark.h" - #include <cassert> #include <iostream> #include <set> #include <vector> +#include "benchmark/benchmark.h" + class ArgsProductFixture : public ::benchmark::Fixture { public: ArgsProductFixture() @@ -23,7 +23,7 @@ class ArgsProductFixture : public ::benchmark::Fixture { {2, 15, 10, 9}, {4, 5, 6, 11}}) {} - void SetUp(const ::benchmark::State& state) { + void SetUp(const ::benchmark::State& state) override { std::vector<int64_t> ranges = {state.range(0), state.range(1), state.range(2), state.range(3)}; @@ -34,10 +34,10 @@ class ArgsProductFixture : public ::benchmark::Fixture { // NOTE: This is not TearDown as we want to check after _all_ runs are // complete. 
- virtual ~ArgsProductFixture() { + ~ArgsProductFixture() override { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { + for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; @@ -45,7 +45,7 @@ class ArgsProductFixture : public ::benchmark::Fixture { std::cout << "}\n"; } std::cout << "ACTUAL\n"; - for (auto v : actualValues) { + for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; diff --git a/test/basic_test.cc b/test/basic_test.cc index 5f3dd1a..cba1b0f 100644 --- a/test/basic_test.cc +++ b/test/basic_test.cc @@ -5,7 +5,8 @@ void BM_empty(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); @@ -13,7 +14,7 @@ BENCHMARK(BM_empty)->ThreadPerCpu(); void BM_spin_empty(benchmark::State& state) { for (auto _ : state) { - for (int x = 0; x < state.range(0); ++x) { + for (auto x = 0; x < state.range(0); ++x) { benchmark::DoNotOptimize(x); } } @@ -22,11 +23,11 @@ BASIC_BENCHMARK_TEST(BM_spin_empty); BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); void BM_spin_pause_before(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -37,11 +38,11 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); void BM_spin_pause_during(benchmark::State& state) { for (auto _ : state) { state.PauseTiming(); - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } state.ResumeTiming(); - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -62,11 +63,11 @@ BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); void BM_spin_pause_after(benchmark::State& state) { for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -74,15 +75,15 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_after); BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); void BM_spin_pause_before_and_after(benchmark::State& state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } for (auto _ : state) { - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } - for (int i = 0; i < state.range(0); ++i) { + for (auto i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } } @@ -96,7 +97,6 @@ void BM_empty_stop_start(benchmark::State& state) { BENCHMARK(BM_empty_stop_start); BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); - void BM_KeepRunning(benchmark::State& state) { benchmark::IterationCount iter_count = 0; assert(iter_count == state.iterations()); @@ -108,15 +108,30 @@ void BM_KeepRunning(benchmark::State& state) { BENCHMARK(BM_KeepRunning); void BM_KeepRunningBatch(benchmark::State& state) { - // Choose a prime batch size to avoid evenly dividing max_iterations. 
- const benchmark::IterationCount batch_size = 101; + // Choose a batch size >1000 to skip the typical runs with iteration + // targets of 10, 100 and 1000. If these are not actually skipped the + // bug would be detectable as consecutive runs with the same iteration + // count. Below we assert that this does not happen. + const benchmark::IterationCount batch_size = 1009; + + static benchmark::IterationCount prior_iter_count = 0; benchmark::IterationCount iter_count = 0; while (state.KeepRunningBatch(batch_size)) { iter_count += batch_size; } assert(state.iterations() == iter_count); + + // Verify that the iteration count always increases across runs (see + // comment above). + assert(iter_count == batch_size // max_iterations == 1 + || iter_count > prior_iter_count); // max_iterations > batch_size + prior_iter_count = iter_count; } -BENCHMARK(BM_KeepRunningBatch); +// Register with a fixed repetition count to establish the invariant that +// the iteration count should always change across runs. This overrides +// the --benchmark_repetitions command line flag, which would otherwise +// cause this test to fail if set > 1. +BENCHMARK(BM_KeepRunningBatch)->Repetitions(1); void BM_RangedFor(benchmark::State& state) { benchmark::IterationCount iter_count = 0; @@ -127,10 +142,39 @@ void BM_RangedFor(benchmark::State& state) { } BENCHMARK(BM_RangedFor); +#ifdef BENCHMARK_HAS_CXX11 +template <typename T> +void BM_OneTemplateFunc(benchmark::State& state) { + auto arg = state.range(0); + T sum = 0; + for (auto _ : state) { + sum += static_cast<T>(arg); + } +} +BENCHMARK(BM_OneTemplateFunc<int>)->Arg(1); +BENCHMARK(BM_OneTemplateFunc<double>)->Arg(1); + +template <typename A, typename B> +void BM_TwoTemplateFunc(benchmark::State& state) { + auto arg = state.range(0); + A sum = 0; + B prod = 1; + for (auto _ : state) { + sum += static_cast<A>(arg); + prod *= static_cast<B>(arg); + } +} +BENCHMARK(BM_TwoTemplateFunc<int, double>)->Arg(1); +BENCHMARK(BM_TwoTemplateFunc<double, int>)->Arg(1); + +#endif // BENCHMARK_HAS_CXX11 + // Ensure that StateIterator provides all the necessary typedefs required to // instantiate std::iterator_traits. 
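An aside on the BM_OneTemplateFunc/BM_TwoTemplateFunc registrations added above: under BENCHMARK_HAS_CXX11 a template instantiation can now be named directly inside BENCHMARK(), alongside the older BENCHMARK_TEMPLATE macros. A minimal sketch of the two spellings, using an illustrative BM_FillVector benchmark that is not part of this patch:

#include <vector>

#include "benchmark/benchmark.h"

// Illustrative template benchmark (not from the patch).
template <typename T>
void BM_FillVector(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<T> v(1024, T{});
    benchmark::DoNotOptimize(v.data());
  }
}

// Pre-C++11 spelling: the macro assembles the instantiation.
BENCHMARK_TEMPLATE(BM_FillVector, int);

// C++11 spelling exercised by the new tests: name the instantiation directly.
BENCHMARK(BM_FillVector<double>);

BENCHMARK_MAIN();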
-static_assert(std::is_same< - typename std::iterator_traits<benchmark::State::StateIterator>::value_type, - typename benchmark::State::StateIterator::value_type>::value, ""); +static_assert( + std::is_same<typename std::iterator_traits< + benchmark::State::StateIterator>::value_type, + typename benchmark::State::StateIterator::value_type>::value, + ""); BENCHMARK_MAIN(); diff --git a/test/benchmark_gtest.cc b/test/benchmark_gtest.cc index 6dbf7a5..2c9e555 100644 --- a/test/benchmark_gtest.cc +++ b/test/benchmark_gtest.cc @@ -1,11 +1,15 @@ +#include <map> +#include <string> #include <vector> #include "../src/benchmark_register.h" +#include "benchmark/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { namespace internal { + namespace { TEST(AddRangeTest, Simple) { @@ -34,8 +38,9 @@ TEST(AddRangeTest, Advanced64) { TEST(AddRangeTest, FullRange8) { std::vector<int8_t> dst; - AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), 8); - EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127)); + AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), int8_t{8}); + EXPECT_THAT( + dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127})); } TEST(AddRangeTest, FullRange64) { @@ -125,8 +130,38 @@ TEST(AddRangeTest, FullNegativeRange64) { TEST(AddRangeTest, Simple8) { std::vector<int8_t> dst; - AddRange<int8_t>(&dst, 1, 8, 2); - EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8)); + AddRange<int8_t>(&dst, int8_t{1}, int8_t{8}, int8_t{2}); + EXPECT_THAT(dst, + testing::ElementsAre(int8_t{1}, int8_t{2}, int8_t{4}, int8_t{8})); +} + +TEST(AddCustomContext, Simple) { + std::map<std::string, std::string> *&global_context = GetGlobalContext(); + EXPECT_THAT(global_context, nullptr); + + AddCustomContext("foo", "bar"); + AddCustomContext("baz", "qux"); + + EXPECT_THAT(*global_context, + testing::UnorderedElementsAre(testing::Pair("foo", "bar"), + testing::Pair("baz", "qux"))); + + delete global_context; + global_context = nullptr; +} + +TEST(AddCustomContext, DuplicateKey) { + std::map<std::string, std::string> *&global_context = GetGlobalContext(); + EXPECT_THAT(global_context, nullptr); + + AddCustomContext("foo", "bar"); + AddCustomContext("foo", "qux"); + + EXPECT_THAT(*global_context, + testing::UnorderedElementsAre(testing::Pair("foo", "bar"))); + + delete global_context; + global_context = nullptr; } } // namespace diff --git a/test/benchmark_min_time_flag_iters_test.cc b/test/benchmark_min_time_flag_iters_test.cc new file mode 100644 index 0000000..3de93a7 --- /dev/null +++ b/test/benchmark_min_time_flag_iters_test.cc @@ -0,0 +1,66 @@ +#include <cassert> +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <string> +#include <vector> + +#include "benchmark/benchmark.h" + +// Tests that we can specify the number of iterations with +// --benchmark_min_time=<NUM>x. 
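The two new flag tests that follow drive this parsing through a fake argv. As a usage sketch (binary name, benchmark name and values are illustrative, not from the patch): --benchmark_min_time now takes either an iteration count with an "x" suffix or a duration in seconds with an optional "s" suffix, and the corresponding per-benchmark builders remain Iterations() and MinTime().

#include "benchmark/benchmark.h"

void BM_Payload(benchmark::State& state) {  // illustrative benchmark
  for (auto _ : state) {
  }
}

// Command-line forms covered by the new tests (illustrative binary name):
//   ./benchmark_bin --benchmark_min_time=4x     run exactly 4 iterations
//   ./benchmark_bin --benchmark_min_time=4.0s   run for at least 4 seconds
//   ./benchmark_bin --benchmark_min_time=4      bare number still means seconds
//
// In-code equivalents on individual registrations:
BENCHMARK(BM_Payload)->Iterations(4);
BENCHMARK(BM_Payload)->MinTime(4.0);

BENCHMARK_MAIN();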
+namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE { + assert(report.size() == 1); + iter_nums_.push_back(report[0].iterations); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() {} + + virtual ~TestReporter() {} + + const std::vector<benchmark::IterationCount>& GetIters() const { + return iter_nums_; + } + + private: + std::vector<benchmark::IterationCount> iter_nums_; +}; + +} // end namespace + +static void BM_MyBench(benchmark::State& state) { + for (auto s : state) { + } +} +BENCHMARK(BM_MyBench); + +int main(int argc, char** argv) { + // Make a fake argv and append the new --benchmark_min_time=<foo> to it. + int fake_argc = argc + 1; + const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)]; + for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i]; + fake_argv[argc] = "--benchmark_min_time=4x"; + + benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv)); + + TestReporter test_reporter; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench"); + assert(returned_count == 1); + + // Check the executed iters. + const std::vector<benchmark::IterationCount> iters = test_reporter.GetIters(); + assert(!iters.empty() && iters[0] == 4); + + delete[] fake_argv; + return 0; +} diff --git a/test/benchmark_min_time_flag_time_test.cc b/test/benchmark_min_time_flag_time_test.cc new file mode 100644 index 0000000..04a82eb --- /dev/null +++ b/test/benchmark_min_time_flag_time_test.cc @@ -0,0 +1,90 @@ +#include <cassert> +#include <climits> +#include <cmath> +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <string> +#include <vector> + +#include "benchmark/benchmark.h" + +// Tests that we can specify the min time with +// --benchmark_min_time=<NUM> (no suffix needed) OR +// --benchmark_min_time=<NUM>s +namespace { + +// This is from benchmark.h +typedef int64_t IterationCount; + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE { + assert(report.size() == 1); + ConsoleReporter::ReportRuns(report); + }; + + virtual void ReportRunsConfig(double min_time, bool /* has_explicit_iters */, + IterationCount /* iters */) BENCHMARK_OVERRIDE { + min_times_.push_back(min_time); + } + + TestReporter() {} + + virtual ~TestReporter() {} + + const std::vector<double>& GetMinTimes() const { return min_times_; } + + private: + std::vector<double> min_times_; +}; + +bool AlmostEqual(double a, double b) { + return std::fabs(a - b) < std::numeric_limits<double>::epsilon(); +} + +void DoTestHelper(int* argc, const char** argv, double expected) { + benchmark::Initialize(argc, const_cast<char**>(argv)); + + TestReporter test_reporter; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench"); + assert(returned_count == 1); + + // Check the min_time + const std::vector<double>& min_times = test_reporter.GetMinTimes(); + assert(!min_times.empty() && AlmostEqual(min_times[0], expected)); +} + +} // end namespace + +static void BM_MyBench(benchmark::State& state) { + for (auto s : state) { + } +} +BENCHMARK(BM_MyBench); + +int 
main(int argc, char** argv) { + // Make a fake argv and append the new --benchmark_min_time=<foo> to it. + int fake_argc = argc + 1; + const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)]; + + for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i]; + + const char* no_suffix = "--benchmark_min_time=4"; + const char* with_suffix = "--benchmark_min_time=4.0s"; + double expected = 4.0; + + fake_argv[argc] = no_suffix; + DoTestHelper(&fake_argc, fake_argv, expected); + + fake_argv[argc] = with_suffix; + DoTestHelper(&fake_argc, fake_argv, expected); + + delete[] fake_argv; + return 0; +} diff --git a/test/benchmark_name_gtest.cc b/test/benchmark_name_gtest.cc index afb401c..0a6746d 100644 --- a/test/benchmark_name_gtest.cc +++ b/test/benchmark_name_gtest.cc @@ -32,6 +32,14 @@ TEST(BenchmarkNameTest, MinTime) { EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s"); } +TEST(BenchmarkNameTest, MinWarmUpTime) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.args = "some_args:3/4"; + name.min_warmup_time = "min_warmup_time:3.5s"; + EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_warmup_time:3.5s"); +} + TEST(BenchmarkNameTest, Iterations) { auto name = BenchmarkName(); name.function_name = "function_name"; diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc new file mode 100644 index 0000000..7f20867 --- /dev/null +++ b/test/benchmark_random_interleaving_gtest.cc @@ -0,0 +1,126 @@ +#include <queue> +#include <string> +#include <vector> + +#include "../src/commandlineflags.h" +#include "../src/string_util.h" +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace benchmark { + +BM_DECLARE_bool(benchmark_enable_random_interleaving); +BM_DECLARE_string(benchmark_filter); +BM_DECLARE_int32(benchmark_repetitions); + +namespace internal { +namespace { + +class EventQueue : public std::queue<std::string> { + public: + void Put(const std::string& event) { push(event); } + + void Clear() { + while (!empty()) { + pop(); + } + } + + std::string Get() { + std::string event = front(); + pop(); + return event; + } +}; + +EventQueue* queue = new EventQueue(); + +class NullReporter : public BenchmarkReporter { + public: + bool ReportContext(const Context& /*context*/) override { return true; } + void ReportRuns(const std::vector<Run>& /* report */) override {} +}; + +class BenchmarkTest : public testing::Test { + public: + static void SetupHook(int /* num_threads */) { queue->push("Setup"); } + + static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } + + void Execute(const std::string& pattern) { + queue->Clear(); + + std::unique_ptr<BenchmarkReporter> reporter(new NullReporter()); + FLAGS_benchmark_filter = pattern; + RunSpecifiedBenchmarks(reporter.get()); + + queue->Put("DONE"); // End marker + } +}; + +void BM_Match1(benchmark::State& state) { + const int64_t arg = state.range(0); + + for (auto _ : state) { + } + queue->Put(StrFormat("BM_Match1/%d", static_cast<int>(arg))); +} +BENCHMARK(BM_Match1) + ->Iterations(100) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Range(10, 80) + ->Args({90}) + ->Args({100}); + +TEST_F(BenchmarkTest, Match1) { + Execute("BM_Match1"); + ASSERT_EQ("BM_Match1/1", queue->Get()); + ASSERT_EQ("BM_Match1/2", queue->Get()); + ASSERT_EQ("BM_Match1/3", queue->Get()); + ASSERT_EQ("BM_Match1/10", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + 
ASSERT_EQ("BM_Match1/90", queue->Get()); + ASSERT_EQ("BM_Match1/100", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRepetition) { + FLAGS_benchmark_repetitions = 2; + + Execute("BM_Match1/(64|80)"); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { + FLAGS_benchmark_enable_random_interleaving = true; + FLAGS_benchmark_repetitions = 100; + + std::map<std::string, int> element_count; + std::map<std::string, int> interleaving_count; + Execute("BM_Match1/(64|80)"); + for (int i = 0; i < 100; ++i) { + std::vector<std::string> interleaving; + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + element_count[interleaving[0]]++; + element_count[interleaving[1]]++; + interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), + interleaving[1].c_str())]++; + } + EXPECT_EQ(element_count["BM_Match1/64"], 100) << "Unexpected repetitions."; + EXPECT_EQ(element_count["BM_Match1/80"], 100) << "Unexpected repetitions."; + EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; + ASSERT_EQ("DONE", queue->Get()); +} + +} // namespace +} // namespace internal +} // namespace benchmark diff --git a/test/benchmark_setup_teardown_test.cc b/test/benchmark_setup_teardown_test.cc new file mode 100644 index 0000000..6c3cc2e --- /dev/null +++ b/test/benchmark_setup_teardown_test.cc @@ -0,0 +1,157 @@ +#include <atomic> +#include <cassert> +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <limits> +#include <string> + +#include "benchmark/benchmark.h" + +// Test that Setup() and Teardown() are called exactly once +// for each benchmark run (single-threaded). +namespace singlethreaded { +static int setup_call = 0; +static int teardown_call = 0; +} // namespace singlethreaded +static void DoSetup1(const benchmark::State& state) { + ++singlethreaded::setup_call; + + // Setup/Teardown should never be called with any thread_idx != 0. + assert(state.thread_index() == 0); +} + +static void DoTeardown1(const benchmark::State& state) { + ++singlethreaded::teardown_call; + assert(state.thread_index() == 0); +} + +static void BM_with_setup(benchmark::State& state) { + for (auto s : state) { + } +} +BENCHMARK(BM_with_setup) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Iterations(100) + ->Setup(DoSetup1) + ->Teardown(DoTeardown1); + +// Test that Setup() and Teardown() are called once for each group of threads. 
+namespace concurrent { +static std::atomic<int> setup_call(0); +static std::atomic<int> teardown_call(0); +static std::atomic<int> func_call(0); +} // namespace concurrent + +static void DoSetup2(const benchmark::State& state) { + concurrent::setup_call.fetch_add(1, std::memory_order_acquire); + assert(state.thread_index() == 0); +} + +static void DoTeardown2(const benchmark::State& state) { + concurrent::teardown_call.fetch_add(1, std::memory_order_acquire); + assert(state.thread_index() == 0); +} + +static void BM_concurrent(benchmark::State& state) { + for (auto s : state) { + } + concurrent::func_call.fetch_add(1, std::memory_order_acquire); +} + +BENCHMARK(BM_concurrent) + ->Setup(DoSetup2) + ->Teardown(DoTeardown2) + ->Iterations(100) + ->Threads(5) + ->Threads(10) + ->Threads(15); + +// Testing interaction with Fixture::Setup/Teardown +namespace fixture_interaction { +int setup = 0; +int fixture_setup = 0; +} // namespace fixture_interaction + +#define FIXTURE_BECHMARK_NAME MyFixture + +class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State&) override { + fixture_interaction::fixture_setup++; + } + + ~FIXTURE_BECHMARK_NAME() override {} +}; + +BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) { + for (auto _ : st) { + } +} + +static void DoSetupWithFixture(const benchmark::State&) { + fixture_interaction::setup++; +} + +BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, BM_WithFixture) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Setup(DoSetupWithFixture) + ->Repetitions(1) + ->Iterations(100); + +// Testing repetitions. +namespace repetitions { +int setup = 0; +} + +static void DoSetupWithRepetitions(const benchmark::State&) { + repetitions::setup++; +} +static void BM_WithRep(benchmark::State& state) { + for (auto _ : state) { + } +} + +BENCHMARK(BM_WithRep) + ->Arg(1) + ->Arg(3) + ->Arg(5) + ->Arg(7) + ->Setup(DoSetupWithRepetitions) + ->Iterations(100) + ->Repetitions(4); + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + + size_t ret = benchmark::RunSpecifiedBenchmarks("."); + assert(ret > 0); + + // Setup/Teardown is called once for each arg group (1,3,5,7). + assert(singlethreaded::setup_call == 4); + assert(singlethreaded::teardown_call == 4); + + // 3 group of threads calling this function (3,5,10). + assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3); + assert(concurrent::teardown_call.load(std::memory_order_relaxed) == 3); + assert((5 + 10 + 15) == + concurrent::func_call.load(std::memory_order_relaxed)); + + // Setup is called 4 times, once for each arg group (1,3,5,7) + assert(fixture_interaction::setup == 4); + // Fixture::Setup is called every time the bm routine is run. + // The exact number is indeterministic, so we just assert that + // it's more than setup. + assert(fixture_interaction::fixture_setup > fixture_interaction::setup); + + // Setup is call once for each repetition * num_arg = 4 * 4 = 16. + assert(repetitions::setup == 16); + + return 0; +} diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 3cd4f55..94590d5 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -5,6 +5,7 @@ #include <stdint.h> #include <chrono> +#include <complex> #include <cstdlib> #include <iostream> #include <limits> @@ -26,7 +27,7 @@ namespace { -int BENCHMARK_NOINLINE Factorial(uint32_t n) { +int BENCHMARK_NOINLINE Factorial(int n) { return (n == 1) ? 
1 : n * Factorial(n - 1); } @@ -74,7 +75,8 @@ BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); static void BM_CalculatePi(benchmark::State& state) { static const int depth = 1024; for (auto _ : state) { - benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth))); + double pi = CalculatePi(static_cast<int>(depth)); + benchmark::DoNotOptimize(pi); } } BENCHMARK(BM_CalculatePi)->Threads(8); @@ -90,11 +92,13 @@ static void BM_SetInsert(benchmark::State& state) { for (int j = 0; j < state.range(1); ++j) data.insert(rand()); } state.SetItemsProcessed(state.iterations() * state.range(1)); - state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); + state.SetBytesProcessed(state.iterations() * state.range(1) * + static_cast<int64_t>(sizeof(int))); } -// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower, -// non-timed part of each iteration will make the benchmark take forever. +// Test many inserts at once to reduce the total iterations needed. Otherwise, +// the slower, non-timed part of each iteration will make the benchmark take +// forever. BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); template <typename Container, @@ -107,7 +111,7 @@ static void BM_Sequential(benchmark::State& state) { } const int64_t items_processed = state.iterations() * state.range(0); state.SetItemsProcessed(items_processed); - state.SetBytesProcessed(items_processed * sizeof(v)); + state.SetBytesProcessed(items_processed * static_cast<int64_t>(sizeof(v))); } BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int) ->Range(1 << 0, 1 << 10); @@ -121,12 +125,15 @@ static void BM_StringCompare(benchmark::State& state) { size_t len = static_cast<size_t>(state.range(0)); std::string s1(len, '-'); std::string s2(len, '-'); - for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2)); + for (auto _ : state) { + auto comp = s1.compare(s2); + benchmark::DoNotOptimize(comp); + } } BENCHMARK(BM_StringCompare)->Range(1, 1 << 20); static void BM_SetupTeardown(benchmark::State& state) { - if (state.thread_index == 0) { + if (state.thread_index() == 0) { // No need to lock test_vector_mu here as this is running single-threaded. test_vector = new std::vector<int>(); } @@ -139,7 +146,7 @@ static void BM_SetupTeardown(benchmark::State& state) { test_vector->pop_back(); ++i; } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { delete test_vector; } } @@ -156,11 +163,11 @@ BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); static void BM_ParallelMemset(benchmark::State& state) { int64_t size = state.range(0) / static_cast<int64_t>(sizeof(int)); - int thread_size = static_cast<int>(size) / state.threads; - int from = thread_size * state.thread_index; + int thread_size = static_cast<int>(size) / state.threads(); + int from = thread_size * state.thread_index(); int to = from + thread_size; - if (state.thread_index == 0) { + if (state.thread_index() == 0) { test_vector = new std::vector<int>(static_cast<size_t>(size)); } @@ -168,11 +175,11 @@ static void BM_ParallelMemset(benchmark::State& state) { for (int i = from; i < to; i++) { // No need to lock test_vector_mu as ranges // do not overlap between threads. 
- benchmark::DoNotOptimize(test_vector->at(i) = 1); + benchmark::DoNotOptimize(test_vector->at(static_cast<size_t>(i)) = 1); } } - if (state.thread_index == 0) { + if (state.thread_index() == 0) { delete test_vector; } } @@ -214,7 +221,8 @@ BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), std::pair<int, double>(42, 3.8)); void BM_non_template_args(benchmark::State& state, int, double) { - while(state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); @@ -223,14 +231,14 @@ BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); static void BM_DenseThreadRanges(benchmark::State& st) { switch (st.range(0)) { case 1: - assert(st.threads == 1 || st.threads == 2 || st.threads == 3); + assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3); break; case 2: - assert(st.threads == 1 || st.threads == 3 || st.threads == 4); + assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4); break; case 3: - assert(st.threads == 5 || st.threads == 8 || st.threads == 11 || - st.threads == 14); + assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 || + st.threads() == 14); break; default: assert(false && "Invalid test case number"); @@ -242,4 +250,25 @@ BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3); BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2); BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3); +static void BM_BenchmarkName(benchmark::State& state) { + for (auto _ : state) { + } + + // Check that the benchmark name is passed correctly to `state`. + assert("BM_BenchmarkName" == state.name()); +} +BENCHMARK(BM_BenchmarkName); + +// regression test for #1446 +template <typename type> +static void BM_templated_test(benchmark::State& state) { + for (auto _ : state) { + type created_string; + benchmark::DoNotOptimize(created_string); + } +} + +static auto BM_templated_test_double = BM_templated_test<std::complex<double>>; +BENCHMARK(BM_templated_test_double); + BENCHMARK_MAIN(); diff --git a/test/clobber_memory_assembly_test.cc b/test/clobber_memory_assembly_test.cc index f41911a..54e26cc 100644 --- a/test/clobber_memory_assembly_test.cc +++ b/test/clobber_memory_assembly_test.cc @@ -3,13 +3,13 @@ #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type" #endif +BENCHMARK_DISABLE_DEPRECATED_WARNING extern "C" { extern int ExternInt; extern int ExternInt2; extern int ExternInt3; - } // CHECK-LABEL: test_basic: diff --git a/test/commandlineflags_gtest.cc b/test/commandlineflags_gtest.cc index 656020f..8412008 100644 --- a/test/commandlineflags_gtest.cc +++ b/test/commandlineflags_gtest.cc @@ -2,6 +2,7 @@ #include "../src/commandlineflags.h" #include "../src/internal_macros.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { @@ -19,9 +20,7 @@ int setenv(const char* name, const char* value, int overwrite) { return _putenv_s(name, value); } -int unsetenv(const char* name) { - return _putenv_s(name, ""); -} +int unsetenv(const char* name) { return _putenv_s(name, ""); } #endif // BENCHMARK_OS_WINDOWS @@ -197,5 +196,33 @@ TEST(StringFromEnv, Valid) { unsetenv("IN_ENV"); } +TEST(KvPairsFromEnv, Default) { + ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0); + EXPECT_THAT(KvPairsFromEnv("not_in_env", {{"foo", "bar"}}), + testing::ElementsAre(testing::Pair("foo", "bar"))); +} + +TEST(KvPairsFromEnv, MalformedReturnsDefault) { + ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {{"foo", 
"bar"}}), + testing::ElementsAre(testing::Pair("foo", "bar"))); + unsetenv("IN_ENV"); +} + +TEST(KvPairsFromEnv, Single) { + ASSERT_EQ(setenv("IN_ENV", "foo=bar", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {}), + testing::ElementsAre(testing::Pair("foo", "bar"))); + unsetenv("IN_ENV"); +} + +TEST(KvPairsFromEnv, Multiple) { + ASSERT_EQ(setenv("IN_ENV", "foo=bar,baz=qux", 1), 0); + EXPECT_THAT(KvPairsFromEnv("in_env", {}), + testing::UnorderedElementsAre(testing::Pair("foo", "bar"), + testing::Pair("baz", "qux"))); + unsetenv("IN_ENV"); +} + } // namespace } // namespace benchmark diff --git a/test/complexity_test.cc b/test/complexity_test.cc index 5681fdc..76891e0 100644 --- a/test/complexity_test.cc +++ b/test/complexity_test.cc @@ -4,6 +4,7 @@ #include <cmath> #include <cstdlib> #include <vector> + #include "benchmark/benchmark.h" #include "output_test.h" @@ -12,8 +13,10 @@ namespace { #define ADD_COMPLEXITY_CASES(...) \ int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) -int AddComplexityTest(std::string test_name, std::string big_o_test_name, - std::string rms_test_name, std::string big_o) { +int AddComplexityTest(const std::string &test_name, + const std::string &big_o_test_name, + const std::string &rms_test_name, + const std::string &big_o, int family_index) { SetSubstitutions({{"%name", test_name}, {"%bigo_name", big_o_test_name}, {"%rms_name", rms_test_name}, @@ -25,25 +28,33 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name, {{"^%bigo_name %bigo_str %bigo_str[ ]*$"}, {"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name. {"^%rms_name %rms %rms[ ]*$", MR_Next}}); - AddCases(TC_JSONOut, {{"\"name\": \"%bigo_name\",$"}, - {"\"run_name\": \"%name\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": %int,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"BigO\",$", MR_Next}, - {"\"cpu_coefficient\": %float,$", MR_Next}, - {"\"real_coefficient\": %float,$", MR_Next}, - {"\"big_o\": \"%bigo\",$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}, - {"\"name\": \"%rms_name\",$"}, - {"\"run_name\": \"%name\",$", MR_Next}, - {"\"run_type\": \"aggregate\",$", MR_Next}, - {"\"repetitions\": %int,$", MR_Next}, - {"\"threads\": 1,$", MR_Next}, - {"\"aggregate_name\": \"RMS\",$", MR_Next}, - {"\"rms\": %float$", MR_Next}, - {"}", MR_Next}}); + AddCases( + TC_JSONOut, + {{"\"name\": \"%bigo_name\",$"}, + {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"%name\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": %int,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"BigO\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"cpu_coefficient\": %float,$", MR_Next}, + {"\"real_coefficient\": %float,$", MR_Next}, + {"\"big_o\": \"%bigo\",$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}, + {"\"name\": \"%rms_name\",$"}, + {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"%name\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": %int,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"RMS\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"rms\": %float$", MR_Next}, + {"}", MR_Next}}); AddCases(TC_CSVOut, 
{{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, {"^\"%bigo_name\"", MR_Not}, {"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}}); @@ -56,10 +67,10 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name, // --------------------------- Testing BigO O(1) --------------------------- // // ========================================================================= // -void BM_Complexity_O1(benchmark::State& state) { +void BM_Complexity_O1(benchmark::State &state) { for (auto _ : state) { for (int i = 0; i < 1024; ++i) { - benchmark::DoNotOptimize(&i); + benchmark::DoNotOptimize(i); } } state.SetComplexityN(state.range(0)); @@ -82,15 +93,15 @@ const char *lambda_big_o_1 = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - enum_big_o_1); + enum_big_o_1, /*family_index=*/0); // Add auto enum tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - auto_big_o_1); + auto_big_o_1, /*family_index=*/1); // Add lambda tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - lambda_big_o_1); + lambda_big_o_1, /*family_index=*/2); // ========================================================================= // // --------------------------- Testing BigO O(N) --------------------------- // @@ -98,19 +109,20 @@ ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, std::vector<int> ConstructRandomVector(int64_t size) { std::vector<int> v; - v.reserve(static_cast<int>(size)); + v.reserve(static_cast<size_t>(size)); for (int i = 0; i < size; ++i) { v.push_back(static_cast<int>(std::rand() % size)); } return v; } -void BM_Complexity_O_N(benchmark::State& state) { +void BM_Complexity_O_N(benchmark::State &state) { auto v = ConstructRandomVector(state.range(0)); // Test worst case scenario (item not in vector) const int64_t item_not_in_vector = state.range(0) * 2; for (auto _ : state) { - benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector)); + auto it = std::find(v.begin(), v.end(), item_not_in_vector); + benchmark::DoNotOptimize(it); } state.SetComplexityN(state.range(0)); } @@ -137,17 +149,17 @@ const char *lambda_big_o_n = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, - enum_auto_big_o_n); + enum_auto_big_o_n, /*family_index=*/3); // Add lambda tests ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, - lambda_big_o_n); + lambda_big_o_n, /*family_index=*/4); // ========================================================================= // // ------------------------- Testing BigO O(N*lgN) ------------------------- // // ========================================================================= // -static void BM_Complexity_O_N_log_N(benchmark::State& state) { +static void BM_Complexity_O_N_log_N(benchmark::State &state) { auto v = ConstructRandomVector(state.range(0)); for (auto _ : state) { std::sort(v.begin(), v.end()); @@ -163,7 +175,7 @@ BENCHMARK(BM_Complexity_O_N_log_N) ->RangeMultiplier(2) ->Range(1 << 10, 1 << 16) ->Complexity([](benchmark::IterationCount n) { - return kLog2E * n * log(static_cast<double>(n)); + return kLog2E * static_cast<double>(n) * log(static_cast<double>(n)); }); BENCHMARK(BM_Complexity_O_N_log_N) ->RangeMultiplier(2) @@ -178,20 +190,23 @@ const char *lambda_big_o_n_lg_n = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, - rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n); + rms_o_n_lg_n_test_name, 
enum_auto_big_o_n_lg_n, + /*family_index=*/6); // Add lambda tests ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name, - rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n); + rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n, + /*family_index=*/7); // ========================================================================= // // -------- Testing formatting of Complexity with captured args ------------ // // ========================================================================= // -void BM_ComplexityCaptureArgs(benchmark::State& state, int n) { +void BM_ComplexityCaptureArgs(benchmark::State &state, int n) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetComplexityN(n); } @@ -204,7 +219,7 @@ const std::string complexity_capture_name = "BM_ComplexityCaptureArgs/capture_test"; ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO", - complexity_capture_name + "_RMS", "N"); + complexity_capture_name + "_RMS", "N", /*family_index=*/9); // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // diff --git a/test/cxx03_test.cc b/test/cxx03_test.cc index c4c9a52..9711c1b 100644 --- a/test/cxx03_test.cc +++ b/test/cxx03_test.cc @@ -44,8 +44,7 @@ BENCHMARK_TEMPLATE(BM_template1, long); BENCHMARK_TEMPLATE1(BM_template1, int); template <class T> -struct BM_Fixture : public ::benchmark::Fixture { -}; +struct BM_Fixture : public ::benchmark::Fixture {}; BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) { BM_empty(state); @@ -55,8 +54,8 @@ BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) { } void BM_counters(benchmark::State& state) { - BM_empty(state); - state.counters["Foo"] = 2; + BM_empty(state); + state.counters["Foo"] = 2; } BENCHMARK(BM_counters); diff --git a/test/diagnostics_test.cc b/test/diagnostics_test.cc index dd64a33..0cd3edb 100644 --- a/test/diagnostics_test.cc +++ b/test/diagnostics_test.cc @@ -26,7 +26,8 @@ void TestHandler() { } void try_invalid_pause_resume(benchmark::State& state) { -#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && !defined(TEST_HAS_NO_EXCEPTIONS) +#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && \ + !defined(TEST_HAS_NO_EXCEPTIONS) try { state.PauseTiming(); std::abort(); @@ -48,7 +49,8 @@ void BM_diagnostic_test(benchmark::State& state) { if (called_once == false) try_invalid_pause_resume(state); for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } if (called_once == false) try_invalid_pause_resume(state); @@ -57,14 +59,14 @@ void BM_diagnostic_test(benchmark::State& state) { } BENCHMARK(BM_diagnostic_test); - void BM_diagnostic_test_keep_running(benchmark::State& state) { static bool called_once = false; if (called_once == false) try_invalid_pause_resume(state); - while(state.KeepRunning()) { - benchmark::DoNotOptimize(state.iterations()); + while (state.KeepRunning()) { + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } if (called_once == false) try_invalid_pause_resume(state); @@ -74,7 +76,16 @@ void BM_diagnostic_test_keep_running(benchmark::State& state) { BENCHMARK(BM_diagnostic_test_keep_running); int main(int argc, char* argv[]) { +#ifdef NDEBUG + // 
This test is exercising functionality for debug builds, which are not + // available in release builds. Skip the test if we are in that environment + // to avoid a test failure. + std::cout << "Diagnostic test disabled in release build" << std::endl; + (void)argc; + (void)argv; +#else benchmark::internal::GetAbortHandler() = &TestHandler; benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); +#endif } diff --git a/test/display_aggregates_only_test.cc b/test/display_aggregates_only_test.cc index 3c36d3f..6ad65e7 100644 --- a/test/display_aggregates_only_test.cc +++ b/test/display_aggregates_only_test.cc @@ -19,21 +19,23 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 6 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 7 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find 6 " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find 8 " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/test/donotoptimize_assembly_test.cc b/test/donotoptimize_assembly_test.cc index d4b0bab..dc286f5 100644 --- a/test/donotoptimize_assembly_test.cc +++ b/test/donotoptimize_assembly_test.cc @@ -3,19 +3,23 @@ #ifdef __clang__ #pragma clang diagnostic ignored "-Wreturn-type" #endif +BENCHMARK_DISABLE_DEPRECATED_WARNING extern "C" { extern int ExternInt; extern int ExternInt2; extern int ExternInt3; +extern int BigArray[2049]; + +const int ConstBigArray[2049]{}; inline int Add42(int x) { return x + 42; } struct NotTriviallyCopyable { NotTriviallyCopyable(); explicit NotTriviallyCopyable(int x) : value(x) {} - NotTriviallyCopyable(NotTriviallyCopyable const&); + NotTriviallyCopyable(NotTriviallyCopyable const &); int value; }; @@ -24,7 +28,14 @@ struct Large { int data[2]; }; +struct ExtraLarge { + int arr[2049]; +}; } + +extern ExtraLarge ExtraLargeObj; +const ExtraLarge ConstExtraLargeObj{}; + // CHECK-LABEL: test_with_rvalue: extern "C" void test_with_rvalue() { benchmark::DoNotOptimize(Add42(0)); @@ -69,6 +80,22 @@ extern "C" void test_with_large_lvalue() { // CHECK: ret } +// CHECK-LABEL: test_with_extra_large_lvalue_with_op: +extern "C" void test_with_extra_large_lvalue_with_op() { + ExtraLargeObj.arr[16] = 42; + benchmark::DoNotOptimize(ExtraLargeObj); + // CHECK: movl $42, ExtraLargeObj+64(%rip) + // CHECK: ret +} + +// CHECK-LABEL: test_with_big_array_with_op +extern "C" void test_with_big_array_with_op() { + BigArray[16] = 42; + benchmark::DoNotOptimize(BigArray); + // CHECK: movl $42, BigArray+64(%rip) + // 
CHECK: ret +} + // CHECK-LABEL: test_with_non_trivial_lvalue: extern "C" void test_with_non_trivial_lvalue() { NotTriviallyCopyable NTC(ExternInt); @@ -97,6 +124,18 @@ extern "C" void test_with_large_const_lvalue() { // CHECK: ret } +// CHECK-LABEL: test_with_const_extra_large_obj: +extern "C" void test_with_const_extra_large_obj() { + benchmark::DoNotOptimize(ConstExtraLargeObj); + // CHECK: ret +} + +// CHECK-LABEL: test_with_const_big_array +extern "C" void test_with_const_big_array() { + benchmark::DoNotOptimize(ConstBigArray); + // CHECK: ret +} + // CHECK-LABEL: test_with_non_trivial_const_lvalue: extern "C" void test_with_non_trivial_const_lvalue() { const NotTriviallyCopyable Obj(ExternInt); @@ -118,8 +157,7 @@ extern "C" int test_div_by_two(int input) { // CHECK-LABEL: test_inc_integer: extern "C" int test_inc_integer() { int x = 0; - for (int i=0; i < 5; ++i) - benchmark::DoNotOptimize(++x); + for (int i = 0; i < 5; ++i) benchmark::DoNotOptimize(++x); // CHECK: movl $1, [[DEST:.*]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] // CHECK: {{(addl \$1,|incl)}} [[DEST]] @@ -147,7 +185,7 @@ extern "C" void test_pointer_const_lvalue() { // CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]]) // CHECK: ret int x = 42; - int * const xp = &x; + int *const xp = &x; benchmark::DoNotOptimize(xp); } diff --git a/test/donotoptimize_test.cc b/test/donotoptimize_test.cc index 2ce92d1..04ec938 100644 --- a/test/donotoptimize_test.cc +++ b/test/donotoptimize_test.cc @@ -1,33 +1,43 @@ -#include "benchmark/benchmark.h" - #include <cstdint> +#include "benchmark/benchmark.h" + namespace { #if defined(__GNUC__) -std::uint64_t double_up(const std::uint64_t x) __attribute__((const)); +std::int64_t double_up(const std::int64_t x) __attribute__((const)); #endif -std::uint64_t double_up(const std::uint64_t x) { return x * 2; } -} +std::int64_t double_up(const std::int64_t x) { return x * 2; } +} // namespace // Using DoNotOptimize on types like BitRef seem to cause a lot of problems // with the inline assembly on both GCC and Clang. struct BitRef { int index; - unsigned char &byte; + unsigned char& byte; -public: + public: static BitRef Make() { static unsigned char arr[2] = {}; BitRef b(1, arr[0]); return b; } -private: + + private: BitRef(int i, unsigned char& b) : index(i), byte(b) {} }; int main(int, char*[]) { // this test verifies compilation of DoNotOptimize() for some types + char buffer1[1] = ""; + benchmark::DoNotOptimize(buffer1); + + char buffer2[2] = ""; + benchmark::DoNotOptimize(buffer2); + + char buffer3[3] = ""; + benchmark::DoNotOptimize(buffer3); + char buffer8[8] = ""; benchmark::DoNotOptimize(buffer8); @@ -36,17 +46,24 @@ int main(int, char*[]) { char buffer1024[1024] = ""; benchmark::DoNotOptimize(buffer1024); - benchmark::DoNotOptimize(&buffer1024[0]); + char* bptr = &buffer1024[0]; + benchmark::DoNotOptimize(bptr); int x = 123; benchmark::DoNotOptimize(x); - benchmark::DoNotOptimize(&x); + int* xp = &x; + benchmark::DoNotOptimize(xp); benchmark::DoNotOptimize(x += 42); - benchmark::DoNotOptimize(double_up(x)); + std::int64_t y = double_up(x); + benchmark::DoNotOptimize(y); // These tests are to e - benchmark::DoNotOptimize(BitRef::Make()); BitRef lval = BitRef::Make(); benchmark::DoNotOptimize(lval); + +#ifdef BENCHMARK_HAS_CXX11 + // Check that accept rvalue. 
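The donotoptimize hunks above consistently rewrite DoNotOptimize(expr) into a named local that is then passed to DoNotOptimize, and add BENCHMARK_DISABLE_DEPRECATED_WARNING to the assembly tests; the exact motivation (deprecated overloads versus keeping assembly matching stable) is not spelled out in the patch. A small sketch of the preferred pattern, using an illustrative Compute() helper:

#include "benchmark/benchmark.h"

static int Compute(int x) { return x * 2 + 1; }  // illustrative workload

static void BM_Compute(benchmark::State& state) {
  for (auto _ : state) {
    // Style used by the updated tests: materialize the result in a local
    // lvalue and hand that lvalue to DoNotOptimize.
    int result = Compute(42);
    benchmark::DoNotOptimize(result);
  }
}
BENCHMARK(BM_Compute);

BENCHMARK_MAIN();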
+ benchmark::DoNotOptimize(BitRef::Make()); +#endif } diff --git a/test/filter_test.cc b/test/filter_test.cc index 0e27065..4c8b8ea 100644 --- a/test/filter_test.cc +++ b/test/filter_test.cc @@ -1,36 +1,40 @@ -#include "benchmark/benchmark.h" - +#include <algorithm> #include <cassert> #include <cmath> #include <cstdint> #include <cstdlib> - #include <iostream> #include <limits> #include <sstream> #include <string> +#include "benchmark/benchmark.h" + namespace { class TestReporter : public benchmark::ConsoleReporter { public: - virtual bool ReportContext(const Context& context) { + bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; - virtual void ReportRuns(const std::vector<Run>& report) { + void ReportRuns(const std::vector<Run>& report) override { ++count_; + max_family_index_ = std::max(max_family_index_, report[0].family_index); ConsoleReporter::ReportRuns(report); }; - TestReporter() : count_(0) {} + TestReporter() : count_(0), max_family_index_(0) {} - virtual ~TestReporter() {} + ~TestReporter() override {} - size_t GetCount() const { return count_; } + int GetCount() const { return count_; } + + int64_t GetMaxFamilyIndex() const { return max_family_index_; } private: - mutable size_t count_; + mutable int count_; + mutable int64_t max_family_index_; }; } // end namespace @@ -65,7 +69,7 @@ static void BM_FooBa(benchmark::State& state) { } BENCHMARK(BM_FooBa); -int main(int argc, char **argv) { +int main(int argc, char** argv) { bool list_only = false; for (int i = 0; i < argc; ++i) list_only |= std::string(argv[i]).find("--benchmark_list_tests") != @@ -74,13 +78,13 @@ int main(int argc, char **argv) { benchmark::Initialize(&argc, argv); TestReporter test_reporter; - const size_t returned_count = - benchmark::RunSpecifiedBenchmarks(&test_reporter); + const int64_t returned_count = + static_cast<int64_t>(benchmark::RunSpecifiedBenchmarks(&test_reporter)); if (argc == 2) { // Make sure we ran all of the tests std::stringstream ss(argv[1]); - size_t expected_return; + int64_t expected_return; ss >> expected_return; if (returned_count != expected_return) { @@ -90,14 +94,23 @@ int main(int argc, char **argv) { return -1; } - const size_t expected_reports = list_only ? 0 : expected_return; - const size_t reports_count = test_reporter.GetCount(); + const int64_t expected_reports = list_only ? 0 : expected_return; + const int64_t reports_count = test_reporter.GetCount(); if (reports_count != expected_reports) { std::cerr << "ERROR: Expected " << expected_reports << " tests to be run but reported_count = " << reports_count << std::endl; return -1; } + + const int64_t max_family_index = test_reporter.GetMaxFamilyIndex(); + const int64_t num_families = reports_count == 0 ? 
0 : 1 + max_family_index; + if (num_families != expected_reports) { + std::cerr << "ERROR: Expected " << expected_reports + << " test families to be run but num_families = " + << num_families << std::endl; + return -1; + } } return 0; diff --git a/test/fixture_test.cc b/test/fixture_test.cc index a331c7d..d1093eb 100644 --- a/test/fixture_test.cc +++ b/test/fixture_test.cc @@ -1,33 +1,33 @@ -#include "benchmark/benchmark.h" - #include <cassert> #include <memory> +#include "benchmark/benchmark.h" + #define FIXTURE_BECHMARK_NAME MyFixture class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture { public: - void SetUp(const ::benchmark::State& state) { - if (state.thread_index == 0) { + void SetUp(const ::benchmark::State& state) override { + if (state.thread_index() == 0) { assert(data.get() == nullptr); data.reset(new int(42)); } } - void TearDown(const ::benchmark::State& state) { - if (state.thread_index == 0) { + void TearDown(const ::benchmark::State& state) override { + if (state.thread_index() == 0) { assert(data.get() != nullptr); data.reset(); } } - ~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); } + ~FIXTURE_BECHMARK_NAME() override { assert(data == nullptr); } std::unique_ptr<int> data; }; -BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) { +BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State& st) { assert(data.get() != nullptr); assert(*data == 42); for (auto _ : st) { @@ -35,7 +35,7 @@ BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) { } BENCHMARK_DEFINE_F(FIXTURE_BECHMARK_NAME, Bar)(benchmark::State& st) { - if (st.thread_index == 0) { + if (st.thread_index() == 0) { assert(data.get() != nullptr); assert(*data == 42); } diff --git a/test/internal_threading_test.cc b/test/internal_threading_test.cc index 039d7c1..62b5b95 100644 --- a/test/internal_threading_test.cc +++ b/test/internal_threading_test.cc @@ -3,6 +3,7 @@ #include <chrono> #include <thread> + #include "../src/timers.h" #include "benchmark/benchmark.h" #include "output_test.h" diff --git a/test/link_main_test.cc b/test/link_main_test.cc index 241ad5c..e806500 100644 --- a/test/link_main_test.cc +++ b/test/link_main_test.cc @@ -2,7 +2,8 @@ void BM_empty(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); diff --git a/test/map_test.cc b/test/map_test.cc index dbf7982..0fdba7c 100644 --- a/test/map_test.cc +++ b/test/map_test.cc @@ -1,8 +1,8 @@ -#include "benchmark/benchmark.h" - #include <cstdlib> #include <map> +#include "benchmark/benchmark.h" + namespace { std::map<int, int> ConstructRandomMap(int size) { @@ -24,7 +24,8 @@ static void BM_MapLookup(benchmark::State& state) { m = ConstructRandomMap(size); state.ResumeTiming(); for (int i = 0; i < size; ++i) { - benchmark::DoNotOptimize(m.find(std::rand() % size)); + auto it = m.find(std::rand() % size); + benchmark::DoNotOptimize(it); } } state.SetItemsProcessed(state.iterations() * size); @@ -34,11 +35,11 @@ BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12); // Using fixtures. 
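The fixture, map and threading hunks in this patch replace the old state.thread_index / state.threads data members with the thread_index() and threads() accessors. A minimal sketch of the accessor-based pattern, with an illustrative benchmark that splits work across the configured threads:

#include "benchmark/benchmark.h"

static void BM_PerThreadSlice(benchmark::State& state) {
  // Accessors replace the old public data members:
  //   state.thread_index  ->  state.thread_index()
  //   state.threads       ->  state.threads()
  const int slice = 1024 / state.threads();
  const int begin = slice * state.thread_index();

  for (auto _ : state) {
    int sum = 0;
    for (int i = begin; i < begin + slice; ++i) sum += i;
    benchmark::DoNotOptimize(sum);
  }
}
BENCHMARK(BM_PerThreadSlice)->Threads(4);

BENCHMARK_MAIN();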
class MapFixture : public ::benchmark::Fixture { public: - void SetUp(const ::benchmark::State& st) { + void SetUp(const ::benchmark::State& st) override { m = ConstructRandomMap(static_cast<int>(st.range(0))); } - void TearDown(const ::benchmark::State&) { m.clear(); } + void TearDown(const ::benchmark::State&) override { m.clear(); } std::map<int, int> m; }; @@ -47,7 +48,8 @@ BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) { const int size = static_cast<int>(state.range(0)); for (auto _ : state) { for (int i = 0; i < size; ++i) { - benchmark::DoNotOptimize(m.find(std::rand() % size)); + auto it = m.find(std::rand() % size); + benchmark::DoNotOptimize(it); } } state.SetItemsProcessed(state.iterations() * size); diff --git a/test/memory_manager_test.cc b/test/memory_manager_test.cc index 90bed16..d94bd51 100644 --- a/test/memory_manager_test.cc +++ b/test/memory_manager_test.cc @@ -5,25 +5,28 @@ #include "output_test.h" class TestMemoryManager : public benchmark::MemoryManager { - void Start() {} - void Stop(Result* result) { - result->num_allocs = 42; - result->max_bytes_used = 42000; + void Start() override {} + void Stop(Result& result) override { + result.num_allocs = 42; + result.max_bytes_used = 42000; } }; void BM_empty(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } } BENCHMARK(BM_empty); ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_empty\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, diff --git a/test/min_time_parse_gtest.cc b/test/min_time_parse_gtest.cc new file mode 100644 index 0000000..e2bdf67 --- /dev/null +++ b/test/min_time_parse_gtest.cc @@ -0,0 +1,30 @@ +#include "../src/benchmark_runner.h" +#include "gtest/gtest.h" + +namespace { + +TEST(ParseMinTimeTest, InvalidInput) { +#if GTEST_HAS_DEATH_TEST + // Tests only runnable in debug mode (when BM_CHECK is enabled). 
+#ifndef NDEBUG +#ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("abc"); }, + "Malformed seconds value passed to --benchmark_min_time: `abc`"); + + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("123ms"); }, + "Malformed seconds value passed to --benchmark_min_time: `123ms`"); + + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("1z"); }, + "Malformed seconds value passed to --benchmark_min_time: `1z`"); + + ASSERT_DEATH_IF_SUPPORTED( + { benchmark::internal::ParseBenchMinTime("1hs"); }, + "Malformed seconds value passed to --benchmark_min_time: `1hs`"); +#endif +#endif +#endif +} +} // namespace diff --git a/test/multiple_ranges_test.cc b/test/multiple_ranges_test.cc index b25f40e..5300a96 100644 --- a/test/multiple_ranges_test.cc +++ b/test/multiple_ranges_test.cc @@ -1,10 +1,10 @@ -#include "benchmark/benchmark.h" - #include <cassert> #include <iostream> #include <set> #include <vector> +#include "benchmark/benchmark.h" + class MultipleRangesFixture : public ::benchmark::Fixture { public: MultipleRangesFixture() @@ -28,7 +28,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture { {2, 7, 15}, {7, 6, 3}}) {} - void SetUp(const ::benchmark::State& state) { + void SetUp(const ::benchmark::State& state) override { std::vector<int64_t> ranges = {state.range(0), state.range(1), state.range(2)}; @@ -39,10 +39,10 @@ class MultipleRangesFixture : public ::benchmark::Fixture { // NOTE: This is not TearDown as we want to check after _all_ runs are // complete. - virtual ~MultipleRangesFixture() { + ~MultipleRangesFixture() override { if (actualValues != expectedValues) { std::cout << "EXPECTED\n"; - for (auto v : expectedValues) { + for (const auto& v : expectedValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; @@ -50,7 +50,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture { std::cout << "}\n"; } std::cout << "ACTUAL\n"; - for (auto v : actualValues) { + for (const auto& v : actualValues) { std::cout << "{"; for (int64_t iv : v) { std::cout << iv << ", "; diff --git a/test/options_test.cc b/test/options_test.cc index 9f9a786..a1b209f 100644 --- a/test/options_test.cc +++ b/test/options_test.cc @@ -1,7 +1,8 @@ -#include "benchmark/benchmark.h" #include <chrono> #include <thread> +#include "benchmark/benchmark.h" + #if defined(NDEBUG) #undef NDEBUG #endif @@ -32,6 +33,8 @@ BENCHMARK(BM_basic)->DenseRange(10, 15); BENCHMARK(BM_basic)->Args({42, 42}); BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}}); BENCHMARK(BM_basic)->MinTime(0.7); +BENCHMARK(BM_basic)->MinWarmUpTime(0.8); +BENCHMARK(BM_basic)->MinTime(0.1)->MinWarmUpTime(0.2); BENCHMARK(BM_basic)->UseRealTime(); BENCHMARK(BM_basic)->ThreadRange(2, 4); BENCHMARK(BM_basic)->ThreadPerCpu(); @@ -64,12 +67,10 @@ void BM_explicit_iteration_count(benchmark::State& state) { // Test that the requested iteration count is respected. 
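options_test.cc above now also registers benchmarks with the new MinWarmUpTime() builder, alone and combined with MinTime(). A hedged sketch of how that reads in user code, with an illustrative benchmark; the warm-up runs ahead of the measured phase so caches and other state can settle:

#include <cstdint>
#include <vector>

#include "benchmark/benchmark.h"

static void BM_ColdCache(benchmark::State& state) {  // illustrative benchmark
  std::vector<int> data(1 << 16, 1);
  for (auto _ : state) {
    int64_t sum = 0;
    for (int v : data) sum += v;
    benchmark::DoNotOptimize(sum);
  }
}

// Roughly 0.5s of warm-up before a measured phase of at least 1s,
// mirroring the MinTime()/MinWarmUpTime() combination in options_test.cc.
BENCHMARK(BM_ColdCache)->MinWarmUpTime(0.5)->MinTime(1.0);

BENCHMARK_MAIN();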
assert(state.max_iterations == 42); - size_t actual_iterations = 0; - for (auto _ : state) - ++actual_iterations; + for (auto _ : state) { + } assert(state.iterations() == state.max_iterations); assert(state.iterations() == 42); - } BENCHMARK(BM_explicit_iteration_count)->Iterations(42); diff --git a/test/output_test.h b/test/output_test.h index 9385761..c08fe1d 100644 --- a/test/output_test.h +++ b/test/output_test.h @@ -85,7 +85,7 @@ std::string GetFileReporterOutput(int argc, char* argv[]); struct Results; typedef std::function<void(Results const&)> ResultsCheckFn; -size_t AddChecker(const char* bm_name_pattern, ResultsCheckFn fn); +size_t AddChecker(const std::string& bm_name_pattern, const ResultsCheckFn& fn); // Class holding the results of a benchmark. // It is passed in calls to checker functions. @@ -113,13 +113,11 @@ struct Results { return NumIterations() * GetTime(kRealTime); } // get the cpu_time duration of the benchmark in seconds - double DurationCPUTime() const { - return NumIterations() * GetTime(kCpuTime); - } + double DurationCPUTime() const { return NumIterations() * GetTime(kCpuTime); } // get the string for a result by name, or nullptr if the name // is not found - const std::string* Get(const char* entry_name) const { + const std::string* Get(const std::string& entry_name) const { auto it = values.find(entry_name); if (it == values.end()) return nullptr; return &it->second; @@ -128,12 +126,12 @@ struct Results { // get a result by name, parsed as a specific type. // NOTE: for counters, use GetCounterAs instead. template <class T> - T GetAs(const char* entry_name) const; + T GetAs(const std::string& entry_name) const; // counters are written as doubles, so they have to be read first // as a double, and only then converted to the asked type. template <class T> - T GetCounterAs(const char* entry_name) const { + T GetCounterAs(const std::string& entry_name) const { double dval = GetAs<double>(entry_name); T tval = static_cast<T>(dval); return tval; @@ -141,14 +139,14 @@ struct Results { }; template <class T> -T Results::GetAs(const char* entry_name) const { +T Results::GetAs(const std::string& entry_name) const { auto* sv = Get(entry_name); - CHECK(sv != nullptr && !sv->empty()); + BM_CHECK(sv != nullptr && !sv->empty()); std::stringstream ss; ss << *sv; T out; ss >> out; - CHECK(!ss.fail()); + BM_CHECK(!ss.fail()); return out; } @@ -158,8 +156,8 @@ T Results::GetAs(const char* entry_name) const { // clang-format off -#define _CHECK_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value) \ - CONCAT(CHECK_, relationship) \ +#define CHECK_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value) \ + CONCAT(BM_CHECK_, relationship) \ (entry.getfn< var_type >(var_name), (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ @@ -169,8 +167,8 @@ T Results::GetAs(const char* entry_name) const { // check with tolerance. eps_factor is the tolerance window, which is // interpreted relative to value (eg, 0.1 means 10% of value). 
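For instance, with value = 100 and eps_factor = 0.1 the float checks accept any measurement in the closed range [90, 110], since the allowed absolute deviation is eps_factor * value = 10.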
-#define _CHECK_FLOAT_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ - CONCAT(CHECK_FLOAT_, relationship) \ +#define CHECK_FLOAT_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value, eps_factor) \ + CONCAT(BM_CHECK_FLOAT_, relationship) \ (entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \ << __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \ << __FILE__ << ":" << __LINE__ << ": " \ @@ -187,16 +185,16 @@ T Results::GetAs(const char* entry_name) const { << "%)" #define CHECK_RESULT_VALUE(entry, var_type, var_name, relationship, value) \ - _CHECK_RESULT_VALUE(entry, GetAs, var_type, var_name, relationship, value) + CHECK_RESULT_VALUE_IMPL(entry, GetAs, var_type, var_name, relationship, value) #define CHECK_COUNTER_VALUE(entry, var_type, var_name, relationship, value) \ - _CHECK_RESULT_VALUE(entry, GetCounterAs, var_type, var_name, relationship, value) + CHECK_RESULT_VALUE_IMPL(entry, GetCounterAs, var_type, var_name, relationship, value) #define CHECK_FLOAT_RESULT_VALUE(entry, var_name, relationship, value, eps_factor) \ - _CHECK_FLOAT_RESULT_VALUE(entry, GetAs, double, var_name, relationship, value, eps_factor) + CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetAs, double, var_name, relationship, value, eps_factor) #define CHECK_FLOAT_COUNTER_VALUE(entry, var_name, relationship, value, eps_factor) \ - _CHECK_FLOAT_RESULT_VALUE(entry, GetCounterAs, double, var_name, relationship, value, eps_factor) + CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetCounterAs, double, var_name, relationship, value, eps_factor) // clang-format on diff --git a/test/output_test_helper.cc b/test/output_test_helper.cc index 1aebc55..2567370 100644 --- a/test/output_test_helper.cc +++ b/test/output_test_helper.cc @@ -10,6 +10,7 @@ #include "../src/benchmark_api_internal.h" #include "../src/check.h" // NOTE: check.h is for internal use only! 
+#include "../src/log.h" // NOTE: log.h is for internal use only #include "../src/re.h" // NOTE: re.h is for internal use only #include "output_test.h" @@ -40,14 +41,17 @@ SubMap& GetSubstitutions() { // clang-format off static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; static std::string time_re = "([0-9]+[.])?[0-9]+"; + static std::string percentage_re = "[0-9]+[.][0-9]{2}"; static SubMap map = { {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, // human-readable float - {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"}, + {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kKMGTPEZYmunpfazy]?i?"}, + {"%percentage", percentage_re}, {"%int", "[ ]*[0-9]+"}, {" %s ", "[ ]+"}, {"%time", "[ ]*" + time_re + "[ ]+ns"}, {"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"}, + {"%console_percentage_report", "[ ]*" + percentage_re + "[ ]+% [ ]*" + percentage_re + "[ ]+% [ ]*[0-9]+"}, {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, {"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"}, {"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"}, @@ -94,27 +98,27 @@ void CheckCase(std::stringstream& remaining_output, TestCase const& TC, bool on_first = true; std::string line; while (remaining_output.eof() == false) { - CHECK(remaining_output.good()); + BM_CHECK(remaining_output.good()); std::getline(remaining_output, line); if (on_first) { first_line = line; on_first = false; } for (const auto& NC : not_checks) { - CHECK(!NC.regex->Match(line)) + BM_CHECK(!NC.regex->Match(line)) << "Unexpected match for line \"" << line << "\" for MR_Not regex \"" << NC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } if (TC.regex->Match(line)) return; - CHECK(TC.match_rule != MR_Next) + BM_CHECK(TC.match_rule != MR_Next) << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; } - CHECK(remaining_output.eof() == false) + BM_CHECK(remaining_output.eof() == false) << "End of output reached before match for regex \"" << TC.regex_str << "\" was found" << "\n actual regex string \"" << TC.substituted_regex << "\"" @@ -137,14 +141,14 @@ void CheckCases(TestCaseList const& checks, std::stringstream& output) { class TestReporter : public benchmark::BenchmarkReporter { public: TestReporter(std::vector<benchmark::BenchmarkReporter*> reps) - : reporters_(reps) {} + : reporters_(std::move(reps)) {} - virtual bool ReportContext(const Context& context) { + bool ReportContext(const Context& context) override { bool last_ret = false; bool first = true; for (auto rep : reporters_) { bool new_ret = rep->ReportContext(context); - CHECK(first || new_ret == last_ret) + BM_CHECK(first || new_ret == last_ret) << "Reports return different values for ReportContext"; first = false; last_ret = new_ret; @@ -153,10 +157,10 @@ class TestReporter : public benchmark::BenchmarkReporter { return last_ret; } - void ReportRuns(const std::vector<Run>& report) { + void ReportRuns(const std::vector<Run>& report) override { for (auto rep : reporters_) rep->ReportRuns(report); } - void Finalize() { + void Finalize() override { for (auto rep : reporters_) rep->Finalize(); } @@ -179,7 +183,7 @@ class ResultsChecker { public: struct PatternAndFn : public TestCase { // reusing 
TestCase for its regexes PatternAndFn(const std::string& rx, ResultsCheckFn fn_) - : TestCase(rx), fn(fn_) {} + : TestCase(rx), fn(std::move(fn_)) {} ResultsCheckFn fn; }; @@ -187,7 +191,7 @@ class ResultsChecker { std::vector<Results> results; std::vector<std::string> field_names; - void Add(const std::string& entry_pattern, ResultsCheckFn fn); + void Add(const std::string& entry_pattern, const ResultsCheckFn& fn); void CheckResults(std::stringstream& output); @@ -206,7 +210,8 @@ ResultsChecker& GetResultsChecker() { } // add a results checker for a benchmark -void ResultsChecker::Add(const std::string& entry_pattern, ResultsCheckFn fn) { +void ResultsChecker::Add(const std::string& entry_pattern, + const ResultsCheckFn& fn) { check_patterns.emplace_back(entry_pattern, fn); } @@ -226,7 +231,7 @@ void ResultsChecker::CheckResults(std::stringstream& output) { std::string line; bool on_first = true; while (output.eof() == false) { - CHECK(output.good()); + BM_CHECK(output.good()); std::getline(output, line); if (on_first) { SetHeader_(line); // this is important @@ -237,18 +242,17 @@ void ResultsChecker::CheckResults(std::stringstream& output) { } // finally we can call the subscribed check functions for (const auto& p : check_patterns) { - VLOG(2) << "--------------------------------\n"; - VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; + BM_VLOG(2) << "--------------------------------\n"; + BM_VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n"; for (const auto& r : results) { if (!p.regex->Match(r.name)) { - VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; + BM_VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n"; continue; - } else { - VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; } - VLOG(1) << "Checking results of " << r.name << ": ... \n"; + BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n"; + BM_VLOG(1) << "Checking results of " << r.name << ": ... 
\n"; p.fn(r); - VLOG(1) << "Checking results of " << r.name << ": OK.\n"; + BM_VLOG(1) << "Checking results of " << r.name << ": OK.\n"; } } } @@ -261,9 +265,9 @@ void ResultsChecker::SetHeader_(const std::string& csv_header) { // set the values for a benchmark void ResultsChecker::SetValues_(const std::string& entry_csv_line) { if (entry_csv_line.empty()) return; // some lines are empty - CHECK(!field_names.empty()); + BM_CHECK(!field_names.empty()); auto vals = SplitCsv_(entry_csv_line); - CHECK_EQ(vals.size(), field_names.size()); + BM_CHECK_EQ(vals.size(), field_names.size()); results.emplace_back(vals[0]); // vals[0] is the benchmark name auto& entry = results.back(); for (size_t i = 1, e = vals.size(); i < e; ++i) { @@ -278,7 +282,7 @@ std::vector<std::string> ResultsChecker::SplitCsv_(const std::string& line) { if (!field_names.empty()) out.reserve(field_names.size()); size_t prev = 0, pos = line.find_first_of(','), curr = pos; while (pos != line.npos) { - CHECK(curr > 0); + BM_CHECK(curr > 0); if (line[prev] == '"') ++prev; if (line[curr - 1] == '"') --curr; out.push_back(line.substr(prev, curr - prev)); @@ -295,7 +299,7 @@ std::vector<std::string> ResultsChecker::SplitCsv_(const std::string& line) { } // end namespace internal -size_t AddChecker(const char* bm_name, ResultsCheckFn fn) { +size_t AddChecker(const std::string& bm_name, const ResultsCheckFn& fn) { auto& rc = internal::GetResultsChecker(); rc.Add(bm_name, fn); return rc.results.size(); @@ -309,32 +313,32 @@ int Results::NumThreads() const { ss << name.substr(pos + 9, end); int num = 1; ss >> num; - CHECK(!ss.fail()); + BM_CHECK(!ss.fail()); return num; } -double Results::NumIterations() const { - return GetAs<double>("iterations"); -} +double Results::NumIterations() const { return GetAs<double>("iterations"); } double Results::GetTime(BenchmarkTime which) const { - CHECK(which == kCpuTime || which == kRealTime); + BM_CHECK(which == kCpuTime || which == kRealTime); const char* which_str = which == kCpuTime ? 
"cpu_time" : "real_time"; double val = GetAs<double>(which_str); auto unit = Get("time_unit"); - CHECK(unit); + BM_CHECK(unit); if (*unit == "ns") { return val * 1.e-9; - } else if (*unit == "us") { + } + if (*unit == "us") { return val * 1.e-6; - } else if (*unit == "ms") { + } + if (*unit == "ms") { return val * 1.e-3; - } else if (*unit == "s") { + } + if (*unit == "s") { return val; - } else { - CHECK(1 == 0) << "unknown time unit: " << *unit; - return 0; } + BM_CHECK(1 == 0) << "unknown time unit: " << *unit; + return 0; } // ========================================================================= // @@ -348,10 +352,10 @@ TestCase::TestCase(std::string re, int rule) regex(std::make_shared<benchmark::Regex>()) { std::string err_str; regex->Init(substituted_regex, &err_str); - CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex - << "\"" - << "\n originally \"" << regex_str << "\"" - << "\n got error: " << err_str; + BM_CHECK(err_str.empty()) + << "Could not construct regex \"" << substituted_regex << "\"" + << "\n originally \"" << regex_str << "\"" + << "\n got error: " << err_str; } int AddCases(TestCaseID ID, std::initializer_list<TestCase> il) { @@ -380,10 +384,8 @@ int SetSubstitutions( // Disable deprecated warnings temporarily because we need to reference // CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif +BENCHMARK_DISABLE_DEPRECATED_WARNING + void RunOutputTests(int argc, char* argv[]) { using internal::GetTestCaseList; benchmark::Initialize(&argc, argv); @@ -392,14 +394,14 @@ void RunOutputTests(int argc, char* argv[]) { benchmark::JSONReporter JR; benchmark::CSVReporter CSVR; struct ReporterTest { - const char* name; + std::string name; std::vector<TestCase>& output_cases; std::vector<TestCase>& error_cases; benchmark::BenchmarkReporter& reporter; std::stringstream out_stream; std::stringstream err_stream; - ReporterTest(const char* n, std::vector<TestCase>& out_tc, + ReporterTest(const std::string& n, std::vector<TestCase>& out_tc, std::vector<TestCase>& err_tc, benchmark::BenchmarkReporter& br) : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) { @@ -407,12 +409,12 @@ void RunOutputTests(int argc, char* argv[]) { reporter.SetErrorStream(&err_stream); } } TestCases[] = { - {"ConsoleReporter", GetTestCaseList(TC_ConsoleOut), + {std::string("ConsoleReporter"), GetTestCaseList(TC_ConsoleOut), GetTestCaseList(TC_ConsoleErr), CR}, - {"JSONReporter", GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr), - JR}, - {"CSVReporter", GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr), - CSVR}, + {std::string("JSONReporter"), GetTestCaseList(TC_JSONOut), + GetTestCaseList(TC_JSONErr), JR}, + {std::string("CSVReporter"), GetTestCaseList(TC_CSVOut), + GetTestCaseList(TC_CSVErr), CSVR}, }; // Create the test reporter and run the benchmarks. @@ -421,7 +423,8 @@ void RunOutputTests(int argc, char* argv[]) { benchmark::RunSpecifiedBenchmarks(&test_rep); for (auto& rep_test : TestCases) { - std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; + std::string msg = + std::string("\nTesting ") + rep_test.name + std::string(" Output\n"); std::string banner(msg.size() - 1, '-'); std::cout << banner << msg << banner << "\n"; @@ -438,13 +441,11 @@ void RunOutputTests(int argc, char* argv[]) { // the checks to subscribees. 
auto& csv = TestCases[2]; // would use == but gcc spits a warning - CHECK(std::strcmp(csv.name, "CSVReporter") == 0); + BM_CHECK(csv.name == std::string("CSVReporter")); internal::GetResultsChecker().CheckResults(csv.out_stream); } -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif +BENCHMARK_RESTORE_DEPRECATED_WARNING int SubstrCnt(const std::string& haystack, const std::string& pat) { if (pat.length() == 0) return 0; @@ -468,9 +469,8 @@ static char RandomHexChar() { static std::string GetRandomFileName() { std::string model = "test.%%%%%%"; - for (auto & ch : model) { - if (ch == '%') - ch = RandomHexChar(); + for (auto& ch : model) { + if (ch == '%') ch = RandomHexChar(); } return model; } @@ -487,8 +487,7 @@ static std::string GetTempFileName() { int retries = 3; while (--retries) { std::string name = GetRandomFileName(); - if (!FileExists(name)) - return name; + if (!FileExists(name)) return name; } std::cerr << "Failed to create unique temporary file name" << std::endl; std::abort(); diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc new file mode 100644 index 0000000..54c7863 --- /dev/null +++ b/test/perf_counters_gtest.cc @@ -0,0 +1,307 @@ +#include <random> +#include <thread> + +#include "../src/perf_counters.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#ifndef GTEST_SKIP +struct MsgHandler { + void operator=(std::ostream&) {} +}; +#define GTEST_SKIP() return MsgHandler() = std::cout +#endif + +using benchmark::internal::PerfCounters; +using benchmark::internal::PerfCountersMeasurement; +using benchmark::internal::PerfCounterValues; +using ::testing::AllOf; +using ::testing::Gt; +using ::testing::Lt; + +namespace { +const char kGenericPerfEvent1[] = "CYCLES"; +const char kGenericPerfEvent2[] = "INSTRUCTIONS"; + +TEST(PerfCountersTest, Init) { + EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported); +} + +TEST(PerfCountersTest, OneCounter) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Performance counters not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1); +} + +TEST(PerfCountersTest, NegativeTest) { + if (!PerfCounters::kSupported) { + EXPECT_FALSE(PerfCounters::Initialize()); + return; + } + EXPECT_TRUE(PerfCounters::Initialize()); + // Sanity checks + // Create() will always create a valid object, even if passed no or + // wrong arguments as the new behavior is to warn and drop unsupported + // counters + EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0); + EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0); + EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0); + { + // Try sneaking in a bad egg to see if it is filtered out. 
The + // number of counters has to be two, not zero + auto counter = + PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1}); + EXPECT_EQ(counter.num_counters(), 2); + EXPECT_EQ(counter.names(), std::vector<std::string>( + {kGenericPerfEvent2, kGenericPerfEvent1})); + } + { + // Try sneaking in an outrageous counter, like a fat finger mistake + auto counter = PerfCounters::Create( + {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1}); + EXPECT_EQ(counter.num_counters(), 2); + EXPECT_EQ(counter.names(), std::vector<std::string>( + {kGenericPerfEvent2, kGenericPerfEvent1})); + } + { + // Finally try a golden input - it should like both of them + EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}) + .num_counters(), + 2); + } + { + // Add a bad apple in the end of the chain to check the edges + auto counter = PerfCounters::Create( + {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"}); + EXPECT_EQ(counter.num_counters(), 2); + EXPECT_EQ(counter.names(), std::vector<std::string>( + {kGenericPerfEvent1, kGenericPerfEvent2})); + } +} + +TEST(PerfCountersTest, Read1Counter) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + auto counters = PerfCounters::Create({kGenericPerfEvent1}); + EXPECT_EQ(counters.num_counters(), 1); + PerfCounterValues values1(1); + EXPECT_TRUE(counters.Snapshot(&values1)); + EXPECT_GT(values1[0], 0); + PerfCounterValues values2(1); + EXPECT_TRUE(counters.Snapshot(&values2)); + EXPECT_GT(values2[0], 0); + EXPECT_GT(values2[0], values1[0]); +} + +TEST(PerfCountersTest, Read2Counters) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + auto counters = + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); + EXPECT_EQ(counters.num_counters(), 2); + PerfCounterValues values1(2); + EXPECT_TRUE(counters.Snapshot(&values1)); + EXPECT_GT(values1[0], 0); + EXPECT_GT(values1[1], 0); + PerfCounterValues values2(2); + EXPECT_TRUE(counters.Snapshot(&values2)); + EXPECT_GT(values2[0], 0); + EXPECT_GT(values2[1], 0); +} + +TEST(PerfCountersTest, ReopenExistingCounters) { + // This test works in recent and old Intel hardware, Pixel 3, and Pixel 6. + // However we cannot make assumptions beyond 2 HW counters due to Pixel 6. + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + std::vector<std::string> kMetrics({kGenericPerfEvent1}); + std::vector<PerfCounters> counters(2); + for (auto& counter : counters) { + counter = PerfCounters::Create(kMetrics); + } + PerfCounterValues values(1); + EXPECT_TRUE(counters[0].Snapshot(&values)); + EXPECT_TRUE(counters[1].Snapshot(&values)); +} + +TEST(PerfCountersTest, CreateExistingMeasurements) { + // The test works (i.e. causes read to fail) for the assumptions + // about hardware capabilities (i.e. small number (2) hardware + // counters) at this date, + // the same as previous test ReopenExistingCounters. + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + + // This means we will try 10 counters but we can only guarantee + // for sure at this time that only 3 will work. Perhaps in the future + // we could use libpfm to query for the hardware limits on this + // particular platform. 
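The Read1Counter and Read2Counters tests above reduce to a create/snapshot/diff flow; a condensed sketch, assuming libpfm support and a successful PerfCounters::Initialize():

#include "../src/perf_counters.h"  // internal header, as in the tests

void SnapshotCycles() {
  using benchmark::internal::PerfCounters;
  using benchmark::internal::PerfCounterValues;
  auto counters = PerfCounters::Create({"CYCLES"});
  PerfCounterValues before(counters.num_counters());
  PerfCounterValues after(counters.num_counters());
  counters.Snapshot(&before);
  // ... region of interest ...
  counters.Snapshot(&after);
  // after[0] - before[0] is the per-thread delta for the first counter.
}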
+ const int kMaxCounters = 10; + const int kMinValidCounters = 2; + + // Let's use a ubiquitous counter that is guaranteed to work + // on all platforms + const std::vector<std::string> kMetrics{"cycles"}; + + // Cannot create a vector of actual objects because the + // copy constructor of PerfCounters is deleted - and so is + // implicitly deleted on PerfCountersMeasurement too + std::vector<std::unique_ptr<PerfCountersMeasurement>> + perf_counter_measurements; + + perf_counter_measurements.reserve(kMaxCounters); + for (int j = 0; j < kMaxCounters; ++j) { + perf_counter_measurements.emplace_back( + new PerfCountersMeasurement(kMetrics)); + } + + std::vector<std::pair<std::string, double>> measurements; + + // Start all counters together to see if they hold + size_t max_counters = kMaxCounters; + for (size_t i = 0; i < kMaxCounters; ++i) { + auto& counter(*perf_counter_measurements[i]); + EXPECT_EQ(counter.num_counters(), 1); + if (!counter.Start()) { + max_counters = i; + break; + }; + } + + ASSERT_GE(max_counters, kMinValidCounters); + + // Start all together + for (size_t i = 0; i < max_counters; ++i) { + auto& counter(*perf_counter_measurements[i]); + EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters)); + } + + // Start/stop individually + for (size_t i = 0; i < max_counters; ++i) { + auto& counter(*perf_counter_measurements[i]); + measurements.clear(); + counter.Start(); + EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters)); + } +} + +// We try to do some meaningful work here but the compiler +// insists in optimizing away our loop so we had to add a +// no-optimize macro. In case it fails, we added some entropy +// to this pool as well. + +BENCHMARK_DONT_OPTIMIZE size_t do_work() { + static std::mt19937 rd{std::random_device{}()}; + static std::uniform_int_distribution<size_t> mrand(0, 10); + const size_t kNumLoops = 1000000; + size_t sum = 0; + for (size_t j = 0; j < kNumLoops; ++j) { + sum += mrand(rd); + } + benchmark::DoNotOptimize(sum); + return sum; +} + +void measure(size_t threadcount, PerfCounterValues* before, + PerfCounterValues* after) { + BM_CHECK_NE(before, nullptr); + BM_CHECK_NE(after, nullptr); + std::vector<std::thread> threads(threadcount); + auto work = [&]() { BM_CHECK(do_work() > 1000); }; + + // We need to first set up the counters, then start the threads, so the + // threads would inherit the counters. But later, we need to first destroy + // the thread pool (so all the work finishes), then measure the counters. So + // the scopes overlap, and we need to explicitly control the scope of the + // threadpool. 
+ auto counters = + PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2}); + for (auto& t : threads) t = std::thread(work); + counters.Snapshot(before); + for (auto& t : threads) t.join(); + counters.Snapshot(after); +} + +TEST(PerfCountersTest, MultiThreaded) { + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported."; + } + EXPECT_TRUE(PerfCounters::Initialize()); + PerfCounterValues before(2); + PerfCounterValues after(2); + + // Notice that this test will work even if we taskset it to a single CPU + // In this case the threads will run sequentially + // Start two threads and measure the number of combined cycles and + // instructions + measure(2, &before, &after); + std::vector<double> Elapsed2Threads{ + static_cast<double>(after[0] - before[0]), + static_cast<double>(after[1] - before[1])}; + + // Start four threads and measure the number of combined cycles and + // instructions + measure(4, &before, &after); + std::vector<double> Elapsed4Threads{ + static_cast<double>(after[0] - before[0]), + static_cast<double>(after[1] - before[1])}; + + // The following expectations fail (at least on a beefy workstation with lots + // of cpus) - it seems that in some circumstances the runtime of 4 threads + // can even be better than with 2. + // So instead of expecting 4 threads to be slower, let's just make sure they + // do not differ too much in general (one is not more than 10x than the + // other). + EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10))); + EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10))); +} + +TEST(PerfCountersTest, HardwareLimits) { + // The test works (i.e. causes read to fail) for the assumptions + // about hardware capabilities (i.e. small number (3-4) hardware + // counters) at this date, + // the same as previous test ReopenExistingCounters. + if (!PerfCounters::kSupported) { + GTEST_SKIP() << "Test skipped because libpfm is not supported.\n"; + } + EXPECT_TRUE(PerfCounters::Initialize()); + + // Taken from `perf list`, but focusses only on those HW events that actually + // were reported when running `sudo perf stat -a sleep 10`, intersected over + // several platforms. All HW events listed in the first command not reported + // in the second seem to not work. This is sad as we don't really get to test + // the grouping here (groups can contain up to 6 members)... 
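The HardwareLimits test below drives the higher-level PerfCountersMeasurement wrapper, whose lifecycle is just construct, Start, Stop. A condensed sketch under the same assumption that the named events are supported:

#include <string>
#include <utility>
#include <vector>

#include "../src/perf_counters.h"  // internal header, as in the tests

void MeasureOnce() {
  benchmark::internal::PerfCountersMeasurement pcm({"cycles", "instructions"});
  std::vector<std::pair<std::string, double>> deltas;
  if (pcm.Start()) {
    // ... code under measurement ...
    pcm.Stop(deltas);  // appends one {event name, delta} pair per counter
  }
}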
+ std::vector<std::string> counter_names{ + "cycles", // leader + "instructions", // + "branch-misses", // + }; + + // In the off-chance that some of these values are not supported, + // we filter them out so the test will complete without failure + // albeit it might not actually test the grouping on that platform + std::vector<std::string> valid_names; + for (const std::string& name : counter_names) { + if (PerfCounters::IsCounterSupported(name)) { + valid_names.push_back(name); + } + } + PerfCountersMeasurement counter(valid_names); + + std::vector<std::pair<std::string, double>> measurements; + + counter.Start(); + EXPECT_TRUE(counter.Stop(measurements)); +} + +} // namespace diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc new file mode 100644 index 0000000..b0a3ab0 --- /dev/null +++ b/test/perf_counters_test.cc @@ -0,0 +1,92 @@ +#include <cstdarg> +#undef NDEBUG + +#include "../src/commandlineflags.h" +#include "../src/perf_counters.h" +#include "benchmark/benchmark.h" +#include "output_test.h" + +namespace benchmark { + +BM_DECLARE_string(benchmark_perf_counters); + +} // namespace benchmark + +static void BM_Simple(benchmark::State& state) { + for (auto _ : state) { + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); + } +} +BENCHMARK(BM_Simple); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}}); + +const int kIters = 1000000; + +void BM_WithoutPauseResume(benchmark::State& state) { + int n = 0; + + for (auto _ : state) { + for (auto i = 0; i < kIters; ++i) { + n = 1 - n; + benchmark::DoNotOptimize(n); + } + } +} + +BENCHMARK(BM_WithoutPauseResume); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithoutPauseResume\",$"}}); + +void BM_WithPauseResume(benchmark::State& state) { + int m = 0, n = 0; + + for (auto _ : state) { + for (auto i = 0; i < kIters; ++i) { + n = 1 - n; + benchmark::DoNotOptimize(n); + } + + state.PauseTiming(); + for (auto j = 0; j < kIters; ++j) { + m = 1 - m; + benchmark::DoNotOptimize(m); + } + state.ResumeTiming(); + } +} + +BENCHMARK(BM_WithPauseResume); + +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithPauseResume\",$"}}); + +static void CheckSimple(Results const& e) { + CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0); +} + +double withoutPauseResumeInstrCount = 0.0; +double withPauseResumeInstrCount = 0.0; + +static void SaveInstrCountWithoutResume(Results const& e) { + withoutPauseResumeInstrCount = e.GetAs<double>("INSTRUCTIONS"); +} + +static void SaveInstrCountWithResume(Results const& e) { + withPauseResumeInstrCount = e.GetAs<double>("INSTRUCTIONS"); +} + +CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); +CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &SaveInstrCountWithoutResume); +CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &SaveInstrCountWithResume); + +int main(int argc, char* argv[]) { + if (!benchmark::internal::PerfCounters::kSupported) { + return 0; + } + benchmark::FLAGS_benchmark_perf_counters = "CYCLES,INSTRUCTIONS"; + benchmark::internal::PerfCounters::Initialize(); + RunOutputTests(argc, argv); + + BM_CHECK_GT(withPauseResumeInstrCount, kIters); + BM_CHECK_GT(withoutPauseResumeInstrCount, kIters); + BM_CHECK_LT(withPauseResumeInstrCount, 1.5 * withoutPauseResumeInstrCount); +} diff --git a/test/register_benchmark_test.cc b/test/register_benchmark_test.cc index 3ac5b21..d69d144 100644 --- a/test/register_benchmark_test.cc +++ b/test/register_benchmark_test.cc @@ -10,7 +10,7 @@ namespace { class TestReporter : public benchmark::ConsoleReporter { public: - virtual void ReportRuns(const 
std::vector<Run>& report) { + void ReportRuns(const std::vector<Run>& report) override { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); } @@ -19,24 +19,24 @@ class TestReporter : public benchmark::ConsoleReporter { }; struct TestCase { - std::string name; - const char* label; + const std::string name; + const std::string label; // Note: not explicit as we rely on it being converted through ADD_CASES. - TestCase(const char* xname) : TestCase(xname, nullptr) {} - TestCase(const char* xname, const char* xlabel) + TestCase(const std::string& xname) : TestCase(xname, "") {} + TestCase(const std::string& xname, const std::string& xlabel) : name(xname), label(xlabel) {} typedef benchmark::BenchmarkReporter::Run Run; void CheckRun(Run const& run) const { // clang-format off - CHECK(name == run.benchmark_name()) << "expected " << name << " got " + BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); - if (label) { - CHECK(run.report_label == label) << "expected " << label << " got " + if (!label.empty()) { + BM_CHECK(run.report_label == label) << "expected " << label << " got " << run.report_label; } else { - CHECK(run.report_label == ""); + BM_CHECK(run.report_label.empty()); } // clang-format on } @@ -45,7 +45,7 @@ struct TestCase { std::vector<TestCase> ExpectedResults; int AddCases(std::initializer_list<TestCase> const& v) { - for (auto N : v) { + for (const auto& N : v) { ExpectedResults.push_back(N); } return 0; @@ -96,6 +96,18 @@ ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}); #endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK //----------------------------------------------------------------------------// +// Test RegisterBenchmark with DISABLED_ benchmark +//----------------------------------------------------------------------------// +void DISABLED_BM_function(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(DISABLED_BM_function); +ReturnVal dummy3 = benchmark::RegisterBenchmark("DISABLED_BM_function_manual", + DISABLED_BM_function); +// No need to add cases because we don't expect them to run. 
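The block that follows exercises RegisterBenchmark with different callable types; the capturing-lambda form, registered at runtime after benchmark::Initialize(), looks roughly like this (values are illustrative):

void RegisterLambdaBenchmark() {
  int dim = 128;
  benchmark::RegisterBenchmark("BM_lambda", [dim](benchmark::State& st) {
    for (auto _ : st) {
      auto d = dim;  // bind to a local, then DoNotOptimize, as elsewhere in this patch
      benchmark::DoNotOptimize(d);
    }
  });
}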
+ +//----------------------------------------------------------------------------// // Test RegisterBenchmark with different callable types //----------------------------------------------------------------------------// @@ -111,7 +123,7 @@ void TestRegistrationAtRuntime() { { CustomFixture fx; benchmark::RegisterBenchmark("custom_fixture", fx); - AddCases({"custom_fixture"}); + AddCases({std::string("custom_fixture")}); } #endif #ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK diff --git a/test/repetitions_test.cc b/test/repetitions_test.cc new file mode 100644 index 0000000..569777d --- /dev/null +++ b/test/repetitions_test.cc @@ -0,0 +1,214 @@ + +#include "benchmark/benchmark.h" +#include "output_test.h" + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// ========================================================================= // + +static void BM_ExplicitRepetitions(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_ExplicitRepetitions)->Repetitions(2); + +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_mean %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_median %console_report$"}}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_ExplicitRepetitions/repeats:2_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": 
\"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_mean\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_median\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_ExplicitRepetitions/repeats:2_stddev\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Basic Output --------------------------- // +// ========================================================================= // + +static void BM_ImplicitRepetitions(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_ImplicitRepetitions); + +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_mean %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_median %console_report$"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"}, + {"\"family_index\": 1,$", 
MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_mean\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_median\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_stddev\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_mean\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_median\",%csv_report$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_stddev\",%csv_report$"}}); + +// ========================================================================= // +// --------------------------- TEST CASES END ------------------------------ // +// ========================================================================= // + +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/test/report_aggregates_only_test.cc b/test/report_aggregates_only_test.cc index 9646b9b..47da503 100644 --- a/test/report_aggregates_only_test.cc +++ b/test/report_aggregates_only_test.cc @@ -19,17 +19,19 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 3 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 4 || SubstrCnt(output, "\"name\": 
\"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find three " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find four " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/test/reporter_output_test.cc b/test/reporter_output_test.cc index d24a57d..2eb545a 100644 --- a/test/reporter_output_test.cc +++ b/test/reporter_output_test.cc @@ -1,5 +1,6 @@ #undef NDEBUG +#include <numeric> #include <utility> #include "benchmark/benchmark.h" @@ -16,7 +17,7 @@ static int AddContextCases() { AddCases(TC_ConsoleErr, { {"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default}, - {"Running .*/reporter_output_test(\\.exe)?$", MR_Next}, + {"Running .*(/|\\\\)reporter_output_test(\\.exe)?$", MR_Next}, {"Run on \\(%int X %float MHz CPU s?\\)", MR_Next}, }); AddCases(TC_JSONOut, @@ -71,9 +72,11 @@ BENCHMARK(BM_basic); ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_basic\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -90,7 +93,8 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}}); void BM_bytes_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetBytesProcessed(1); } @@ -99,9 +103,11 @@ BENCHMARK(BM_bytes_per_second); ADD_CASES(TC_ConsoleOut, {{"^BM_bytes_per_second %console_report " "bytes_per_second=%float[kM]{0,1}/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_bytes_per_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -119,7 +125,8 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}}); void BM_items_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetItemsProcessed(1); } @@ -128,9 +135,11 @@ BENCHMARK(BM_items_per_second); ADD_CASES(TC_ConsoleOut, {{"^BM_items_per_second %console_report " "items_per_second=%float[kM]{0,1}/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"}, + {"\"family_index\": 2,$", 
MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_items_per_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -154,9 +163,11 @@ BENCHMARK(BM_label); ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_label\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -181,9 +192,11 @@ BENCHMARK(BM_time_label_nanosecond)->Unit(benchmark::kNanosecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_nanosecond %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_nanosecond\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_nanosecond\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -202,9 +215,11 @@ BENCHMARK(BM_time_label_microsecond)->Unit(benchmark::kMicrosecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_microsecond %console_us_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_microsecond\",$"}, + {"\"family_index\": 5,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_microsecond\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -223,9 +238,11 @@ BENCHMARK(BM_time_label_millisecond)->Unit(benchmark::kMillisecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_millisecond %console_ms_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_millisecond\",$"}, + {"\"family_index\": 6,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_millisecond\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -243,9 +260,11 @@ BENCHMARK(BM_time_label_second)->Unit(benchmark::kSecond); ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_second %console_s_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_second\",$"}, + {"\"family_index\": 7,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_time_label_second\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -267,9 +286,11 @@ void BM_error(benchmark::State& state) { BENCHMARK(BM_error); ADD_CASES(TC_ConsoleOut, {{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_error\",$"}, + {"\"family_index\": 8,$", MR_Next}, + 
{"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_error\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"error_occurred\": true,$", MR_Next}, @@ -289,15 +310,17 @@ void BM_no_arg_name(benchmark::State& state) { BENCHMARK(BM_no_arg_name)->Arg(3); ADD_CASES(TC_ConsoleOut, {{"^BM_no_arg_name/3 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_no_arg_name/3\",$"}, + {"\"family_index\": 9,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_no_arg_name/3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}}); // ========================================================================= // -// ------------------------ Testing Arg Name Output ----------------------- // +// ------------------------ Testing Arg Name Output ------------------------ // // ========================================================================= // void BM_arg_name(benchmark::State& state) { @@ -307,9 +330,11 @@ void BM_arg_name(benchmark::State& state) { BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3); ADD_CASES(TC_ConsoleOut, {{"^BM_arg_name/first:3 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_name/first:3\",$"}, + {"\"family_index\": 10,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_arg_name/first:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}}); @@ -327,14 +352,42 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_arg_names/first:2/5/third:4 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_names/first:2/5/third:4\",$"}, + {"\"family_index\": 11,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_arg_names/first:2/5/third:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}}); // ========================================================================= // +// ------------------------ Testing Name Output ---------------------------- // +// ========================================================================= // + +void BM_name(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_name)->Name("BM_custom_name"); + +ADD_CASES(TC_ConsoleOut, {{"^BM_custom_name %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_custom_name\",$"}, + {"\"family_index\": 12,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_custom_name\",$", MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); 
+ADD_CASES(TC_CSVOut, {{"^\"BM_custom_name\",%csv_report$"}}); + +// ========================================================================= // // ------------------------ Testing Big Args Output ------------------------ // // ========================================================================= // @@ -353,7 +406,8 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_BigArgs/1073741824 %console_report$"}, void BM_Complexity_O1(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.SetComplexityN(state.range(0)); } @@ -381,37 +435,50 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Repeat/repeats:2_median %console_time_only_report [ ]*2$"}, {"^BM_Repeat/repeats:2_stddev %console_time_only_report [ ]*2$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\"", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_mean\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_median\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:2_stddev\",$"}, + {"\"family_index\": 15,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"}, {"^\"BM_Repeat/repeats:2\",%csv_report$"}, @@ -428,43 +495,58 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Repeat/repeats:3_median %console_time_only_report [ ]*3$"}, {"^BM_Repeat/repeats:3_stddev %console_time_only_report [ ]*3$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3\",$"}, + 
{"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_mean\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_median\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}, + {"\"family_index\": 16,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"}, {"^\"BM_Repeat/repeats:3\",%csv_report$"}, @@ -483,49 +565,66 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Repeat/repeats:4_median %console_time_only_report [ ]*4$"}, {"^BM_Repeat/repeats:4_stddev %console_time_only_report [ ]*4$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 1,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"repetition_index\": 3,$", MR_Next}, {"\"threads\": 
1,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_mean\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_median\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}, {"\"name\": \"BM_Repeat/repeats:4_stddev\",$"}, + {"\"family_index\": 17,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 4,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 4,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"}, {"^\"BM_Repeat/repeats:4\",%csv_report$"}, @@ -544,6 +643,8 @@ void BM_RepeatOnce(benchmark::State& state) { BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly(); ADD_CASES(TC_ConsoleOut, {{"^BM_RepeatOnce/repeats:1 %console_report$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_RepeatOnce/repeats:1\",$"}, + {"\"family_index\": 18,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatOnce/repeats:1\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 1,$", MR_Next}, @@ -566,25 +667,34 @@ ADD_CASES( ADD_CASES(TC_JSONOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}, + {"\"family_index\": 19,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, {"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, @@ -608,25 +718,34 @@ ADD_CASES( ADD_CASES(TC_JSONOut, {{".*BM_SummaryDisplay/repeats:2 ", 
MR_Not}, {"\"name\": \"BM_SummaryDisplay/repeats:2_mean\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"}, + {"\"family_index\": 20,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}}); ADD_CASES(TC_CSVOut, {{".*BM_SummaryDisplay/repeats:2 ", MR_Not}, @@ -654,27 +773,36 @@ ADD_CASES( ADD_CASES(TC_JSONOut, {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}, {"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"}, + {"\"family_index\": 21,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"time_unit\": \"us\",?$"}}); ADD_CASES(TC_CSVOut, @@ -722,6 +850,8 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_UserStats/iterations:5/repeats:3/manual_time [ " ADD_CASES( TC_JSONOut, {{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, @@ -731,6 +861,8 @@ ADD_CASES( {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + 
{"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, @@ -740,6 +872,8 @@ ADD_CASES( {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, @@ -749,39 +883,51 @@ ADD_CASES( {"\"iterations\": 5,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_median\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_stddev\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_\",$"}, + {"\"family_index\": 22,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 3,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 3,$", MR_Next}, {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}}); ADD_CASES( @@ -797,6 +943,154 @@ ADD_CASES( {"^\"BM_UserStats/iterations:5/repeats:3/manual_time_\",%csv_report$"}}); // ========================================================================= // +// ------------- Testing relative standard deviation statistics ------------ // +// ========================================================================= // + +const auto UserPercentStatistics = [](const std::vector<double>&) { + return 1. 
/ 100.; +}; +void BM_UserPercentStats(benchmark::State& state) { + for (auto _ : state) { + state.SetIterationTime(150 / 10e8); + } +} +// clang-format off +BENCHMARK(BM_UserPercentStats) + ->Repetitions(3) + ->Iterations(5) + ->UseManualTime() + ->Unit(benchmark::TimeUnit::kNanosecond) + ->ComputeStatistics("", UserPercentStatistics, benchmark::StatisticUnit::kPercentage); +// clang-format on + +// check that UserPercent-provided stats is calculated, and is after the +// default-ones empty string as name is intentional, it would sort before +// anything else +ADD_CASES(TC_ConsoleOut, + {{"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ " + "]* 150 ns %time [ ]*5$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_mean [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_median [ ]* 150 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"}, + {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time_ " + "[ ]* 1.00 % [ ]* 1.00 %[ ]*3$"}}); +ADD_CASES( + TC_JSONOut, + {{"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"repetition_index\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": 5,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_mean\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_median\",$"}, + {"\"family_index\": 23,$", MR_Next}, + 
{"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_stddev\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_\",$"}, + {"\"family_index\": 23,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": " + "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 3,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": 3,$", MR_Next}, + {"\"real_time\": 1\\.(0)*e-(0)*2,$", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_mean\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_median\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_stddev\",%csv_report$"}, + {"^\"BM_UserPercentStats/iterations:5/repeats:3/" + "manual_time_\",%csv_report$"}}); + +// ========================================================================= // // ------------------------- Testing StrEscape JSON ------------------------ // // ========================================================================= // #if 0 // enable when csv testing code correctly handles multi-line fields @@ -807,9 +1101,11 @@ void BM_JSON_Format(benchmark::State& state) { } BENCHMARK(BM_JSON_Format); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_JSON_Format\",$"}, + {"\"family_index\": 23,$", MR_Next}, +{"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_JSON_Format\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"error_occurred\": true,$", MR_Next}, diff --git a/test/skip_with_error_test.cc b/test/skip_with_error_test.cc index 97a2e3c..b4c5e15 100644 --- a/test/skip_with_error_test.cc +++ b/test/skip_with_error_test.cc @@ -10,17 +10,17 @@ namespace { class TestReporter : public benchmark::ConsoleReporter { public: - virtual bool ReportContext(const Context& context) { + bool ReportContext(const Context& context) override { return ConsoleReporter::ReportContext(context); }; - virtual void ReportRuns(const std::vector<Run>& report) 
{ + void ReportRuns(const std::vector<Run>& report) override { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); } TestReporter() {} - virtual ~TestReporter() {} + ~TestReporter() override {} mutable std::vector<Run> all_runs_; }; @@ -33,21 +33,23 @@ struct TestCase { typedef benchmark::BenchmarkReporter::Run Run; void CheckRun(Run const& run) const { - CHECK(name == run.benchmark_name()) + BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got " << run.benchmark_name(); - CHECK(error_occurred == run.error_occurred); - CHECK(error_message == run.error_message); + BM_CHECK_EQ(error_occurred, + benchmark::internal::SkippedWithError == run.skipped); + BM_CHECK(error_message == run.skip_message); if (error_occurred) { - // CHECK(run.iterations == 0); + // BM_CHECK(run.iterations == 0); } else { - CHECK(run.iterations != 0); + BM_CHECK(run.iterations != 0); } } }; std::vector<TestCase> ExpectedResults; -int AddCases(const char* base_name, std::initializer_list<TestCase> const& v) { +int AddCases(const std::string& base_name, + std::initializer_list<TestCase> const& v) { for (auto TC : v) { TC.name = base_name + TC.name; ExpectedResults.push_back(std::move(TC)); @@ -97,7 +99,7 @@ ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}}); void BM_error_during_running(benchmark::State& state) { int first_iter = true; while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) { assert(first_iter); first_iter = false; state.SkipWithError("error message"); @@ -119,12 +121,13 @@ ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"}, void BM_error_during_running_ranged_for(benchmark::State& state) { assert(state.max_iterations > 3 && "test requires at least a few iterations"); - int first_iter = true; + bool first_iter = true; // NOTE: Users should not write the for loop explicitly. for (auto It = state.begin(), End = state.end(); It != End; ++It) { if (state.range(0) == 1) { assert(first_iter); first_iter = false; + (void)first_iter; state.SkipWithError("error message"); // Test the unfortunate but documented behavior that the ranged-for loop // doesn't automatically terminate when SkipWithError is set. 
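[Editor's note] The hunk above pins down the documented corner case that an explicit iterator loop does not stop on its own after SkipWithError. As a point of reference, here is a minimal sketch of the usage pattern the library documents for skipping a benchmark on error; it is not part of this diff, and GetResource(), read_data(), do_stuff() and BM_SkipOnError are hypothetical placeholder names.

  #include "benchmark/benchmark.h"

  static void BM_SkipOnError(benchmark::State& state) {
    auto resource = GetResource();            // hypothetical setup step
    if (!resource.good()) {
      state.SkipWithError("Resource is not good!");
      return;  // early return is permitted once SkipWithError() was called
    }
    for (auto _ : state) {
      auto data = resource.read_data();       // hypothetical per-iteration work
      if (!resource.good()) {
        state.SkipWithError("Failed to read data!");
        break;  // required: the loop is not terminated automatically once the
                // error is recorded, which is the behavior the ranged-for test
                // above exercises
      }
      do_stuff(data);
    }
  }
  BENCHMARK(BM_SkipOnError);
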
@@ -140,9 +143,10 @@ ADD_CASES("BM_error_during_running_ranged_for", void BM_error_after_running(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } - if (state.thread_index <= (state.threads / 2)) + if (state.thread_index() <= (state.threads() / 2)) state.SkipWithError("error message"); } BENCHMARK(BM_error_after_running)->ThreadRange(1, 8); @@ -154,7 +158,7 @@ ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"}, void BM_error_while_paused(benchmark::State& state) { bool first_iter = true; while (state.KeepRunning()) { - if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) { + if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) { assert(first_iter); first_iter = false; state.PauseTiming(); diff --git a/test/spec_arg_test.cc b/test/spec_arg_test.cc new file mode 100644 index 0000000..06aafbe --- /dev/null +++ b/test/spec_arg_test.cc @@ -0,0 +1,105 @@ +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <limits> +#include <string> +#include <vector> + +#include "benchmark/benchmark.h" + +// Tests that we can override benchmark-spec value from FLAGS_benchmark_filter +// with argument to RunSpecifiedBenchmarks(...). + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + bool ReportContext(const Context& context) override { + return ConsoleReporter::ReportContext(context); + }; + + void ReportRuns(const std::vector<Run>& report) override { + assert(report.size() == 1); + matched_functions.push_back(report[0].run_name.function_name); + ConsoleReporter::ReportRuns(report); + }; + + TestReporter() {} + + ~TestReporter() override {} + + const std::vector<std::string>& GetMatchedFunctions() const { + return matched_functions; + } + + private: + std::vector<std::string> matched_functions; +}; + +} // end namespace + +static void BM_NotChosen(benchmark::State& state) { + assert(false && "SHOULD NOT BE CALLED"); + for (auto _ : state) { + } +} +BENCHMARK(BM_NotChosen); + +static void BM_Chosen(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_Chosen); + +int main(int argc, char** argv) { + const std::string flag = "BM_NotChosen"; + + // Verify that argv specify --benchmark_filter=BM_NotChosen. + bool found = false; + for (int i = 0; i < argc; ++i) { + if (strcmp("--benchmark_filter=BM_NotChosen", argv[i]) == 0) { + found = true; + break; + } + } + assert(found); + + benchmark::Initialize(&argc, argv); + + // Check that the current flag value is reported accurately via the + // GetBenchmarkFilter() function. + if (flag != benchmark::GetBenchmarkFilter()) { + std::cerr + << "Seeing different value for flags. GetBenchmarkFilter() returns [" + << benchmark::GetBenchmarkFilter() << "] expected flag=[" << flag + << "]\n"; + return 1; + } + TestReporter test_reporter; + const char* const spec = "BM_Chosen"; + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter, spec); + assert(returned_count == 1); + const std::vector<std::string> matched_functions = + test_reporter.GetMatchedFunctions(); + assert(matched_functions.size() == 1); + if (strcmp(spec, matched_functions.front().c_str()) != 0) { + std::cerr << "Expected benchmark [" << spec << "] to run, but got [" + << matched_functions.front() << "]\n"; + return 2; + } + + // Test that SetBenchmarkFilter works. 
+ const std::string golden_value = "golden_value"; + benchmark::SetBenchmarkFilter(golden_value); + std::string current_value = benchmark::GetBenchmarkFilter(); + if (golden_value != current_value) { + std::cerr << "Expected [" << golden_value + << "] for --benchmark_filter but got [" << current_value << "]\n"; + return 3; + } + return 0; +} diff --git a/test/spec_arg_verbosity_test.cc b/test/spec_arg_verbosity_test.cc new file mode 100644 index 0000000..8f8eb6d --- /dev/null +++ b/test/spec_arg_verbosity_test.cc @@ -0,0 +1,43 @@ +#include <string.h> + +#include <iostream> + +#include "benchmark/benchmark.h" + +// Tests that the user specified verbosity level can be get. +static void BM_Verbosity(benchmark::State& state) { + for (auto _ : state) { + } +} +BENCHMARK(BM_Verbosity); + +int main(int argc, char** argv) { + const int32_t flagv = 42; + + // Verify that argv specify --v=42. + bool found = false; + for (int i = 0; i < argc; ++i) { + if (strcmp("--v=42", argv[i]) == 0) { + found = true; + break; + } + } + if (!found) { + std::cerr << "This test requires '--v=42' to be passed as a command-line " + << "argument.\n"; + return 1; + } + + benchmark::Initialize(&argc, argv); + + // Check that the current flag value is reported accurately via the + // GetBenchmarkVerbosity() function. + if (flagv != benchmark::GetBenchmarkVerbosity()) { + std::cerr + << "Seeing different value for flags. GetBenchmarkVerbosity() returns [" + << benchmark::GetBenchmarkVerbosity() << "] expected flag=[" << flagv + << "]\n"; + return 1; + } + return 0; +} diff --git a/test/statistics_gtest.cc b/test/statistics_gtest.cc index 3ddc72d..1de2d87 100644 --- a/test/statistics_gtest.cc +++ b/test/statistics_gtest.cc @@ -25,4 +25,11 @@ TEST(StatisticsTest, StdDev) { 1.151086443322134); } +TEST(StatisticsTest, CV) { + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. 
/ 2.); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}), + 0.32888184094918121); +} + } // end namespace diff --git a/test/string_util_gtest.cc b/test/string_util_gtest.cc index 01bf155..67b4bc0 100644 --- a/test/string_util_gtest.cc +++ b/test/string_util_gtest.cc @@ -1,9 +1,12 @@ //===---------------------------------------------------------------------===// -// statistics_test - Unit tests for src/statistics.cc +// string_util_test - Unit tests for src/string_util.cc //===---------------------------------------------------------------------===// -#include "../src/string_util.h" +#include <tuple> + #include "../src/internal_macros.h" +#include "../src/string_util.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" namespace { @@ -32,7 +35,8 @@ TEST(StringUtilTest, stoul) { #elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul { size_t pos = 0; - EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, benchmark::stoul("18446744073709551615", &pos)); + EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, + benchmark::stoul("18446744073709551615", &pos)); EXPECT_EQ(20ul, pos); } #endif @@ -63,91 +67,133 @@ TEST(StringUtilTest, stoul) { } #ifndef BENCHMARK_HAS_NO_EXCEPTIONS { - ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument); + ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"), + std::invalid_argument); } #endif } -TEST(StringUtilTest, stoi) { - { - size_t pos = 0; - EXPECT_EQ(0, benchmark::stoi("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1357, benchmark::stoi("1357", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); - EXPECT_EQ(4ul, pos); - } +TEST(StringUtilTest, stoi){{size_t pos = 0; +EXPECT_EQ(0, benchmark::stoi("0", &pos)); +EXPECT_EQ(1ul, pos); +} // namespace +{ + size_t pos = 0; + EXPECT_EQ(-17, benchmark::stoi("-17", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1357, benchmark::stoi("1357", &pos)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16)); + EXPECT_EQ(4ul, pos); +} #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument); - } +{ + ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"), + std::invalid_argument); +} #endif } -TEST(StringUtilTest, stod) { - { - size_t pos = 0; - EXPECT_EQ(0.0, benchmark::stod("0", &pos)); - EXPECT_EQ(1ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); - EXPECT_EQ(4ul, pos); - } - { - size_t pos = 0; 
- EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); - EXPECT_EQ(3ul, pos); - } - { - size_t pos = 0; - /* Note: exactly representable as double */ - EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); - EXPECT_EQ(8ul, pos); - } +TEST(StringUtilTest, stod){{size_t pos = 0; +EXPECT_EQ(0.0, benchmark::stod("0", &pos)); +EXPECT_EQ(1ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(-84.0, benchmark::stod("-84", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1234.0, benchmark::stod("1234", &pos)); + EXPECT_EQ(4ul, pos); +} +{ + size_t pos = 0; + EXPECT_EQ(1.5, benchmark::stod("1.5", &pos)); + EXPECT_EQ(3ul, pos); +} +{ + size_t pos = 0; + /* Note: exactly representable as double */ + EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos)); + EXPECT_EQ(8ul, pos); +} #ifndef BENCHMARK_HAS_NO_EXCEPTIONS - { - ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument); - } +{ + ASSERT_THROW(std::ignore = benchmark::stod("this is a test"), + std::invalid_argument); +} #endif } +TEST(StringUtilTest, StrSplit) { + EXPECT_EQ(benchmark::StrSplit("", ','), std::vector<std::string>{}); + EXPECT_EQ(benchmark::StrSplit("hello", ','), + std::vector<std::string>({"hello"})); + EXPECT_EQ(benchmark::StrSplit("hello,there,is,more", ','), + std::vector<std::string>({"hello", "there", "is", "more"})); +} + +using HumanReadableFixture = ::testing::TestWithParam< + std::tuple<double, benchmark::Counter::OneK, std::string>>; + +INSTANTIATE_TEST_SUITE_P( + HumanReadableTests, HumanReadableFixture, + ::testing::Values( + std::make_tuple(0.0, benchmark::Counter::kIs1024, "0"), + std::make_tuple(999.0, benchmark::Counter::kIs1024, "999"), + std::make_tuple(1000.0, benchmark::Counter::kIs1024, "1000"), + std::make_tuple(1024.0, benchmark::Counter::kIs1024, "1Ki"), + std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1024, + "976\\.56.Ki"), + std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1024, "1Mi"), + std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1024, + "953\\.674Mi"), + std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1024, + "1Gi"), + std::make_tuple(0.0, benchmark::Counter::kIs1000, "0"), + std::make_tuple(999.0, benchmark::Counter::kIs1000, "999"), + std::make_tuple(1000.0, benchmark::Counter::kIs1000, "1k"), + std::make_tuple(1024.0, benchmark::Counter::kIs1000, "1.024k"), + std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1000, "1M"), + std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1000, + "1\\.04858M"), + std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1000, + "1G"), + std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1000, + "1\\.07374G"))); + +TEST_P(HumanReadableFixture, HumanReadableNumber) { + std::string str = benchmark::HumanReadableNumber(std::get<0>(GetParam()), + std::get<1>(GetParam())); + ASSERT_THAT(str, ::testing::MatchesRegex(std::get<2>(GetParam()))); +} + } // end namespace diff --git a/test/templated_fixture_test.cc b/test/templated_fixture_test.cc index fe9865c..af239c3 100644 --- a/test/templated_fixture_test.cc +++ b/test/templated_fixture_test.cc @@ -1,9 +1,9 @@ -#include "benchmark/benchmark.h" - #include <cassert> #include <memory> +#include "benchmark/benchmark.h" + template <typename T> class MyFixture : public ::benchmark::Fixture { public: diff --git a/test/time_unit_gtest.cc b/test/time_unit_gtest.cc new file mode 100644 index 0000000..484ecbc --- /dev/null +++ b/test/time_unit_gtest.cc @@ -0,0 +1,37 @@ +#include "../include/benchmark/benchmark.h" +#include "gtest/gtest.h" + +namespace 
benchmark { +namespace internal { + +namespace { + +class DummyBenchmark : public Benchmark { + public: + DummyBenchmark() : Benchmark("dummy") {} + void Run(State&) override {} +}; + +TEST(DefaultTimeUnitTest, TimeUnitIsNotSet) { + DummyBenchmark benchmark; + EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond); +} + +TEST(DefaultTimeUnitTest, DefaultIsSet) { + DummyBenchmark benchmark; + EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond); + SetDefaultTimeUnit(kMillisecond); + EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond); +} + +TEST(DefaultTimeUnitTest, DefaultAndExplicitUnitIsSet) { + DummyBenchmark benchmark; + benchmark.Unit(kMillisecond); + SetDefaultTimeUnit(kMicrosecond); + + EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond); +} + +} // namespace +} // namespace internal +} // namespace benchmark diff --git a/test/user_counters_tabular_test.cc b/test/user_counters_tabular_test.cc index 18373c0..c98b769 100644 --- a/test/user_counters_tabular_test.cc +++ b/test/user_counters_tabular_test.cc @@ -7,19 +7,25 @@ // @todo: <jpmag> this checks the full output at once; the rule for // CounterSet1 was failing because it was not matching "^[-]+$". // @todo: <jpmag> check that the counters are vertically aligned. -ADD_CASES( - TC_ConsoleOut, - { - // keeping these lines long improves readability, so: - // clang-format off +ADD_CASES(TC_ConsoleOut, + { + // keeping these lines long improves readability, so: + // clang-format off {"^[-]+$", MR_Next}, {"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Bat %s Baz %s Foo %s Frob %s Lob$", MR_Next}, {"^[-]+$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ 
]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, @@ -46,8 +52,8 @@ ADD_CASES( {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$"}, - // clang-format on - }); + // clang-format on + }); ADD_CASES(TC_CSVOut, {{"%csv_header," "\"Bar\",\"Bat\",\"Baz\",\"Foo\",\"Frob\",\"Lob\""}}); @@ -68,12 +74,15 @@ void BM_Counters_Tabular(benchmark::State& state) { {"Lob", {32, bm::Counter::kAvgThreads}}, }); } -BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 16); +BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 2)->Repetitions(2); ADD_CASES(TC_JSONOut, - {{"\"name\": \"BM_Counters_Tabular/threads:%int\",$"}, - {"\"run_name\": \"BM_Counters_Tabular/threads:%int\",$", MR_Next}, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -87,8 +96,260 @@ ADD_CASES(TC_JSONOut, {"\"Frob\": %float,$", MR_Next}, {"\"Lob\": %float$", MR_Next}, {"}", MR_Next}}); -ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/threads:%int\",%csv_report," - "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": 
%float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); + +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, + 
{"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 0,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"iteration\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"repetition_index\": 1,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + 
{"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_mean\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_median\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_mean\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_median\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckTabular(Results const& e) { @@ -99,7 +360,10 @@ void CheckTabular(Results const& e) { CHECK_COUNTER_VALUE(e, int, "Frob", EQ, 16); CHECK_COUNTER_VALUE(e, int, "Lob", EQ, 32); } -CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/threads:%int", &CheckTabular); +CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:1$", + &CheckTabular); +CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:2$", + &CheckTabular); // ========================================================================= // // -------------------- Tabular+Rate Counters Output ----------------------- // @@ -108,7 +372,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/threads:%int", &CheckTabular); void BM_CounterRates_Tabular(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters.insert({ @@ -123,10 +388,12 @@ void BM_CounterRates_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterRates_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterRates_Tabular/threads:%int\",$"}, + 
{"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterRates_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -174,9 +441,11 @@ void BM_CounterSet0_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterSet0_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet0_Tabular/threads:%int\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet0_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -212,9 +481,11 @@ void BM_CounterSet1_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterSet1_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet1_Tabular/threads:%int\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet1_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -254,9 +525,11 @@ void BM_CounterSet2_Tabular(benchmark::State& state) { BENCHMARK(BM_CounterSet2_Tabular)->ThreadRange(1, 16); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_CounterSet2_Tabular/threads:%int\",$"}, + {"\"family_index\": 4,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_CounterSet2_Tabular/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, diff --git a/test/user_counters_test.cc b/test/user_counters_test.cc index 5699f4f..4cd8ee3 100644 --- a/test/user_counters_test.cc +++ b/test/user_counters_test.cc @@ -26,15 +26,17 @@ void BM_Counters_Simple(benchmark::State& state) { for (auto _ : state) { } state.counters["foo"] = 1; - state.counters["bar"] = 2 * (double)state.iterations(); + state.counters["bar"] = 2 * static_cast<double>(state.iterations()); } BENCHMARK(BM_Counters_Simple); ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Simple %console_report bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Simple\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Simple\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -65,7 +67,8 @@ int num_calls1 = 0; void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } state.counters["foo"] = 1; state.counters["bar"] = ++num_calls1; @@ -78,9 +81,11 
@@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_WithBytesAndItemsPSec %console_report " "foo=%hrfloat items_per_second=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_WithBytesAndItemsPSec\",$"}, + {"\"family_index\": 1,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_WithBytesAndItemsPSec\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -114,7 +119,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec", void BM_Counters_Rate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate}; @@ -125,9 +131,11 @@ ADD_CASES( TC_ConsoleOut, {{"^BM_Counters_Rate %console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Rate\",$"}, + {"\"family_index\": 2,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Rate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -155,7 +163,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_Rate", &CheckRate); void BM_Invert(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert}; @@ -165,9 +174,11 @@ BENCHMARK(BM_Invert); ADD_CASES(TC_ConsoleOut, {{"^BM_Invert %console_report bar=%hrfloatu foo=%hrfloatk$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Invert\",$"}, + {"\"family_index\": 3,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Invert\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -187,14 +198,14 @@ void CheckInvert(Results const& e) { CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert); // ========================================================================= // -// ------------------------- InvertedRate Counters Output -// -------------------------- // +// --------------------- InvertedRate Counters Output ---------------------- // // ========================================================================= // void BM_Counters_InvertedRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = @@ -207,9 +218,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_InvertedRate %console_report " "bar=%hrfloats foo=%hrfloats$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_InvertedRate\",$"}, + {"\"family_index\": 4,$", 
MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_InvertedRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -246,9 +259,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Threads/threads:%int\",$"}, + {"\"family_index\": 5,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Threads/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -285,9 +300,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int " "%console_report bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"}, + {"\"family_index\": 6,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreads/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -316,7 +333,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int", void BM_Counters_AvgThreadsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate}; @@ -327,10 +345,12 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreadsRate/threads:%int " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"}, + {"\"family_index\": 7,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -367,9 +387,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_IterationInvariant %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_IterationInvariant\",$"}, + {"\"family_index\": 8,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_IterationInvariant\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -399,7 +421,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_IterationInvariant", void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + 
benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = @@ -412,10 +435,12 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kIsIterationInvariantRate " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_kIsIterationInvariantRate\",$"}, + {"\"family_index\": 9,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kIsIterationInvariantRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -440,7 +465,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_kIsIterationInvariantRate", &CheckIsIterationInvariantRate); // ========================================================================= // -// ------------------- AvgIterations Counters Output ------------------ // +// --------------------- AvgIterations Counters Output --------------------- // // ========================================================================= // void BM_Counters_AvgIterations(benchmark::State& state) { @@ -455,9 +480,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgIterations %console_report " "bar=%hrfloat foo=%hrfloat$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_AvgIterations\",$"}, + {"\"family_index\": 10,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_AvgIterations\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, @@ -480,13 +507,14 @@ void CheckAvgIterations(Results const& e) { CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations); // ========================================================================= // -// ----------------- AvgIterationsRate Counters Output ---------------- // +// ------------------- AvgIterationsRate Counters Output ------------------- // // ========================================================================= // void BM_Counters_kAvgIterationsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - benchmark::DoNotOptimize(state.iterations()); + auto iterations = state.iterations(); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate}; @@ -498,9 +526,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kAvgIterationsRate " "%console_report bar=%hrfloat/s foo=%hrfloat/s$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_kAvgIterationsRate\",$"}, + {"\"family_index\": 11,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_kAvgIterationsRate\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, - {"\"repetitions\": 0,$", MR_Next}, + {"\"repetitions\": 1,$", MR_Next}, {"\"repetition_index\": 0,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"iterations\": %int,$", MR_Next}, diff --git a/test/user_counters_thousands_test.cc b/test/user_counters_thousands_test.cc index 21d8285..fc15383 100644 --- a/test/user_counters_thousands_test.cc +++ b/test/user_counters_thousands_test.cc @@ -16,13 +16,13 @@ void BM_Counters_Thousands(benchmark::State& state) { {"t0_1000000DefaultBase", bm::Counter(1000 * 1000, 
bm::Counter::kDefaults)}, {"t1_1000000Base1000", bm::Counter(1000 * 1000, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1000)}, + bm::Counter::OneK::kIs1000)}, {"t2_1000000Base1024", bm::Counter(1000 * 1000, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1024)}, + bm::Counter::OneK::kIs1024)}, {"t3_1048576Base1000", bm::Counter(1024 * 1024, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1000)}, + bm::Counter::OneK::kIs1000)}, {"t4_1048576Base1024", bm::Counter(1024 * 1024, bm::Counter::kDefaults, - benchmark::Counter::OneK::kIs1024)}, + bm::Counter::OneK::kIs1024)}, }); } BENCHMARK(BM_Counters_Thousands)->Repetitions(2); @@ -30,27 +30,29 @@ ADD_CASES( TC_ConsoleOut, { {"^BM_Counters_Thousands/repeats:2 %console_report " - "t0_1000000DefaultBase=1000k " - "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " - "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M " + "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki " + "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2 %console_report " - "t0_1000000DefaultBase=1000k " - "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k " - "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M " + "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki " + "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_mean %console_report " - "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " - "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " - "t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M t1_1000000Base1000=1M " + "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M " + "t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_median %console_report " - "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k " - "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k " - "t4_1048576Base1024=1024k$"}, + "t0_1000000DefaultBase=1M t1_1000000Base1000=1M " + "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M " + "t4_1048576Base1024=1Mi$"}, {"^BM_Counters_Thousands/repeats:2_stddev %console_time_only_report [ " "]*2 t0_1000000DefaultBase=0 t1_1000000Base1000=0 " "t2_1000000Base1024=0 t3_1048576Base1000=0 t4_1048576Base1024=0$"}, }); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, @@ -68,6 +70,8 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"iteration\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, @@ -85,11 +89,14 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_mean\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"mean\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": 
%float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -102,11 +109,14 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_median\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"median\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, @@ -119,11 +129,14 @@ ADD_CASES(TC_JSONOut, {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Thousands/repeats:2_stddev\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, {"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next}, {"\"run_type\": \"aggregate\",$", MR_Next}, {"\"repetitions\": 2,$", MR_Next}, {"\"threads\": 1,$", MR_Next}, {"\"aggregate_name\": \"stddev\",$", MR_Next}, + {"\"aggregate_unit\": \"time\",$", MR_Next}, {"\"iterations\": 2,$", MR_Next}, {"\"real_time\": %float,$", MR_Next}, {"\"cpu_time\": %float,$", MR_Next}, diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index 5895883..d25caa7 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -1,4 +1,4 @@ -load("@py_deps//:requirements.bzl", "requirement") +load("@tools_pip_deps//:requirements.bzl", "requirement") py_library( name = "gbench", @@ -12,7 +12,7 @@ py_library( py_binary( name = "compare", srcs = ["compare.py"], - python_version = "PY2", + python_version = "PY3", deps = [ ":gbench", ], diff --git a/tools/compare.py b/tools/compare.py index 66eed93..e5eeb24 100755 --- a/tools/compare.py +++ b/tools/compare.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import unittest """ @@ -9,25 +9,28 @@ import argparse from argparse import ArgumentParser import json import sys +import os import gbench from gbench import util, report -from gbench.util import * def check_inputs(in1, in2, flags): """ Perform checking on the user provided inputs and diagnose any abnormalities """ - in1_kind, in1_err = classify_input_file(in1) - in2_kind, in2_err = classify_input_file(in2) - output_file = find_benchmark_flag('--benchmark_out=', flags) - output_type = find_benchmark_flag('--benchmark_out_format=', flags) - if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file: + in1_kind, in1_err = util.classify_input_file(in1) + in2_kind, in2_err = util.classify_input_file(in2) + output_file = util.find_benchmark_flag('--benchmark_out=', flags) + output_type = util.find_benchmark_flag('--benchmark_out_format=', flags) + if in1_kind == util.IT_Executable and in2_kind == util.IT_Executable and output_file: print(("WARNING: '--benchmark_out=%s' will be passed to both " "benchmarks causing it to be overwritten") % output_file) - if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0: - print("WARNING: passing optional flags has no effect since both " - "inputs are JSON") + if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON: + # When both sides are JSON the only supported flag is + # --benchmark_filter= + for flag in util.remove_benchmark_flags('--benchmark_filter=', flags): + print("WARNING: passing %s has no effect since both " + "inputs are JSON" % flag) if output_type is not None and output_type != 'json': print(("ERROR: passing '--benchmark_out_format=%s' to 
'compare.py`" " is not supported.") % output_type) @@ -238,10 +241,10 @@ def main(): options_contender = ['--benchmark_filter=%s' % filter_contender] # Run the benchmarks and report the results - json1 = json1_orig = gbench.util.run_or_load_benchmark( - test_baseline, benchmark_options + options_baseline) - json2 = json2_orig = gbench.util.run_or_load_benchmark( - test_contender, benchmark_options + options_contender) + json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_baseline, benchmark_options + options_baseline)) + json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( + test_contender, benchmark_options + options_contender)) # Now, filter the benchmarks so that the difference report can work if filter_baseline and filter_contender: diff --git a/tools/gbench/Inputs/test1_run1.json b/tools/gbench/Inputs/test1_run1.json index 601e327..9daed0b 100644 --- a/tools/gbench/Inputs/test1_run1.json +++ b/tools/gbench/Inputs/test1_run1.json @@ -114,6 +114,14 @@ "real_time": 1, "cpu_time": 1, "time_unit": "s" + }, + { + "name": "BM_hasLabel", + "label": "a label", + "iterations": 1, + "real_time": 1, + "cpu_time": 1, + "time_unit": "s" } ] } diff --git a/tools/gbench/Inputs/test1_run2.json b/tools/gbench/Inputs/test1_run2.json index 3cbcf39..dc52970 100644 --- a/tools/gbench/Inputs/test1_run2.json +++ b/tools/gbench/Inputs/test1_run2.json @@ -114,6 +114,14 @@ "real_time": 1, "cpu_time": 1, "time_unit": "ns" + }, + { + "name": "BM_hasLabel", + "label": "a label", + "iterations": 1, + "real_time": 1, + "cpu_time": 1, + "time_unit": "s" } ] } diff --git a/tools/gbench/Inputs/test4_run.json b/tools/gbench/Inputs/test4_run.json new file mode 100644 index 0000000..eaa005f --- /dev/null +++ b/tools/gbench/Inputs/test4_run.json @@ -0,0 +1,96 @@ +{ + "benchmarks": [ + { + "name": "99 family 0 instance 0 repetition 0", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 0, + "repetition_index": 0 + }, + { + "name": "98 family 0 instance 0 repetition 1", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 0, + "repetition_index": 1 + }, + { + "name": "97 family 0 instance 0 aggregate", + "run_type": "aggregate", + "family_index": 0, + "per_family_instance_index": 0, + "aggregate_name": "9 aggregate" + }, + + + { + "name": "96 family 0 instance 1 repetition 0", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 1, + "repetition_index": 0 + }, + { + "name": "95 family 0 instance 1 repetition 1", + "run_type": "iteration", + "family_index": 0, + "per_family_instance_index": 1, + "repetition_index": 1 + }, + { + "name": "94 family 0 instance 1 aggregate", + "run_type": "aggregate", + "family_index": 0, + "per_family_instance_index": 1, + "aggregate_name": "9 aggregate" + }, + + + + + { + "name": "93 family 1 instance 0 repetition 0", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 0, + "repetition_index": 0 + }, + { + "name": "92 family 1 instance 0 repetition 1", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 0, + "repetition_index": 1 + }, + { + "name": "91 family 1 instance 0 aggregate", + "run_type": "aggregate", + "family_index": 1, + "per_family_instance_index": 0, + "aggregate_name": "9 aggregate" + }, + + + { + "name": "90 family 1 instance 1 repetition 0", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 0 + }, + { + "name": 
"89 family 1 instance 1 repetition 1", + "run_type": "iteration", + "family_index": 1, + "per_family_instance_index": 1, + "repetition_index": 1 + }, + { + "name": "88 family 1 instance 1 aggregate", + "run_type": "aggregate", + "family_index": 1, + "per_family_instance_index": 1, + "aggregate_name": "9 aggregate" + } + ] +} diff --git a/tools/gbench/Inputs/test4_run0.json b/tools/gbench/Inputs/test4_run0.json new file mode 100644 index 0000000..54cf127 --- /dev/null +++ b/tools/gbench/Inputs/test4_run0.json @@ -0,0 +1,21 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "whocares", + "run_type": "aggregate", + "aggregate_name": "zz", + "aggregate_unit": "percentage", + "iterations": 1000, + "real_time": 0.01, + "cpu_time": 0.10, + "time_unit": "ns" + } + ] +} diff --git a/tools/gbench/Inputs/test4_run1.json b/tools/gbench/Inputs/test4_run1.json new file mode 100644 index 0000000..25d5605 --- /dev/null +++ b/tools/gbench/Inputs/test4_run1.json @@ -0,0 +1,21 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "whocares", + "run_type": "aggregate", + "aggregate_name": "zz", + "aggregate_unit": "percentage", + "iterations": 1000, + "real_time": 0.005, + "cpu_time": 0.15, + "time_unit": "ns" + } + ] +} diff --git a/tools/gbench/report.py b/tools/gbench/report.py index bf29492..b2bbfb9 100644 --- a/tools/gbench/report.py +++ b/tools/gbench/report.py @@ -1,11 +1,14 @@ -import unittest """report.py - Utilities for reporting statistics about benchmark results """ + +import unittest import os import re import copy +import random -from scipy.stats import mannwhitneyu +from scipy.stats import mannwhitneyu, gmean +from numpy import array class BenchmarkColor(object): @@ -39,6 +42,13 @@ UTEST_MIN_REPETITIONS = 2 UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. UTEST_COL_NAME = "_pvalue" +_TIME_UNIT_TO_SECONDS_MULTIPLIER = { + "s": 1.0, + "ms": 1e-3, + "us": 1e-6, + "ns": 1e-9, +} + def color_format(use_color, fmt_str, *args, **kwargs): """ @@ -148,6 +158,30 @@ def partition_benchmarks(json1, json2): return partitions +def get_timedelta_field_as_seconds(benchmark, field_name): + """ + Get value of field_name field of benchmark, which is time with time unit + time_unit, as time in seconds. + """ + timedelta = benchmark[field_name] + time_unit = benchmark.get('time_unit', 's') + return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit) + + +def calculate_geomean(json): + """ + Extract all real/cpu times from all the benchmarks as seconds, + and calculate their geomean. + """ + times = [] + for benchmark in json['benchmarks']: + if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': + continue + times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), + get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) + return gmean(times) if times else array([]) + + def extract_field(partition, field_name): # The count of elements may be different. We want *all* of them. 
lhs = [x[field_name] for x in partition[0]] @@ -172,6 +206,7 @@ def calc_utest(timings_cpu, timings_time): return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue + def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): def get_utest_color(pval): return BC_FAIL if pval >= utest_alpha else BC_OKGREEN @@ -220,6 +255,7 @@ def get_difference_report( partitions = partition_benchmarks(json1, json2) for partition in partitions: benchmark_name = partition[0][0]['name'] + label = partition[0][0]['label'] if 'label' in partition[0][0] else '' time_unit = partition[0][0]['time_unit'] measurements = [] utest_results = {} @@ -240,7 +276,8 @@ def get_difference_report( if utest: timings_cpu = extract_field(partition, 'cpu_time') timings_time = extract_field(partition, 'real_time') - have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) + have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest( + timings_cpu, timings_time) if cpu_pvalue and time_pvalue: utest_results = { 'have_optimal_repetitions': have_optimal_repetitions, @@ -259,6 +296,7 @@ def get_difference_report( aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' diff_report.append({ 'name': benchmark_name, + 'label': label, 'measurements': measurements, 'time_unit': time_unit, 'run_type': run_type, @@ -266,6 +304,26 @@ def get_difference_report( 'utest': utest_results }) + lhs_gmean = calculate_geomean(json1) + rhs_gmean = calculate_geomean(json2) + if lhs_gmean.any() and rhs_gmean.any(): + diff_report.append({ + 'name': 'OVERALL_GEOMEAN', + 'label': '', + 'measurements': [{ + 'real_time': lhs_gmean[0], + 'cpu_time': lhs_gmean[1], + 'real_time_other': rhs_gmean[0], + 'cpu_time_other': rhs_gmean[1], + 'time': calculate_change(lhs_gmean[0], rhs_gmean[0]), + 'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1]) + }], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + }) + return diff_report @@ -301,26 +359,23 @@ def print_difference_report( fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" for benchmark in json_diff_report: # *If* we were asked to only include aggregates, - # and if it is non-aggregate, then skip it. - if include_aggregates_only and 'run_type' in benchmark: - if benchmark['run_type'] != 'aggregate': - continue - - for measurement in benchmark['measurements']: - output_strs += [color_format(use_color, - fmt_str, - BC_HEADER, - benchmark['name'], - first_col_width, - get_color(measurement['time']), - measurement['time'], - get_color(measurement['cpu']), - measurement['cpu'], - measurement['real_time'], - measurement['real_time_other'], - measurement['cpu_time'], - measurement['cpu_time_other'], - endc=BC_ENDC)] + # and if it is non-aggregate, then don't print it. + if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': + for measurement in benchmark['measurements']: + output_strs += [color_format(use_color, + fmt_str, + BC_HEADER, + benchmark['name'], + first_col_width, + get_color(measurement['time']), + measurement['time'], + get_color(measurement['cpu']), + measurement['cpu'], + measurement['real_time'], + measurement['real_time_other'], + measurement['cpu_time'], + measurement['cpu_time_other'], + endc=BC_ENDC)] # After processing the measurements, if requested and # if applicable (e.g. 
u-test exists for given benchmark), @@ -404,6 +459,8 @@ class TestReportDifference(unittest.TestCase): '-0.1000', '100', '110', '100', '90'], ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], + ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'], + ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, use_color=False) @@ -420,81 +477,137 @@ class TestReportDifference(unittest.TestCase): expected_output = [ { 'name': 'BM_SameTimes', - 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}], + 'label': '', + 'measurements': [{'time': 0.0000, 'cpu': 0.0000, + 'real_time': 10, 'real_time_other': 10, + 'cpu_time': 10, 'cpu_time_other': 10}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_2xFaster', - 'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}], + 'label': '', + 'measurements': [{'time': -0.5000, 'cpu': -0.5000, + 'real_time': 50, 'real_time_other': 25, + 'cpu_time': 50, 'cpu_time_other': 25}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_2xSlower', - 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}], + 'label': '', + 'measurements': [{'time': 1.0000, 'cpu': 1.0000, + 'real_time': 50, 'real_time_other': 100, + 'cpu_time': 50, 'cpu_time_other': 100}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_1PercentFaster', - 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}], + 'label': '', + 'measurements': [{'time': -0.0100, 'cpu': -0.0100, + 'real_time': 100, 'real_time_other': 98.9999999, + 'cpu_time': 100, 'cpu_time_other': 98.9999999}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_1PercentSlower', - 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}], + 'label': '', + 'measurements': [{'time': 0.0100, 'cpu': 0.0100, + 'real_time': 100, 'real_time_other': 101, + 'cpu_time': 100, 'cpu_time_other': 101}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_10PercentFaster', - 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}], + 'label': '', + 'measurements': [{'time': -0.1000, 'cpu': -0.1000, + 'real_time': 100, 'real_time_other': 90, + 'cpu_time': 100, 'cpu_time_other': 90}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_10PercentSlower', - 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}], + 'label': '', + 'measurements': [{'time': 0.1000, 'cpu': 0.1000, + 'real_time': 100, 'real_time_other': 110, + 'cpu_time': 100, 'cpu_time_other': 110}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_100xSlower', - 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}], + 'label': '', + 'measurements': [{'time': 99.0000, 'cpu': 99.0000, + 'real_time': 100, 'real_time_other': 10000, + 'cpu_time': 100, 'cpu_time_other': 10000}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_100xFaster', - 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 
'cpu_time': 10000, 'cpu_time_other': 100}], + 'label': '', + 'measurements': [{'time': -0.9900, 'cpu': -0.9900, + 'real_time': 10000, 'real_time_other': 100, + 'cpu_time': 10000, 'cpu_time_other': 100}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_10PercentCPUToTime', - 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}], + 'label': '', + 'measurements': [{'time': 0.1000, 'cpu': -0.1000, + 'real_time': 100, 'real_time_other': 110, + 'cpu_time': 100, 'cpu_time_other': 90}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_ThirdFaster', - 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}], + 'label': '', + 'measurements': [{'time': -0.3333, 'cpu': -0.3334, + 'real_time': 100, 'real_time_other': 67, + 'cpu_time': 100, 'cpu_time_other': 67}], 'time_unit': 'ns', 'utest': {} }, { 'name': 'BM_NotBadTimeUnit', - 'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}], + 'label': '', + 'measurements': [{'time': -0.9000, 'cpu': 0.2000, + 'real_time': 0.4, 'real_time_other': 0.04, + 'cpu_time': 0.5, 'cpu_time_other': 0.6}], + 'time_unit': 's', + 'utest': {} + }, + { + 'name': 'BM_hasLabel', + 'label': 'a label', + 'measurements': [{'time': 0.0000, 'cpu': 0.0000, + 'real_time': 1, 'real_time_other': 1, + 'cpu_time': 1, 'cpu_time_other': 1}], 'time_unit': 's', 'utest': {} }, + { + 'name': 'OVERALL_GEOMEAN', + 'label': '', + 'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06, + 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07, + 'time': -0.8112976497120911, 'cpu': -0.7778551721181174}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', 'utest': {} + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) for out, expected in zip( self.json_diff_report, expected_output): self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['label'], expected['label']) self.assertEqual(out['time_unit'], expected['time_unit']) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -525,6 +638,7 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], + ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, use_color=False) @@ -562,6 +676,16 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], 'time_unit': 'ns', 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08, + 'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08, + 'time': -0.5000000000000009, 'cpu': -0.5000000000000009}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) @@ -600,8 +724,8 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', 
'72'], ['BM_Two_pvalue', - '0.6985', - '0.6985', + '1.0000', + '0.6667', 'U', 'Test,', 'Repetitions:', @@ -618,7 +742,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], ['short_pvalue', '0.7671', - '0.1489', + '0.2000', 'U', 'Test,', 'Repetitions:', @@ -632,6 +756,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): 'repetitions', 'recommended.'], ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) @@ -643,6 +768,53 @@ class TestReportDifferenceWithUTest(unittest.TestCase): parts = [x for x in output_lines[i].split(' ') if x] self.assertEqual(expect_lines[i], parts) + def test_json_diff_report_pretty_printing_aggregates_only(self): + expect_lines = [ + ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], + ['BM_Two_pvalue', + '1.0000', + '0.6667', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '2.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], + ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], + ['short_pvalue', + '0.7671', + '0.2000', + 'U', + 'Test,', + 'Repetitions:', + '2', + 'vs', + '3.', + 'WARNING:', + 'Results', + 'unreliable!', + '9+', + 'repetitions', + 'recommended.'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + def test_json_diff_report(self): expected_output = [ { @@ -672,7 +844,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 } }, { @@ -693,7 +865,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 } }, { @@ -708,6 +880,16 @@ class TestReportDifferenceWithUTest(unittest.TestCase): ], 'time_unit': 'ns', 'utest': {} + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) @@ -747,8 +929,8 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], ['BM_Two_pvalue', - '0.6985', - '0.6985', + '1.0000', + '0.6667', 'U', 'Test,', 'Repetitions:', @@ -765,7 +947,7 @@ class 
TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], ['short_pvalue', '0.7671', - '0.1489', + '0.2000', 'U', 'Test,', 'Repetitions:', @@ -778,7 +960,8 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( '9+', 'repetitions', 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'] + ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], + ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] ] output_lines_with_header = print_difference_report( self.json_diff_report, @@ -820,7 +1003,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 } }, { @@ -841,7 +1024,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( ], 'time_unit': 'ns', 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772 + 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 } }, { @@ -853,11 +1036,83 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( 'real_time': 8, 'cpu_time_other': 53, 'cpu': -0.3375 - } + } ], 'utest': {}, 'time_unit': u'ns', 'aggregate_name': '' + }, + { + 'name': 'OVERALL_GEOMEAN', + 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, + 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, + 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], + 'time_unit': 's', + 'run_type': 'aggregate', + 'aggregate_name': 'geomean', + 'utest': {} + } + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip( + self.json_diff_report, expected_output): + self.assertEqual(out['name'], expected['name']) + self.assertEqual(out['time_unit'], expected['time_unit']) + assert_utest(self, out, expected) + assert_measurements(self, out, expected) + + +class TestReportDifferenceForPercentageAggregates( + unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_results(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput1 = os.path.join(testInputs, 'test4_run0.json') + testOutput2 = os.path.join(testInputs, 'test4_run1.json') + with open(testOutput1, 'r') as f: + json1 = json.load(f) + with open(testOutput2, 'r') as f: + json2 = json.load(f) + return json1, json2 + + json1, json2 = load_results() + cls.json_diff_report = get_difference_report( + json1, json2, utest=True) + + def test_json_diff_report_pretty_printing(self): + expect_lines = [ + ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0'] + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, + utest=True, utest_alpha=0.05, use_color=False) + output_lines = output_lines_with_header[2:] + print("\n") + print("\n".join(output_lines_with_header)) + self.assertEqual(len(output_lines), len(expect_lines)) + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(' ') if x] + self.assertEqual(expect_lines[i], parts) + + def test_json_diff_report(self): + expected_output = [ + { + 'name': u'whocares', + 'measurements': [ + {'time': -0.5, + 'cpu': 0.5, + 'real_time': 0.01, + 'real_time_other': 0.005, + 'cpu_time': 0.10, + 'cpu_time_other': 0.15} + ], + 
'time_unit': 'ns', + 'utest': {} } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) @@ -869,6 +1124,49 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( assert_measurements(self, out, expected) +class TestReportSorting(unittest.TestCase): + @classmethod + def setUpClass(cls): + def load_result(): + import json + testInputs = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + 'Inputs') + testOutput = os.path.join(testInputs, 'test4_run.json') + with open(testOutput, 'r') as f: + json = json.load(f) + return json + + cls.json = load_result() + + def test_json_diff_report_pretty_printing(self): + import util + + expected_names = [ + "99 family 0 instance 0 repetition 0", + "98 family 0 instance 0 repetition 1", + "97 family 0 instance 0 aggregate", + "96 family 0 instance 1 repetition 0", + "95 family 0 instance 1 repetition 1", + "94 family 0 instance 1 aggregate", + "93 family 1 instance 0 repetition 0", + "92 family 1 instance 0 repetition 1", + "91 family 1 instance 0 aggregate", + "90 family 1 instance 1 repetition 0", + "89 family 1 instance 1 repetition 1", + "88 family 1 instance 1 aggregate" + ] + + for n in range(len(self.json['benchmarks']) ** 2): + random.shuffle(self.json['benchmarks']) + sorted_benchmarks = util.sort_benchmark_results(self.json)[ + 'benchmarks'] + self.assertEqual(len(expected_names), len(sorted_benchmarks)) + for out, expected in zip(sorted_benchmarks, expected_names): + self.assertEqual(out['name'], expected) + + def assert_utest(unittest_instance, lhs, rhs): if lhs['utest']: unittest_instance.assertAlmostEqual( diff --git a/tools/gbench/util.py b/tools/gbench/util.py index 661c4ba..5e79da8 100644 --- a/tools/gbench/util.py +++ b/tools/gbench/util.py @@ -2,9 +2,11 @@ """ import json import os -import tempfile +import re import subprocess import sys +import tempfile + # Input file type enumeration IT_Invalid = 0 @@ -57,7 +59,7 @@ def classify_input_file(filename): """ Return a tuple (type, msg) where 'type' specifies the classified type of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable - string represeting the error. + string representing the error. """ ftype = IT_Invalid err_msg = None @@ -110,13 +112,49 @@ def remove_benchmark_flags(prefix, benchmark_flags): return [f for f in benchmark_flags if not f.startswith(prefix)] -def load_benchmark_results(fname): +def load_benchmark_results(fname, benchmark_filter): """ Read benchmark output from a file and return the JSON object. + + Apply benchmark_filter, a regular expression, with nearly the same + semantics of the --benchmark_filter argument. May be None. + Note: the Python regular expression engine is used instead of the + one used by the C++ code, which may produce different results + in complex cases. + REQUIRES: 'fname' names a file containing JSON benchmark output. """ + def benchmark_wanted(benchmark): + if benchmark_filter is None: + return True + name = benchmark.get('run_name', None) or benchmark['name'] + if re.search(benchmark_filter, name): + return True + return False + with open(fname, 'r') as f: - return json.load(f) + results = json.load(f) + if 'benchmarks' in results: + results['benchmarks'] = list(filter(benchmark_wanted, + results['benchmarks'])) + return results + + +def sort_benchmark_results(result): + benchmarks = result['benchmarks'] + + # From inner key to the outer key! 
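# [Editor's note -- illustrative sketch, not part of this patch]
# The sorts below rely on Python's stable sort: sorting repeatedly from the
# innermost key (repetition_index) to the outermost key (family_index)
# preserves earlier orderings for ties, so the result ends up grouped by
# family, then by per-family instance, with aggregates after iterations and
# repetitions in order. A minimal standalone example with made-up entries:
#
#   runs = [{'family_index': 1, 'repetition_index': 0},
#           {'family_index': 0, 'repetition_index': 1},
#           {'family_index': 0, 'repetition_index': 0}]
#   runs = sorted(runs, key=lambda b: b.get('repetition_index', -1))  # inner
#   runs = sorted(runs, key=lambda b: b.get('family_index', -1))      # outer
#   # -> family 0 rep 0, family 0 rep 1, family 1 rep 0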
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1)
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0)
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1)
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1)
+
+    result['benchmarks'] = benchmarks
+    return result
 def run_benchmark(exe_name, benchmark_flags):
@@ -142,7 +180,7 @@ def run_benchmark(exe_name, benchmark_flags):
     if exitCode != 0:
         print('TEST FAILED...')
         sys.exit(exitCode)
-    json_res = load_benchmark_results(output_name)
+    json_res = load_benchmark_results(output_name, None)
     if is_temp_output:
         os.unlink(output_name)
     return json_res
@@ -157,7 +195,9 @@ def run_or_load_benchmark(filename, benchmark_flags):
     """
     ftype = check_input_file(filename)
     if ftype == IT_JSON:
-        return load_benchmark_results(filename)
+        benchmark_filter = find_benchmark_flag('--benchmark_filter=',
+                                               benchmark_flags)
+        return load_benchmark_results(filename, benchmark_filter)
     if ftype == IT_Executable:
         return run_benchmark(filename, benchmark_flags)
     raise ValueError('Unknown file type %s' % ftype)
diff --git a/tools/libpfm.BUILD.bazel b/tools/libpfm.BUILD.bazel
new file mode 100644
index 0000000..6269534
--- /dev/null
+++ b/tools/libpfm.BUILD.bazel
@@ -0,0 +1,22 @@
+# Build rule for libpfm, which is required to collect performance counters for
+# BENCHMARK_ENABLE_LIBPFM builds.
+
+load("@rules_foreign_cc//foreign_cc:defs.bzl", "make")
+
+filegroup(
+    name = "pfm_srcs",
+    srcs = glob(["**"]),
+)
+
+make(
+    name = "libpfm",
+    lib_source = ":pfm_srcs",
+    lib_name = "libpfm",
+    copts = [
+        "-Wno-format-truncation",
+        "-Wno-use-after-free",
+    ],
+    visibility = [
+        "//visibility:public",
+    ],
+)
diff --git a/tools/requirements.txt b/tools/requirements.txt
index 3b3331b..f32f35b 100644
--- a/tools/requirements.txt
+++ b/tools/requirements.txt
@@ -1 +1,2 @@
-scipy>=1.5.0
\ No newline at end of file
+numpy == 1.25
+scipy == 1.10.0
diff --git a/tools/strip_asm.py b/tools/strip_asm.py
index 9030550..d131dc7 100755
--- a/tools/strip_asm.py
+++ b/tools/strip_asm.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 """
 strip_asm.py - Cleanup ASM output for the specified file
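A closing note on the new JSON filtering in tools/gbench/util.py above (editorial illustration, not part of the commit): when compare.py is given JSON inputs, a --benchmark_filter=<regex> flag is now applied with Python's re module against each entry's run_name, falling back to name. A rough equivalent of the benchmark_wanted()/filter() pair, assuming an already loaded results dict and a purely illustrative pattern:

    import re
    pattern = 'BM_Counters_.*'  # hypothetical filter, not taken from the commit
    results['benchmarks'] = [
        b for b in results['benchmarks']
        if re.search(pattern, b.get('run_name') or b['name'])
    ]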