Merge branch 'main' into 2027

This commit is contained in:
Peter Johnson
2025-07-21 18:38:43 -07:00
70 changed files with 10941 additions and 26 deletions

View File

@@ -91,6 +91,7 @@ option(WITH_TESTS "Build unit tests (requires internet connection)" ON)
option(WITH_GUI "Build GUI items" ON)
option(WITH_SIMULATION_MODULES "Build simulation modules" ON)
option(WITH_PROTOBUF "Build protobuf support" ON)
option(WITH_BENCHMARK "Build the benchmark project" ON)
# Options for using a package manager (e.g., vcpkg) for certain dependencies.
option(USE_SYSTEM_FMTLIB "Use system fmtlib" OFF)
@@ -360,6 +361,10 @@ if(WITH_WPILIB)
add_subdirectory(developerRobot)
endif()
if(WITH_BENCHMARK)
add_subdirectory(benchmark)
endif()
if(WITH_SIMULATION_MODULES)
add_subdirectory(simulation)
endif()

44
benchmark/CMakeLists.txt Normal file
View File

@@ -0,0 +1,44 @@
project(benchmark)
include(CompileWarnings)
file(GLOB benchmarkCpp_src src/main/native/cpp/*.cpp src/main/native/thirdparty/benchmark/src/*.cpp)
add_executable(benchmarkCpp ${benchmarkCpp_src})
target_compile_features(benchmarkCpp PUBLIC cxx_std_20)
wpilib_target_warnings(benchmarkCpp)
target_link_libraries(
benchmarkCpp
PUBLIC
$<TARGET_NAME_IF_EXISTS:apriltag>
$<TARGET_NAME_IF_EXISTS:wpilibc>
$<TARGET_NAME_IF_EXISTS:wpilibNewCommands>
$<TARGET_NAME_IF_EXISTS:wpimath>
$<TARGET_NAME_IF_EXISTS:wpiutil>
)
# benchmark library setup
target_compile_definitions(benchmarkCpp PRIVATE benchmark_EXPORTS)
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
target_link_libraries(benchmarkCpp PRIVATE shlwapi)
endif()
if(NOT BUILD_SHARED_LIBS)
target_compile_definitions(benchmarkCpp PUBLIC -DBENCHMARK_STATIC_DEFINE)
endif()
install(
DIRECTORY src/main/native/thirdparty/benchmark/include/
DESTINATION "${include_dest}/benchmark"
)
target_include_directories(
benchmarkCpp
SYSTEM
PRIVATE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/main/native/thirdparty/benchmark/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/main/native/thirdparty/benchmark/src>
)

49
benchmark/README.md Normal file
View File

@@ -0,0 +1,49 @@
# Benchmark
This is a benchmark project built directly against this repo's sources.
## Desktop benchmarking
This command runs the Java benchmarks on desktop.
```bash
./gradlew benchmark:run
```
This command runs the C++ benchmarks on desktop.
```
./gradlew benchmark:runCpp
```
## Deploy to a roboRIO
This project can only deploy over USB. If an alternate IP address is preferred, the `address` block in benchmark/build.gradle can be changed to point to another address.
This command deploys the C++ project using shared dependencies. Prefer this one for most C++ development.
```bash
./gradlew benchmark:deployShared
```
This command deploys the C++ project with all dependencies statically linked.
```bash
./gradlew benchmark:deployStatic
```
This command deploys the Java project and all required dependencies. It also installs the JRE if it's not currently installed.
```bash
./gradlew benchmark:deployJava
```
Those commands won't start the robot executable, so you have to manually ssh in and start it. The following command will do that.
```bash
ssh lvuser@172.22.11.2 frcRunRobot.sh
```
Console log prints will appear in the terminal.
Deploying any of these to the roboRIO will disable the current startup project until it is redeployed.
## Faster builds
If your benchmarks only need some projects, you can comment out or delete unnecessary subprojects from the dependencies, benchmarkCpp, and benchmarkCppStatic blocks in benchmark/build.gradle (Java or C++) and from `target_link_libraries()` in benchmark/CMakeLists.txt (C++ only).

335
benchmark/build.gradle Normal file
View File

@@ -0,0 +1,335 @@
import edu.wpi.first.deployutils.deploy.target.RemoteTarget
import edu.wpi.first.deployutils.deploy.target.location.SshDeployLocation
import edu.wpi.first.deployutils.deploy.artifact.*
import org.gradle.internal.os.OperatingSystem
plugins {
id 'java'
id 'application'
id 'cpp'
id 'visual-studio'
}
apply plugin: 'edu.wpi.first.NativeUtils'
apply plugin: 'edu.wpi.first.DeployUtils'
apply from: "${rootDir}/shared/config.gradle"
application {
if (OperatingSystem.current().isMacOsX()) {
applicationDefaultJvmArgs = ['-XstartOnFirstThread']
}
}
ext {
sharedCvConfigs = [benchmarkCpp: []]
staticCvConfigs = [benchmarkCppStatic: []]
useJava = true
useCpp = true
skipDev = true
}
apply from: "${rootDir}/shared/opencv.gradle"
application {
mainClass = 'frc.robot.Main'
}
apply plugin: 'com.gradleup.shadow'
repositories {
maven {
url = 'https://frcmaven.wpi.edu/artifactory/ex-mvn'
}
}
dependencies {
implementation project(':apriltag')
implementation project(':cameraserver')
implementation project(':cscore')
implementation project(':epilogue-runtime')
implementation project(':hal')
implementation project(':ntcore')
implementation project(':wpilibj')
implementation project(':wpilibNewCommands')
implementation project(':wpimath')
implementation project(':wpinet')
implementation project(':wpiunits')
implementation project(':wpiutil')
annotationProcessor project(':epilogue-processor')
implementation "org.openjdk.jmh:jmh-core:1.37"
annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:1.37"
}
tasks.withType(com.github.spotbugs.snom.SpotBugsTask).configureEach {
onlyIf { false }
}
deploy {
targets {
roborio(RemoteTarget) {
directory = '/home/lvuser'
maxChannels = 4
locations {
ssh(SshDeployLocation) {
address = "172.22.11.2"
user = 'admin'
password = ''
ipv6 = false
}
}
def remote = it
artifacts.registerFactory(WPIJREArtifact) {
return objects.newInstance(WPIJREArtifact, it, remote)
}
artifacts {
all {
predeploy << { ctx ->
ctx.execute('. /etc/profile.d/natinst-path.sh; /usr/local/frc/bin/frcKillRobot.sh -t 2> /dev/null')
ctx.execute("sed -i -e 's/\"exec /\"/' /usr/local/frc/bin/frcRunRobot.sh")
}
postdeploy << { ctx ->
ctx.execute("sync")
ctx.execute("ldconfig")
}
}
benchmarkCpp(NativeExecutableArtifact) {
libraryDirectory = '/usr/local/frc/third-party/lib'
def excludes = getLibraryFilter().getExcludes()
excludes.add('**/*.so.debug')
excludes.add('**/*.so.*.debug')
postdeploy << { ctx ->
ctx.execute("echo '/home/lvuser/benchmarkCpp' > /home/lvuser/robotCommand")
ctx.execute("chmod +x /home/lvuser/robotCommand; chown lvuser /home/lvuser/robotCommand")
ctx.execute("setcap cap_sys_nice+eip \"/home/lvuser/benchmarkCpp\"")
ctx.execute('chmod +x benchmarkCpp')
}
}
benchmarkCppStatic(NativeExecutableArtifact) {
libraryDirectory = '/usr/local/frc/third-party/lib'
postdeploy << { ctx ->
ctx.execute("echo '/home/lvuser/benchmarkCppStatic' > /home/lvuser/robotCommand")
ctx.execute("chmod +x /home/lvuser/robotCommand; chown lvuser /home/lvuser/robotCommand")
ctx.execute("setcap cap_sys_nice+eip \"/home/lvuser/benchmarkCppStatic\"")
ctx.execute('chmod +x benchmarkCppStatic')
}
}
benchmarkCppJava(NativeExecutableArtifact) {
libraryDirectory = '/usr/local/frc/third-party/lib'
def excludes = getLibraryFilter().getExcludes()
excludes.add('**/*.so.debug')
excludes.add('**/*.so.*.debug')
}
jre(WPIJREArtifact) {
}
benchmarkJava(JavaArtifact) {
jarTask = shadowJar
postdeploy << { ctx ->
ctx.execute("echo '/usr/local/frc/JRE/bin/java -XX:+UseSerialGC -Djava.library.path=/usr/local/frc/third-party/lib -Djava.lang.invoke.stringConcat=BC_SB -jar /home/lvuser/benchmark-all.jar' > /home/lvuser/robotCommand")
ctx.execute("chmod +x /home/lvuser/robotCommand; chown lvuser /home/lvuser/robotCommand")
}
}
}
}
}
}
// Prevent the eclipse compiler (used by the VS Code extension for intellisense and debugging)
// from generating bad class files from annotation processors like Epilogue
eclipse {
classpath {
containers 'org.eclipse.buildship.core.gradleclasspathcontainer'
file.whenMerged { cp ->
def entries = cp.entries;
def src = new org.gradle.plugins.ide.eclipse.model.SourceFolder('build/generated/sources/annotationProcessor/java/main/', null)
entries.add(src)
}
}
}
tasks.register('deployJava') {
try {
dependsOn tasks.named('deployjreroborio')
dependsOn tasks.named('deploybenchmarkJavaroborio')
dependsOn tasks.named('deploybenchmarkCppJavaroborio') // Deploying shared C++ is how to get the Java shared libraries.
} catch (ignored) {
}
}
tasks.register('deployShared') {
try {
dependsOn tasks.named('deploybenchmarkCpproborio')
} catch (ignored) {
}
}
tasks.register('deployStatic') {
try {
dependsOn tasks.named('deploybenchmarkCppStaticroborio')
} catch (ignored) {
}
}
model {
components {
benchmarkCpp(NativeExecutableSpec) {
targetBuildTypes 'debug'
sources {
cpp {
source {
srcDirs = [
'src/main/native/cpp',
'src/main/native/thirdparty/benchmark/src'
]
includes = ['**/*.cpp']
}
exportedHeaders {
srcDirs = [
'src/main/native/include',
'src/main/native/thirdparty/benchmark/include',
'src/main/native/thirdparty/benchmark/src'
]
includes = ['**/*.h']
}
}
}
binaries.all { binary ->
if (binary.targetPlatform.name == nativeUtils.wpi.platforms.roborio) {
if (binary.buildType.name == 'debug') {
deploy.targets.roborio.artifacts.benchmarkCpp.binary = binary
deploy.targets.roborio.artifacts.benchmarkCppJava.binary = binary
}
}
lib project: ':apriltag', library: 'apriltag', linkage: 'shared'
lib project: ':cameraserver', library: 'cameraserver', linkage: 'shared'
lib project: ':cscore', library: 'cscore', linkage: 'shared'
lib project: ':cscore', library: 'cscoreJNIShared', linkage: 'shared'
project(':hal').addHalDependency(binary, 'shared')
project(':hal').addHalJniDependency(binary)
project(':ntcore').addNtcoreDependency(binary, 'shared')
project(':ntcore').addNtcoreJniDependency(binary)
lib project: ':wpilibc', library: 'wpilibc', linkage: 'shared'
lib project: ':wpilibNewCommands', library: 'wpilibNewCommands', linkage: 'shared'
lib project: ':wpimath', library: 'wpimath', linkage: 'shared'
lib project: ':wpimath', library: 'wpimathJNIShared', linkage: 'shared'
lib project: ':wpinet', library: 'wpinet', linkage: 'shared'
lib project: ':wpinet', library: 'wpinetJNIShared', linkage: 'shared'
lib project: ':wpiutil', library: 'wpiutil', linkage: 'shared'
lib project: ':wpiutil', library: 'wpiutilJNIShared', linkage: 'shared'
if (binary.targetPlatform.name == nativeUtils.wpi.platforms.roborio) {
nativeUtils.useRequiredLibrary(binary, 'ni_link_libraries', 'ni_runtime_libraries')
}
if (binary.targetPlatform.operatingSystem.isWindows()) {
// Shlwapi.lib is needed for SHGetValueA() inside thirdparty benchmark
binary.linker.args << "Shlwapi.lib"
}
binary.cppCompiler.define 'benchmark_EXPORTS'
}
}
benchmarkCppStatic(NativeExecutableSpec) {
targetBuildTypes 'debug'
nativeUtils.excludeBinariesFromStrip(it)
sources {
cpp {
source {
srcDirs = [
'src/main/native/cpp',
'src/main/native/thirdparty/benchmark/src'
]
includes = ['**/*.cpp']
}
exportedHeaders {
srcDirs = [
'src/main/native/include',
'src/main/native/thirdparty/benchmark/include',
'src/main/native/thirdparty/benchmark/src'
]
includes = ['**/*.h']
}
}
}
binaries.all { binary ->
if (binary.targetPlatform.name == nativeUtils.wpi.platforms.roborio) {
if (binary.buildType.name == 'debug') {
deploy.targets.roborio.artifacts.benchmarkCppStatic.binary = binary
}
}
lib project: ':apriltag', library: 'apriltag', linkage: 'static'
lib project: ':cameraserver', library: 'cameraserver', linkage: 'static'
lib project: ':cscore', library: 'cscore', linkage: 'static'
project(':hal').addHalDependency(binary, 'static')
project(':ntcore').addNtcoreDependency(binary, 'static')
lib project: ':wpilibc', library: 'wpilibc', linkage: 'static'
lib project: ':wpilibNewCommands', library: 'wpilibNewCommands', linkage: 'static'
lib project: ':wpimath', library: 'wpimath', linkage: 'static'
lib project: ':wpinet', library: 'wpinet', linkage: 'static'
lib project: ':wpiutil', library: 'wpiutil', linkage: 'static'
if (binary.targetPlatform.name == nativeUtils.wpi.platforms.roborio) {
nativeUtils.useRequiredLibrary(binary, 'ni_link_libraries', 'ni_runtime_libraries')
}
if (binary.targetPlatform.operatingSystem.isWindows()) {
// Shlwapi.lib is needed for SHGetValueA() inside thirdparty benchmark
binary.linker.args << "Shlwapi.lib"
}
binary.cppCompiler.define 'benchmark_EXPORTS'
binary.cppCompiler.define 'BENCHMARK_STATIC_DEFINE'
}
}
all {
it.sources.each {
it.exportedHeaders {
srcDirs 'src/main/native/thirdparty/benchmark/include'
}
}
}
}
tasks {
def c = $.components
project.tasks.create('runCpp', Exec) {
group = 'WPILib'
description = "Run the benchmarkCpp executable"
def found = false
def systemArch = getCurrentArch()
c.each {
if (it in NativeExecutableSpec && it.name == "benchmarkCpp") {
it.binaries.each {
if (!found) {
def arch = it.targetPlatform.name
if (arch == systemArch) {
dependsOn it.tasks.install
commandLine it.tasks.install.runScriptFile.get().asFile.toString()
def filePath = it.tasks.install.installDirectory.get().toString() + File.separatorChar + 'lib'
run.dependsOn it.tasks.install
run.systemProperty 'java.library.path', filePath
found = true
}
}
}
}
}
}
installAthena(Task) {
$.binaries.each {
if (it in NativeExecutableBinarySpec && it.targetPlatform.name == nativeUtils.wpi.platforms.roborio && it.component.name == 'benchmarkCpp') {
dependsOn it.tasks.install
}
}
}
installAthenaStatic(Task) {
$.binaries.each {
if (it in NativeExecutableBinarySpec && it.targetPlatform.name == nativeUtils.wpi.platforms.roborio && it.component.name == 'benchmarkCppStatic') {
dependsOn it.tasks.install
}
}
}
}
}

View File

@@ -0,0 +1,79 @@
// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
package frc.robot;
import edu.wpi.first.math.geometry.Pose2d;
import edu.wpi.first.math.geometry.Rotation2d;
import edu.wpi.first.math.path.TravelingSalesman;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.profile.GCProfiler;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;
public class Main {
private static final Pose2d[] poses = {
new Pose2d(-1, 1, Rotation2d.kCW_90deg),
new Pose2d(-1, 2, Rotation2d.kCCW_90deg),
new Pose2d(0, 0, Rotation2d.kZero),
new Pose2d(0, 3, Rotation2d.kCW_90deg),
new Pose2d(1, 1, Rotation2d.kCCW_90deg),
new Pose2d(1, 2, Rotation2d.kCCW_90deg),
};
private static final int iterations = 100;
private static final TravelingSalesman transformTraveler =
new TravelingSalesman(
(pose1, pose2) -> {
var transform = pose2.minus(pose1);
return Math.hypot(transform.getX(), transform.getY());
});
private static final TravelingSalesman twistTraveler =
new TravelingSalesman(
(pose1, pose2) -> {
var twist = pose1.log(pose2);
return Math.hypot(twist.dx, twist.dy);
});
/**
* Main function.
*
* @param args The (unused) arguments to the program.
*/
public static void main(String... args) throws RunnerException {
Options opt =
new OptionsBuilder()
.include(Main.class.getSimpleName())
.addProfiler(GCProfiler.class)
.forks(1)
.warmupIterations(2)
.warmupTime(TimeValue.seconds(3))
.measurementIterations(3)
.measurementTime(TimeValue.seconds(3))
.build();
new Runner(opt).run();
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public Pose2d[] transform() {
return transformTraveler.solve(poses, iterations);
}
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public Pose2d[] twist() {
return twistTraveler.solve(poses, iterations);
}
}

View File

@@ -0,0 +1,42 @@
// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#include <benchmark/benchmark.h>
#include <frc/geometry/Pose2d.h>
#include <frc/path/TravelingSalesman.h>
#include <units/angle.h>
#include <units/length.h>
#include <wpi/array.h>
static constexpr wpi::array<frc::Pose2d, 6> poses{
frc::Pose2d{-1_m, 1_m, -90_deg}, frc::Pose2d{-1_m, 2_m, 90_deg},
frc::Pose2d{0_m, 0_m, 0_deg}, frc::Pose2d{0_m, 3_m, -90_deg},
frc::Pose2d{1_m, 1_m, 90_deg}, frc::Pose2d{1_m, 2_m, 90_deg},
};
static constexpr int iterations = 100;
void BM_Transform(benchmark::State& state) {
frc::TravelingSalesman traveler{[](auto pose1, auto pose2) {
auto transform = pose2 - pose1;
return units::math::hypot(transform.X(), transform.Y()).value();
}};
for (auto _ : state) {
traveler.Solve(poses, iterations);
}
}
BENCHMARK(BM_Transform);
void BM_Twist(benchmark::State& state) {
frc::TravelingSalesman traveler{[](auto pose1, auto pose2) {
auto twist = pose1.Log(pose2);
return units::math::hypot(twist.dx, twist.dy).value();
}};
for (auto _ : state) {
traveler.Solve(poses, iterations);
}
}
BENCHMARK(BM_Twist);
BENCHMARK_MAIN();

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,47 @@
#ifndef BENCHMARK_EXPORT_H
#define BENCHMARK_EXPORT_H
#if defined(_WIN32)
#define EXPORT_ATTR __declspec(dllexport)
#define IMPORT_ATTR __declspec(dllimport)
#define NO_EXPORT_ATTR
#define DEPRECATED_ATTR __declspec(deprecated)
#else // _WIN32
#define EXPORT_ATTR __attribute__((visibility("default")))
#define IMPORT_ATTR __attribute__((visibility("default")))
#define NO_EXPORT_ATTR __attribute__((visibility("hidden")))
#define DEPRECATE_ATTR __attribute__((__deprecated__))
#endif // _WIN32
#ifdef BENCHMARK_STATIC_DEFINE
#define BENCHMARK_EXPORT
#define BENCHMARK_NO_EXPORT
#else // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_EXPORT
#ifdef benchmark_EXPORTS
/* We are building this library */
#define BENCHMARK_EXPORT EXPORT_ATTR
#else // benchmark_EXPORTS
/* We are using this library */
#define BENCHMARK_EXPORT IMPORT_ATTR
#endif // benchmark_EXPORTS
#endif // !BENCHMARK_EXPORT
#ifndef BENCHMARK_NO_EXPORT
#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR
#endif // !BENCHMARK_NO_EXPORT
#endif // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_DEPRECATED
#define BENCHMARK_DEPRECATED DEPRECATE_ATTR
#endif // BENCHMARK_DEPRECATED
#ifndef BENCHMARK_DEPRECATED_EXPORT
#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED
#endif // BENCHMARK_DEPRECATED_EXPORT
#ifndef BENCHMARK_DEPRECATED_NO_EXPORT
#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED
#endif // BENCHMARK_DEPRECATED_EXPORT
#endif /* BENCHMARK_EXPORT_H */

View File

@@ -0,0 +1,33 @@
#ifndef BENCHMARK_ARRAYSIZE_H_
#define BENCHMARK_ARRAYSIZE_H_
#include "internal_macros.h"
namespace benchmark {
namespace internal {
// The arraysize(arr) macro returns the # of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example. If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
//
// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type.
template <typename T, size_t N>
char (&ArraySizeHelper(T (&array)[N]))[N];
// That gcc wants both of these prototypes seems mysterious. VC, for
// its part, can't decide which to use (another mystery). Matching of
// template overloads: the final frontier.
#ifndef COMPILER_MSVC
template <typename T, size_t N>
char (&ArraySizeHelper(const T (&array)[N]))[N];
#endif
#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array)))
} // end namespace internal
} // end namespace benchmark
#endif // BENCHMARK_ARRAYSIZE_H_

View File

@@ -0,0 +1,912 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "benchmark_runner.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif
#ifdef BENCHMARK_OS_LINUX
#include <sys/personality.h>
#endif
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"
namespace benchmark {
// Print a list of benchmarks. This option overrides all other options.
BM_DEFINE_bool(benchmark_list_tests, false);
// A regular expression that specifies the set of benchmarks to execute. If
// this flag is empty, or if this flag is the string \"all\", all benchmarks
// linked into the binary are run.
BM_DEFINE_string(benchmark_filter, "");
// Specification of how long to run the benchmark.
//
// It can be either an exact number of iterations (specified as `<integer>x`),
// or a minimum number of seconds (specified as `<float>s`). If the latter
// format (ie., min seconds) is used, the system may run the benchmark longer
// until the results are considered significant.
//
// For backward compatibility, the `s` suffix may be omitted, in which case,
// the specified number is interpreted as the number of seconds.
//
// For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
// Minimum number of seconds a benchmark should be run before results should be
// taken into account. This e.g can be necessary for benchmarks of code which
// needs to fill some form of cache before performance is of interest.
// Note: results gathered within this period are discarded and not used for
// reported result.
BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
// The number of runs of each benchmark. If greater than 1, the mean and
// standard deviation of the runs will be reported.
BM_DEFINE_int32(benchmark_repetitions, 1);
// If enabled, forces each benchmark to execute exactly one iteration and one
// repetition, bypassing any configured
// MinTime()/MinWarmUpTime()/Iterations()/Repetitions()
BM_DEFINE_bool(benchmark_dry_run, false);
// If set, enable random interleaving of repetitions of all benchmarks.
// See http://github.com/google/benchmark/issues/1051 for details.
BM_DEFINE_bool(benchmark_enable_random_interleaving, false);
// Report the result of each benchmark repetitions. When 'true' is specified
// only the mean, standard deviation, and other statistics are reported for
// repeated benchmarks. Affects all reporters.
BM_DEFINE_bool(benchmark_report_aggregates_only, false);
// Display the result of each benchmark repetitions. When 'true' is specified
// only the mean, standard deviation, and other statistics are displayed for
// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects
// the display reporter, but *NOT* file reporter, which will still contain
// all the output.
BM_DEFINE_bool(benchmark_display_aggregates_only, false);
// The format to use for console output.
// Valid values are 'console', 'json', or 'csv'.
BM_DEFINE_string(benchmark_format, "console");
// The format to use for file output.
// Valid values are 'console', 'json', or 'csv'.
BM_DEFINE_string(benchmark_out_format, "json");
// The file to write additional output to.
BM_DEFINE_string(benchmark_out, "");
// Whether to use colors in the output. Valid values:
// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if
// the output is being sent to a terminal and the TERM environment variable is
// set to a terminal type that supports colors.
BM_DEFINE_string(benchmark_color, "auto");
// Whether to use tabular format when printing user counters to the console.
// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false.
BM_DEFINE_bool(benchmark_counters_tabular, false);
// List of additional perf counters to collect, in libpfm format. For more
// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
BM_DEFINE_string(benchmark_perf_counters, "");
// Extra context to include in the output formatted as comma-separated key-value
// pairs. Kept internal as it's only used for parsing from env/command line.
BM_DEFINE_kvpairs(benchmark_context, {});
// Set the default time unit to use for reports
// Valid values are 'ns', 'us', 'ms' or 's'
BM_DEFINE_string(benchmark_time_unit, "");
// The level of verbose logging to output
BM_DEFINE_int32(v, 0);
namespace internal {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
std::map<std::string, std::string>* global_context = nullptr;
BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
return global_context;
}
namespace {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
void const volatile* volatile global_force_escape_pointer;
} // namespace
// FIXME: Verify if LTO still messes this up?
void UseCharPointer(char const volatile* const v) {
// We want to escape the pointer `v` so that the compiler can not eliminate
// computations that produced it. To do that, we escape the pointer by storing
// it into a volatile variable, since generally, volatile store, is not
// something the compiler is allowed to elide.
global_force_escape_pointer = reinterpret_cast<void const volatile*>(v);
}
} // namespace internal
State::State(std::string name, IterationCount max_iters,
const std::vector<int64_t>& ranges, int thread_i, int n_threads,
internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
started_(false),
finished_(false),
skipped_(internal::NotSkipped),
range_(ranges),
complexity_n_(0),
name_(std::move(name)),
thread_index_(thread_i),
threads_(n_threads),
timer_(timer),
manager_(manager),
perf_counters_measurement_(perf_counters_measurement),
profiler_manager_(profiler_manager) {
BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
BM_CHECK_LT(thread_index_, threads_)
<< "thread_index must be less than threads";
// Add counters with correct flag now. If added with `counters[name]` in
// `PauseTiming`, a new `Counter` will be inserted the first time, which
// won't have the flag. Inserting them now also reduces the allocations
// during the benchmark.
if (perf_counters_measurement_ != nullptr) {
for (const std::string& counter_name :
perf_counters_measurement_->names()) {
counters[counter_name] = Counter(0.0, Counter::kAvgIterations);
}
}
// Note: The use of offsetof below is technically undefined until C++17
// because State is not a standard layout type. However, all compilers
// currently provide well-defined behavior as an extension (which is
// demonstrated since constexpr evaluation must diagnose all undefined
// behavior). However, GCC and Clang also warn about this use of offsetof,
// which must be suppressed.
#if defined(__INTEL_COMPILER)
#pragma warning push
#pragma warning(disable : 1875)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
#endif
#if defined(__NVCC__)
#pragma nv_diagnostic push
#pragma nv_diag_suppress 1427
#endif
#if defined(__NVCOMPILER)
#pragma diagnostic push
#pragma diag_suppress offset_in_non_POD_nonstandard
#endif
// Offset tests to ensure commonly accessed data is on the first cache line.
const int cache_line_size = 64;
static_assert(
offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
#if defined(__INTEL_COMPILER)
#pragma warning pop
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#if defined(__NVCC__)
#pragma nv_diagnostic pop
#endif
#if defined(__NVCOMPILER)
#pragma diagnostic pop
#endif
}
void State::PauseTiming() {
// Add in time accumulated so far
BM_CHECK(started_ && !finished_ && !skipped());
timer_->StopTimer();
if (perf_counters_measurement_ != nullptr) {
std::vector<std::pair<std::string, double>> measurements;
if (!perf_counters_measurement_->Stop(measurements)) {
BM_CHECK(false) << "Perf counters read the value failed.";
}
for (const auto& name_and_measurement : measurements) {
const std::string& name = name_and_measurement.first;
const double measurement = name_and_measurement.second;
// Counter was inserted with `kAvgIterations` flag by the constructor.
assert(counters.find(name) != counters.end());
counters[name].value += measurement;
}
}
}
void State::ResumeTiming() {
BM_CHECK(started_ && !finished_ && !skipped());
timer_->StartTimer();
if (perf_counters_measurement_ != nullptr) {
perf_counters_measurement_->Start();
}
}
void State::SkipWithMessage(const std::string& msg) {
skipped_ = internal::SkippedWithMessage;
{
MutexLock l(manager_->GetBenchmarkMutex());
if (internal::NotSkipped == manager_->results.skipped_) {
manager_->results.skip_message_ = msg;
manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
if (timer_->running()) {
timer_->StopTimer();
}
}
void State::SkipWithError(const std::string& msg) {
skipped_ = internal::SkippedWithError;
{
MutexLock l(manager_->GetBenchmarkMutex());
if (internal::NotSkipped == manager_->results.skipped_) {
manager_->results.skip_message_ = msg;
manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
if (timer_->running()) {
timer_->StopTimer();
}
}
void State::SetIterationTime(double seconds) {
timer_->SetIterationTime(seconds);
}
void State::SetLabel(const std::string& label) {
MutexLock l(manager_->GetBenchmarkMutex());
manager_->results.report_label_ = label;
}
void State::StartKeepRunning() {
BM_CHECK(!started_ && !finished_);
started_ = true;
total_iterations_ = skipped() ? 0 : max_iterations;
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false)) {
profiler_manager_->AfterSetupStart();
}
manager_->StartStopBarrier();
if (!skipped()) {
ResumeTiming();
}
}
void State::FinishKeepRunning() {
BM_CHECK(started_ && (!finished_ || skipped()));
if (!skipped()) {
PauseTiming();
}
// Total iterations has now wrapped around past 0. Fix this.
total_iterations_ = 0;
finished_ = true;
manager_->StartStopBarrier();
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false)) {
profiler_manager_->BeforeTeardownStop();
}
}
namespace internal {
namespace {
// Flushes streams after invoking reporter methods that write to them. This
// ensures users get timely updates even when streams are not line-buffered.
void FlushStreams(BenchmarkReporter* reporter) {
if (reporter == nullptr) {
return;
}
std::flush(reporter->GetOutputStream());
std::flush(reporter->GetErrorStream());
}
// Reports in both display and file reporters.
void Report(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter, const RunResults& run_results) {
auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only,
const RunResults& results) {
assert(reporter);
// If there are no aggregates, do output non-aggregates.
aggregates_only &= !results.aggregates_only.empty();
if (!aggregates_only) {
reporter->ReportRuns(results.non_aggregates);
}
if (!results.aggregates_only.empty()) {
reporter->ReportRuns(results.aggregates_only);
}
};
report_one(display_reporter, run_results.display_report_aggregates_only,
run_results);
if (file_reporter != nullptr) {
report_one(file_reporter, run_results.file_report_aggregates_only,
run_results);
}
FlushStreams(display_reporter);
FlushStreams(file_reporter);
}
void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter) {
// Note the file_reporter can be null.
BM_CHECK(display_reporter != nullptr);
// Determine the width of the name field using a minimum width of 10.
bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
size_t name_field_width = 10;
size_t stat_field_width = 0;
for (const BenchmarkInstance& benchmark : benchmarks) {
name_field_width =
std::max<size_t>(name_field_width, benchmark.name().str().size());
might_have_aggregates |= benchmark.repetitions() > 1;
for (const auto& Stat : benchmark.statistics()) {
stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
}
}
if (might_have_aggregates) {
name_field_width += 1 + stat_field_width;
}
// Print header here
BenchmarkReporter::Context context;
context.name_field_width = name_field_width;
// Keep track of running times of all instances of each benchmark family.
std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
per_family_reports;
if (display_reporter->ReportContext(context) &&
((file_reporter == nullptr) || file_reporter->ReportContext(context))) {
FlushStreams(display_reporter);
FlushStreams(file_reporter);
size_t num_repetitions_total = 0;
// This perfcounters object needs to be created before the runners vector
// below so it outlasts their lifetime.
PerfCountersMeasurement perfcounters(
StrSplit(FLAGS_benchmark_perf_counters, ','));
// Vector of benchmarks to run
std::vector<internal::BenchmarkRunner> runners;
runners.reserve(benchmarks.size());
// Count the number of benchmarks with threads to warn the user in case
// performance counters are used.
int benchmarks_with_threads = 0;
// Loop through all benchmarks
for (const BenchmarkInstance& benchmark : benchmarks) {
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
if (benchmark.complexity() != oNone) {
reports_for_family = &per_family_reports[benchmark.family_index()];
}
benchmarks_with_threads += static_cast<int>(benchmark.threads() > 1);
runners.emplace_back(benchmark, &perfcounters, reports_for_family);
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
num_repetitions_total +=
static_cast<size_t>(num_repeats_of_this_instance);
if (reports_for_family != nullptr) {
reports_for_family->num_runs_total += num_repeats_of_this_instance;
}
}
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
// The use of performance counters with threads would be unintuitive for
// the average user so we need to warn them about this case
if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
GetErrorLogInstance()
<< "***WARNING*** There are " << benchmarks_with_threads
<< " benchmarks with threads and " << perfcounters.num_counters()
<< " performance counters were requested. Beware counters will "
"reflect the combined usage across all "
"threads.\n";
}
std::vector<size_t> repetition_indices;
repetition_indices.reserve(num_repetitions_total);
for (size_t runner_index = 0, num_runners = runners.size();
runner_index != num_runners; ++runner_index) {
const internal::BenchmarkRunner& runner = runners[runner_index];
std::fill_n(std::back_inserter(repetition_indices),
runner.GetNumRepeats(), runner_index);
}
assert(repetition_indices.size() == num_repetitions_total &&
"Unexpected number of repetition indexes.");
if (FLAGS_benchmark_enable_random_interleaving) {
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
}
for (size_t repetition_index : repetition_indices) {
internal::BenchmarkRunner& runner = runners[repetition_index];
runner.DoOneRepetition();
if (runner.HasRepeatsRemaining()) {
continue;
}
// FIXME: report each repetition separately, not all of them in bulk.
display_reporter->ReportRunsConfig(
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
if (file_reporter != nullptr) {
file_reporter->ReportRunsConfig(
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
}
RunResults run_results = runner.GetResults();
// Maybe calculate complexity report
if (const auto* reports_for_family = runner.GetReportsForFamily()) {
if (reports_for_family->num_runs_done ==
reports_for_family->num_runs_total) {
auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
additional_run_stats.begin(),
additional_run_stats.end());
per_family_reports.erase(
static_cast<int>(reports_for_family->Runs.front().family_index));
}
}
Report(display_reporter, file_reporter, run_results);
}
}
display_reporter->Finalize();
if (file_reporter != nullptr) {
file_reporter->Finalize();
}
FlushStreams(display_reporter);
FlushStreams(file_reporter);
}
// Disable deprecated warnings temporarily because we need to reference
// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations
BENCHMARK_DISABLE_DEPRECATED_WARNING
std::unique_ptr<BenchmarkReporter> CreateReporter(
std::string const& name, ConsoleReporter::OutputOptions output_opts) {
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (name == "console") {
return PtrType(new ConsoleReporter(output_opts));
}
if (name == "json") {
return PtrType(new JSONReporter());
}
if (name == "csv") {
return PtrType(new CSVReporter());
}
std::cerr << "Unexpected format: '" << name << "'\n";
std::flush(std::cerr);
std::exit(1);
}
BENCHMARK_RESTORE_DEPRECATED_WARNING
} // end namespace
bool IsZero(double n) {
return std::abs(n) < std::numeric_limits<double>::epsilon();
}
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
int output_opts = ConsoleReporter::OO_Defaults;
auto is_benchmark_color = [force_no_color]() -> bool {
if (force_no_color) {
return false;
}
if (FLAGS_benchmark_color == "auto") {
return IsColorTerminal();
}
return IsTruthyFlagValue(FLAGS_benchmark_color);
};
if (is_benchmark_color()) {
output_opts |= ConsoleReporter::OO_Color;
} else {
output_opts &= ~ConsoleReporter::OO_Color;
}
if (FLAGS_benchmark_counters_tabular) {
output_opts |= ConsoleReporter::OO_Tabular;
} else {
output_opts &= ~ConsoleReporter::OO_Tabular;
}
return static_cast<ConsoleReporter::OutputOptions>(output_opts);
}
} // end namespace internal
BenchmarkReporter* CreateDefaultDisplayReporter() {
static auto* default_display_reporter =
internal::CreateReporter(FLAGS_benchmark_format,
internal::GetOutputOptions())
.release();
return default_display_reporter;
}
size_t RunSpecifiedBenchmarks() {
return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter);
}
size_t RunSpecifiedBenchmarks(std::string spec) {
return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec));
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) {
return RunSpecifiedBenchmarks(display_reporter, nullptr,
FLAGS_benchmark_filter);
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
std::string spec) {
return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec));
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter) {
return RunSpecifiedBenchmarks(display_reporter, file_reporter,
FLAGS_benchmark_filter);
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter,
std::string spec) {
if (spec.empty() || spec == "all") {
spec = "."; // Regexp that matches all benchmarks
}
// Setup the reporters
std::ofstream output_file;
std::unique_ptr<BenchmarkReporter> default_display_reporter;
std::unique_ptr<BenchmarkReporter> default_file_reporter;
if (display_reporter == nullptr) {
default_display_reporter.reset(CreateDefaultDisplayReporter());
display_reporter = default_display_reporter.get();
}
auto& Out = display_reporter->GetOutputStream();
auto& Err = display_reporter->GetErrorStream();
std::string const& fname = FLAGS_benchmark_out;
if (fname.empty() && (file_reporter != nullptr)) {
Err << "A custom file reporter was provided but "
"--benchmark_out=<file> was not specified.\n";
Out.flush();
Err.flush();
std::exit(1);
}
if (!fname.empty()) {
output_file.open(fname);
if (!output_file.is_open()) {
Err << "invalid file name: '" << fname << "'\n";
Out.flush();
Err.flush();
std::exit(1);
}
if (file_reporter == nullptr) {
default_file_reporter = internal::CreateReporter(
FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular
? ConsoleReporter::OO_Tabular
: ConsoleReporter::OO_None);
file_reporter = default_file_reporter.get();
}
file_reporter->SetOutputStream(&output_file);
file_reporter->SetErrorStream(&output_file);
}
std::vector<internal::BenchmarkInstance> benchmarks;
if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) {
Out.flush();
Err.flush();
return 0;
}
if (benchmarks.empty()) {
Err << "Failed to match any benchmarks against regex: " << spec << "\n";
Out.flush();
Err.flush();
return 0;
}
if (FLAGS_benchmark_list_tests) {
for (auto const& benchmark : benchmarks) {
Out << benchmark.name().str() << "\n";
}
} else {
internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
}
Out.flush();
Err.flush();
return benchmarks.size();
}
namespace {
// stores the time unit benchmarks use by default
TimeUnit default_time_unit = kNanosecond;
} // namespace
TimeUnit GetDefaultTimeUnit() { return default_time_unit; }
void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; }
std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; }
void SetBenchmarkFilter(std::string value) {
FLAGS_benchmark_filter = std::move(value);
}
int32_t GetBenchmarkVerbosity() { return FLAGS_v; }
void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager;
}
void RegisterProfilerManager(ProfilerManager* manager) {
// Don't allow overwriting an existing manager.
if (manager != nullptr) {
BM_CHECK_EQ(internal::profiler_manager, nullptr);
}
internal::profiler_manager = manager;
}
void AddCustomContext(const std::string& key, const std::string& value) {
if (internal::global_context == nullptr) {
internal::global_context = new std::map<std::string, std::string>();
}
if (!internal::global_context->emplace(key, value).second) {
std::cerr << "Failed to add custom context \"" << key << "\" as it already "
<< "exists with value \"" << value << "\"\n";
}
}
namespace internal {
void (*HelperPrintf)();
void PrintUsageAndExit() {
HelperPrintf();
std::flush(std::cout);
std::flush(std::cerr);
std::exit(0);
}
void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
if (time_unit_flag == "s") {
return SetDefaultTimeUnit(kSecond);
}
if (time_unit_flag == "ms") {
return SetDefaultTimeUnit(kMillisecond);
}
if (time_unit_flag == "us") {
return SetDefaultTimeUnit(kMicrosecond);
}
if (time_unit_flag == "ns") {
return SetDefaultTimeUnit(kNanosecond);
}
if (!time_unit_flag.empty()) {
PrintUsageAndExit();
}
}
void ParseCommandLineFlags(int* argc, char** argv) {
using namespace benchmark;
BenchmarkReporter::Context::executable_name =
((argc != nullptr) && *argc > 0) ? argv[0] : "unknown";
for (int i = 1; (argc != nullptr) && i < *argc; ++i) {
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
&FLAGS_benchmark_list_tests) ||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
ParseStringFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
&FLAGS_benchmark_min_warmup_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
ParseBoolFlag(argv[i], "benchmark_dry_run", &FLAGS_benchmark_dry_run) ||
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
&FLAGS_benchmark_enable_random_interleaving) ||
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
&FLAGS_benchmark_report_aggregates_only) ||
ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
&FLAGS_benchmark_display_aggregates_only) ||
ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) ||
ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) ||
ParseStringFlag(argv[i], "benchmark_out_format",
&FLAGS_benchmark_out_format) ||
ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
ParseBoolFlag(argv[i], "benchmark_counters_tabular",
&FLAGS_benchmark_counters_tabular) ||
ParseStringFlag(argv[i], "benchmark_perf_counters",
&FLAGS_benchmark_perf_counters) ||
ParseKeyValueFlag(argv[i], "benchmark_context",
&FLAGS_benchmark_context) ||
ParseStringFlag(argv[i], "benchmark_time_unit",
&FLAGS_benchmark_time_unit) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc - 1; ++j) {
argv[j] = argv[j + 1];
}
--(*argc);
--i;
} else if (IsFlag(argv[i], "help")) {
PrintUsageAndExit();
}
}
for (auto const* flag :
{&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) {
if (*flag != "console" && *flag != "json" && *flag != "csv") {
PrintUsageAndExit();
}
}
SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit);
if (FLAGS_benchmark_color.empty()) {
PrintUsageAndExit();
}
if (FLAGS_benchmark_dry_run) {
AddCustomContext("dry_run", "true");
}
for (const auto& kv : FLAGS_benchmark_context) {
AddCustomContext(kv.first, kv.second);
}
}
int InitializeStreams() {
static std::ios_base::Init init;
return 0;
}
template <typename T>
std::make_unsigned_t<T> get_as_unsigned(T v) {
using UnsignedT = std::make_unsigned_t<T>;
return static_cast<UnsignedT>(v);
}
} // end namespace internal
void MaybeReenterWithoutASLR(int /*argc*/, char** argv) {
// On e.g. Hexagon simulator, argv may be NULL.
if (!argv) return;
#ifdef BENCHMARK_OS_LINUX
const auto curr_personality = personality(0xffffffff);
// We should never fail to read-only query the current personality,
// but let's be cautious.
if (curr_personality == -1) return;
// If ASLR is already disabled, we have nothing more to do.
if (internal::get_as_unsigned(curr_personality) & ADDR_NO_RANDOMIZE) return;
// Try to change the personality to disable ASLR.
const auto proposed_personality =
internal::get_as_unsigned(curr_personality) | ADDR_NO_RANDOMIZE;
const auto prev_personality = personality(proposed_personality);
// Have we failed to change the personality? That may happen.
if (prev_personality == -1) return;
// Make sure the parsona has been updated with the no-ASLR flag,
// otherwise we will try to reenter infinitely.
// This seems impossible, but can happen in some docker configurations.
const auto new_personality = personality(0xffffffff);
if ((internal::get_as_unsigned(new_personality) & ADDR_NO_RANDOMIZE) == 0)
return;
execv(argv[0], argv);
// The exec() functions return only if an error has occurred,
// in which case we want to just continue as-is.
#else
return;
#endif
}
std::string GetBenchmarkVersion() {
#ifdef BENCHMARK_VERSION
return {BENCHMARK_VERSION};
#else
return {""};
#endif
}
void PrintDefaultHelp() {
fprintf(stdout,
"benchmark"
" [--benchmark_list_tests={true|false}]\n"
" [--benchmark_filter=<regex>]\n"
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_dry_run={true|false}]\n"
" [--benchmark_enable_random_interleaving={true|false}]\n"
" [--benchmark_report_aggregates_only={true|false}]\n"
" [--benchmark_display_aggregates_only={true|false}]\n"
" [--benchmark_format=<console|json|csv>]\n"
" [--benchmark_out=<filename>]\n"
" [--benchmark_out_format=<json|console|csv>]\n"
" [--benchmark_color={auto|true|false}]\n"
" [--benchmark_counters_tabular={true|false}]\n"
#if defined HAVE_LIBPFM
" [--benchmark_perf_counters=<counter>,...]\n"
#endif
" [--benchmark_context=<key>=<value>,...]\n"
" [--benchmark_time_unit={ns|us|ms|s}]\n"
" [--v=<verbosity>]\n");
}
void Initialize(int* argc, char** argv, void (*HelperPrintf)()) {
internal::HelperPrintf = HelperPrintf;
internal::ParseCommandLineFlags(argc, argv);
internal::LogLevel() = FLAGS_v;
}
void Shutdown() { delete internal::global_context; }
bool ReportUnrecognizedArguments(int argc, char** argv) {
for (int i = 1; i < argc; ++i) {
fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0],
argv[i]);
}
return argc > 1;
}
} // end namespace benchmark

View File

@@ -0,0 +1,118 @@
#include "benchmark_api_internal.h"
#include <cinttypes>
#include "string_util.h"
namespace benchmark {
namespace internal {
BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
int per_family_instance_idx,
const std::vector<int64_t>& args,
int thread_count)
: benchmark_(*benchmark),
family_index_(family_idx),
per_family_instance_index_(per_family_instance_idx),
aggregation_report_mode_(benchmark_.aggregation_report_mode_),
args_(args),
time_unit_(benchmark_.GetTimeUnit()),
measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
use_real_time_(benchmark_.use_real_time_),
use_manual_time_(benchmark_.use_manual_time_),
complexity_(benchmark_.complexity_),
complexity_lambda_(benchmark_.complexity_lambda_),
statistics_(benchmark_.statistics_),
repetitions_(benchmark_.repetitions_),
min_time_(benchmark_.min_time_),
min_warmup_time_(benchmark_.min_warmup_time_),
iterations_(benchmark_.iterations_),
threads_(thread_count),
setup_(benchmark_.setup_),
teardown_(benchmark_.teardown_) {
name_.function_name = benchmark_.name_;
size_t arg_i = 0;
for (const auto& arg : args) {
if (!name_.args.empty()) {
name_.args += '/';
}
if (arg_i < benchmark->arg_names_.size()) {
const auto& arg_name = benchmark_.arg_names_[arg_i];
if (!arg_name.empty()) {
name_.args += StrFormat("%s:", arg_name.c_str());
}
}
name_.args += StrFormat("%" PRId64, arg);
++arg_i;
}
if (!IsZero(benchmark->min_time_)) {
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
}
if (!IsZero(benchmark->min_warmup_time_)) {
name_.min_warmup_time =
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
}
if (benchmark_.iterations_ != 0) {
name_.iterations = StrFormat(
"iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_));
}
if (benchmark_.repetitions_ != 0) {
name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_);
}
if (benchmark_.measure_process_cpu_time_) {
name_.time_type = "process_time";
}
if (benchmark_.use_manual_time_) {
if (!name_.time_type.empty()) {
name_.time_type += '/';
}
name_.time_type += "manual_time";
} else if (benchmark_.use_real_time_) {
if (!name_.time_type.empty()) {
name_.time_type += '/';
}
name_.time_type += "real_time";
}
if (!benchmark_.thread_counts_.empty()) {
name_.threads = StrFormat("threads:%d", threads_);
}
}
State BenchmarkInstance::Run(
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const {
State st(name_.function_name, iters, args_, thread_id, threads_, timer,
manager, perf_counters_measurement, profiler_manager);
benchmark_.Run(st);
return st;
}
void BenchmarkInstance::Setup() const {
if (setup_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
setup_(st);
}
}
void BenchmarkInstance::Teardown() const {
if (teardown_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
teardown_(st);
}
}
} // namespace internal
} // namespace benchmark

View File

@@ -0,0 +1,90 @@
#ifndef BENCHMARK_API_INTERNAL_H
#define BENCHMARK_API_INTERNAL_H
#include <cmath>
#include <iosfwd>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
namespace benchmark {
namespace internal {
// Information kept per benchmark we may want to run
class BenchmarkInstance {
public:
BenchmarkInstance(Benchmark* benchmark, int family_idx,
int per_family_instance_idx,
const std::vector<int64_t>& args, int thread_count);
const BenchmarkName& name() const { return name_; }
int family_index() const { return family_index_; }
int per_family_instance_index() const { return per_family_instance_index_; }
AggregationReportMode aggregation_report_mode() const {
return aggregation_report_mode_;
}
TimeUnit time_unit() const { return time_unit_; }
bool measure_process_cpu_time() const { return measure_process_cpu_time_; }
bool use_real_time() const { return use_real_time_; }
bool use_manual_time() const { return use_manual_time_; }
BigO complexity() const { return complexity_; }
BigOFunc* complexity_lambda() const { return complexity_lambda_; }
const std::vector<Statistics>& statistics() const { return statistics_; }
int repetitions() const { return repetitions_; }
double min_time() const { return min_time_; }
double min_warmup_time() const { return min_warmup_time_; }
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }
void Setup() const;
void Teardown() const;
const auto& GetUserThreadRunnerFactory() const {
return benchmark_.threadrunner_;
}
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const;
private:
BenchmarkName name_;
Benchmark& benchmark_;
const int family_index_;
const int per_family_instance_index_;
AggregationReportMode aggregation_report_mode_;
const std::vector<int64_t>& args_;
TimeUnit time_unit_;
bool measure_process_cpu_time_;
bool use_real_time_;
bool use_manual_time_;
BigO complexity_;
BigOFunc* complexity_lambda_;
UserCounters counters_;
const std::vector<Statistics>& statistics_;
int repetitions_;
double min_time_;
double min_warmup_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to us
callback_function setup_;
callback_function teardown_;
};
bool FindBenchmarksInternal(const std::string& re,
std::vector<BenchmarkInstance>* benchmarks,
std::ostream* Err);
bool IsZero(double n);
BENCHMARK_EXPORT
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
} // end namespace internal
} // end namespace benchmark
#endif // BENCHMARK_API_INTERNAL_H

View File

@@ -0,0 +1,59 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <benchmark/benchmark.h>
namespace benchmark {
namespace {
// Compute the total size of a pack of std::strings
size_t size_impl() { return 0; }
template <typename Head, typename... Tail>
size_t size_impl(const Head& head, const Tail&... tail) {
return head.size() + size_impl(tail...);
}
// Join a pack of std::strings using a delimiter
// TODO(dominic): use absl::StrJoin
void join_impl(std::string& /*unused*/, char /*unused*/) {}
template <typename Head, typename... Tail>
void join_impl(std::string& s, const char delimiter, const Head& head,
const Tail&... tail) {
if (!s.empty() && !head.empty()) {
s += delimiter;
}
s += head;
join_impl(s, delimiter, tail...);
}
template <typename... Ts>
std::string join(char delimiter, const Ts&... ts) {
std::string s;
s.reserve(sizeof...(Ts) + size_impl(ts...));
join_impl(s, delimiter, ts...);
return s;
}
} // namespace
BENCHMARK_EXPORT
std::string BenchmarkName::str() const {
return join('/', function_name, args, min_time, min_warmup_time, iterations,
repetitions, time_type, threads);
}
} // namespace benchmark

View File

@@ -0,0 +1,544 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark_register.h"
#ifndef BENCHMARK_OS_WINDOWS
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <thread>
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "check.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
constexpr int kRangeMultiplier = 8;
// The size of a benchmark family determines is the number of inputs to repeat
// the benchmark on. If this is "large" then warn the user during configuration.
constexpr size_t kMaxFamilySize = 100;
constexpr char kDisabledPrefix[] = "DISABLED_";
} // end namespace
namespace internal {
//=============================================================================//
// BenchmarkFamilies
//=============================================================================//
// Class for managing registered benchmarks. Note that each registered
// benchmark identifies a family of related benchmarks to run.
class BenchmarkFamilies {
public:
static BenchmarkFamilies* GetInstance();
// Registers a benchmark family and returns the index assigned to it.
size_t AddBenchmark(std::unique_ptr<Benchmark> family);
// Clear all registered benchmark families.
void ClearBenchmarks();
// Extract the list of benchmark instances that match the specified
// regular expression.
bool FindBenchmarks(std::string spec,
std::vector<BenchmarkInstance>* benchmarks,
std::ostream* Err);
private:
BenchmarkFamilies() {}
std::vector<std::unique_ptr<Benchmark>> families_;
Mutex mutex_;
};
BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
static BenchmarkFamilies instance;
return &instance;
}
size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
MutexLock l(mutex_);
size_t index = families_.size();
families_.push_back(std::move(family));
return index;
}
void BenchmarkFamilies::ClearBenchmarks() {
MutexLock l(mutex_);
families_.clear();
families_.shrink_to_fit();
}
bool BenchmarkFamilies::FindBenchmarks(
std::string spec, std::vector<BenchmarkInstance>* benchmarks,
std::ostream* ErrStream) {
BM_CHECK(ErrStream);
auto& Err = *ErrStream;
// Make regular expression out of command-line flag
std::string error_msg;
Regex re;
bool is_negative_filter = false;
if (spec[0] == '-') {
spec.replace(0, 1, "");
is_negative_filter = true;
}
if (!re.Init(spec, &error_msg)) {
Err << "Could not compile benchmark re: " << error_msg << '\n';
return false;
}
// Special list of thread counts to use when none are specified
const std::vector<int> one_thread = {1};
int next_family_index = 0;
MutexLock l(mutex_);
for (std::unique_ptr<Benchmark>& family : families_) {
int family_index = next_family_index;
int per_family_instance_index = 0;
// Family was deleted or benchmark doesn't match
if (!family) {
continue;
}
if (family->ArgsCnt() == -1) {
family->Args({});
}
const std::vector<int>* thread_counts =
(family->thread_counts_.empty()
? &one_thread
: &static_cast<const std::vector<int>&>(family->thread_counts_));
const size_t family_size = family->args_.size() * thread_counts->size();
// The benchmark will be run at least 'family_size' different inputs.
// If 'family_size' is very large warn the user.
if (family_size > kMaxFamilySize) {
Err << "The number of inputs is very large. " << family->name_
<< " will be repeated at least " << family_size << " times.\n";
}
// reserve in the special case the regex ".", since we know the final
// family size. this doesn't take into account any disabled benchmarks
// so worst case we reserve more than we need.
if (spec == ".") {
benchmarks->reserve(benchmarks->size() + family_size);
}
for (auto const& args : family->args_) {
for (int num_threads : *thread_counts) {
BenchmarkInstance instance(family.get(), family_index,
per_family_instance_index, args,
num_threads);
const auto full_name = instance.name().str();
if (full_name.rfind(kDisabledPrefix, 0) != 0 &&
((re.Match(full_name) && !is_negative_filter) ||
(!re.Match(full_name) && is_negative_filter))) {
benchmarks->push_back(std::move(instance));
++per_family_instance_index;
// Only bump the next family index once we've estabilished that
// at least one instance of this family will be run.
if (next_family_index == family_index) {
++next_family_index;
}
}
}
}
}
return true;
}
Benchmark* RegisterBenchmarkInternal(std::unique_ptr<Benchmark> bench) {
Benchmark* bench_ptr = bench.get();
BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
families->AddBenchmark(std::move(bench));
return bench_ptr;
}
// FIXME: This function is a hack so that benchmark.cc can access
// `BenchmarkFamilies`
bool FindBenchmarksInternal(const std::string& re,
std::vector<BenchmarkInstance>* benchmarks,
std::ostream* Err) {
return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err);
}
//=============================================================================//
// Benchmark
//=============================================================================//
Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
time_unit_(GetDefaultTimeUnit()),
use_default_time_unit_(true),
range_multiplier_(kRangeMultiplier),
min_time_(0),
min_warmup_time_(0),
iterations_(0),
repetitions_(0),
measure_process_cpu_time_(false),
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
complexity_lambda_(nullptr) {
ComputeStatistics("mean", StatisticsMean);
ComputeStatistics("median", StatisticsMedian);
ComputeStatistics("stddev", StatisticsStdDev);
ComputeStatistics("cv", StatisticsCV, kPercentage);
}
Benchmark::~Benchmark() {}
Benchmark* Benchmark::Name(const std::string& name) {
SetName(name);
return this;
}
Benchmark* Benchmark::Arg(int64_t x) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
args_.push_back({x});
return this;
}
Benchmark* Benchmark::Unit(TimeUnit unit) {
time_unit_ = unit;
use_default_time_unit_ = false;
return this;
}
Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
std::vector<int64_t> arglist;
AddRange(&arglist, start, limit, range_multiplier_);
for (int64_t i : arglist) {
args_.push_back({i});
}
return this;
}
Benchmark* Benchmark::Ranges(
const std::vector<std::pair<int64_t, int64_t>>& ranges) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
std::vector<std::vector<int64_t>> arglists(ranges.size());
for (std::size_t i = 0; i < ranges.size(); i++) {
AddRange(&arglists[i], ranges[i].first, ranges[i].second,
range_multiplier_);
}
ArgsProduct(arglists);
return this;
}
Benchmark* Benchmark::ArgsProduct(
const std::vector<std::vector<int64_t>>& arglists) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
std::vector<std::size_t> indices(arglists.size());
const std::size_t total = std::accumulate(
std::begin(arglists), std::end(arglists), std::size_t{1},
[](const std::size_t res, const std::vector<int64_t>& arglist) {
return res * arglist.size();
});
std::vector<int64_t> args;
args.reserve(arglists.size());
for (std::size_t i = 0; i < total; i++) {
for (std::size_t arg = 0; arg < arglists.size(); arg++) {
args.push_back(arglists[arg][indices[arg]]);
}
args_.push_back(args);
args.clear();
std::size_t arg = 0;
do {
indices[arg] = (indices[arg] + 1) % arglists[arg].size();
} while (indices[arg++] == 0 && arg < arglists.size());
}
return this;
}
Benchmark* Benchmark::ArgName(const std::string& name) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
arg_names_ = {name};
return this;
}
Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
arg_names_ = names;
return this;
}
Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK_LE(start, limit);
for (int64_t arg = start; arg <= limit; arg += step) {
args_.push_back({arg});
}
return this;
}
Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
args_.push_back(args);
return this;
}
Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
custom_arguments(this);
return this;
}
Benchmark* Benchmark::Setup(callback_function&& setup) {
BM_CHECK(setup != nullptr);
setup_ = std::forward<callback_function>(setup);
return this;
}
Benchmark* Benchmark::Setup(const callback_function& setup) {
BM_CHECK(setup != nullptr);
setup_ = setup;
return this;
}
Benchmark* Benchmark::Teardown(callback_function&& teardown) {
BM_CHECK(teardown != nullptr);
teardown_ = std::forward<callback_function>(teardown);
return this;
}
Benchmark* Benchmark::Teardown(const callback_function& teardown) {
BM_CHECK(teardown != nullptr);
teardown_ = teardown;
return this;
}
Benchmark* Benchmark::RangeMultiplier(int multiplier) {
BM_CHECK(multiplier > 1);
range_multiplier_ = multiplier;
return this;
}
Benchmark* Benchmark::MinTime(double t) {
BM_CHECK(t > 0.0);
BM_CHECK(iterations_ == 0);
min_time_ = t;
return this;
}
Benchmark* Benchmark::MinWarmUpTime(double t) {
BM_CHECK(t >= 0.0);
BM_CHECK(iterations_ == 0);
min_warmup_time_ = t;
return this;
}
Benchmark* Benchmark::Iterations(IterationCount n) {
BM_CHECK(n > 0);
BM_CHECK(IsZero(min_time_));
BM_CHECK(IsZero(min_warmup_time_));
iterations_ = n;
return this;
}
Benchmark* Benchmark::Repetitions(int n) {
BM_CHECK(n > 0);
repetitions_ = n;
return this;
}
Benchmark* Benchmark::ReportAggregatesOnly(bool value) {
aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default;
return this;
}
Benchmark* Benchmark::DisplayAggregatesOnly(bool value) {
// If we were called, the report mode is no longer 'unspecified', in any case.
aggregation_report_mode_ = static_cast<AggregationReportMode>(
aggregation_report_mode_ | ARM_Default);
if (value) {
aggregation_report_mode_ = static_cast<AggregationReportMode>(
aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly);
} else {
aggregation_report_mode_ = static_cast<AggregationReportMode>(
aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly);
}
return this;
}
Benchmark* Benchmark::MeasureProcessCPUTime() {
// Can be used together with UseRealTime() / UseManualTime().
measure_process_cpu_time_ = true;
return this;
}
Benchmark* Benchmark::UseRealTime() {
BM_CHECK(!use_manual_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_real_time_ = true;
return this;
}
Benchmark* Benchmark::UseManualTime() {
BM_CHECK(!use_real_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_manual_time_ = true;
return this;
}
Benchmark* Benchmark::Complexity(BigO complexity) {
complexity_ = complexity;
return this;
}
Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
complexity_lambda_ = complexity;
complexity_ = oLambda;
return this;
}
Benchmark* Benchmark::ComputeStatistics(const std::string& name,
StatisticsFunc* statistics,
StatisticUnit unit) {
statistics_.emplace_back(name, statistics, unit);
return this;
}
Benchmark* Benchmark::Threads(int t) {
BM_CHECK_GT(t, 0);
thread_counts_.push_back(t);
return this;
}
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
AddRange(&thread_counts_, min_threads, max_threads, 2);
return this;
}
Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
int stride) {
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
BM_CHECK_GE(stride, 1);
for (auto i = min_threads; i < max_threads; i += stride) {
thread_counts_.push_back(i);
}
thread_counts_.push_back(max_threads);
return this;
}
Benchmark* Benchmark::ThreadPerCpu() {
thread_counts_.push_back(CPUInfo::Get().num_cpus);
return this;
}
Benchmark* Benchmark::ThreadRunner(threadrunner_factory&& factory) {
threadrunner_ = std::move(factory);
return this;
}
void Benchmark::SetName(const std::string& name) { name_ = name; }
const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
if (arg_names_.empty()) {
return -1;
}
return static_cast<int>(arg_names_.size());
}
return static_cast<int>(args_.front().size());
}
const char* Benchmark::GetArgName(int arg) const {
BM_CHECK_GE(arg, 0);
size_t uarg = static_cast<size_t>(arg);
BM_CHECK_LT(uarg, arg_names_.size());
return arg_names_[uarg].c_str();
}
TimeUnit Benchmark::GetTimeUnit() const {
return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
}
//=============================================================================//
// FunctionBenchmark
//=============================================================================//
void FunctionBenchmark::Run(State& st) { func_(st); }
} // end namespace internal
void ClearRegisteredBenchmarks() {
internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
}
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) {
std::vector<int64_t> args;
internal::AddRange(&args, lo, hi, multi);
return args;
}
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) {
BM_CHECK_LE(start, limit);
std::vector<int64_t> args;
for (int64_t arg = start; arg <= limit; arg += step) {
args.push_back(arg);
}
return args;
}
} // end namespace benchmark

View File

@@ -0,0 +1,109 @@
#ifndef BENCHMARK_REGISTER_H
#define BENCHMARK_REGISTER_H
#include <algorithm>
#include <limits>
#include <vector>
#include "check.h"
namespace benchmark {
namespace internal {
// Append the powers of 'mult' in the closed interval [lo, hi].
// Returns iterator to the start of the inserted range.
template <typename T>
typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi,
int mult) {
BM_CHECK_GE(lo, 0);
BM_CHECK_GE(hi, lo);
BM_CHECK_GE(mult, 2);
const size_t start_offset = dst->size();
static const T kmax = std::numeric_limits<T>::max();
// Space out the values in multiples of "mult"
for (T i = static_cast<T>(1); i <= hi; i = static_cast<T>(i * mult)) {
if (i >= lo) {
dst->push_back(i);
}
// Break the loop here since multiplying by
// 'mult' would move outside of the range of T
if (i > kmax / mult) break;
}
return dst->begin() + static_cast<int>(start_offset);
}
template <typename T>
void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) {
// We negate lo and hi so we require that they cannot be equal to 'min'.
BM_CHECK_GT(lo, std::numeric_limits<T>::min());
BM_CHECK_GT(hi, std::numeric_limits<T>::min());
BM_CHECK_GE(hi, lo);
BM_CHECK_LE(hi, 0);
// Add positive powers, then negate and reverse.
// Casts necessary since small integers get promoted
// to 'int' when negating.
const auto lo_complement = static_cast<T>(-lo);
const auto hi_complement = static_cast<T>(-hi);
const auto it = AddPowers(dst, hi_complement, lo_complement, mult);
std::for_each(it, dst->end(), [](T& t) { t = static_cast<T>(t * -1); });
std::reverse(it, dst->end());
}
template <typename T>
void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
"Args type must be a signed integer");
BM_CHECK_GE(hi, lo);
BM_CHECK_GE(mult, 2);
// Add "lo"
dst->push_back(lo);
// Handle lo == hi as a special case, so we then know
// lo < hi and so it is safe to add 1 to lo and subtract 1
// from hi without falling outside of the range of T.
if (lo == hi) return;
// Ensure that lo_inner <= hi_inner below.
if (lo + 1 == hi) {
dst->push_back(hi);
return;
}
// Add all powers of 'mult' in the range [lo+1, hi-1] (inclusive).
const auto lo_inner = static_cast<T>(lo + 1);
const auto hi_inner = static_cast<T>(hi - 1);
// Insert negative values
if (lo_inner < 0) {
AddNegatedPowers(dst, lo_inner, std::min(hi_inner, T{-1}), mult);
}
// Treat 0 as a special case (see discussion on #762).
if (lo < 0 && hi >= 0) {
dst->push_back(0);
}
// Insert positive values
if (hi_inner > 0) {
AddPowers(dst, std::max(lo_inner, T{1}), hi_inner, mult);
}
// Add "hi" (if different from last value).
if (hi != dst->back()) {
dst->push_back(hi);
}
}
} // namespace internal
} // namespace benchmark
#endif // BENCHMARK_REGISTER_H

View File

@@ -0,0 +1,573 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark_runner.h"
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <atomic>
#include <climits>
#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <thread>
#include <utility>
#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"
namespace benchmark {
BM_DECLARE_bool(benchmark_dry_run);
BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
BM_DECLARE_bool(benchmark_display_aggregates_only);
BM_DECLARE_string(benchmark_perf_counters);
namespace internal {
MemoryManager* memory_manager = nullptr;
ProfilerManager* profiler_manager = nullptr;
namespace {
constexpr IterationCount kMaxIterations = 1000000000000;
const double kDefaultMinTime =
std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
const internal::ThreadManager::Result& results,
IterationCount memory_iterations,
const MemoryManager::Result& memory_result, double seconds,
int64_t repetition_index, int64_t repeats) {
// Create report about this benchmark run.
BenchmarkReporter::Run report;
report.run_name = b.name();
report.family_index = b.family_index();
report.per_family_instance_index = b.per_family_instance_index();
report.skipped = results.skipped_;
report.skip_message = results.skip_message_;
report.report_label = results.report_label_;
// This is the total iterations across all threads.
report.iterations = results.iterations;
report.time_unit = b.time_unit();
report.threads = b.threads();
report.repetition_index = repetition_index;
report.repetitions = repeats;
if (report.skipped == 0u) {
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
} else {
report.real_accumulated_time = results.real_time_used;
}
report.use_real_time_for_initial_big_o = b.use_manual_time();
report.cpu_accumulated_time = results.cpu_time_used;
report.complexity_n = results.complexity_n;
report.complexity = b.complexity();
report.complexity_lambda = b.complexity_lambda();
report.statistics = &b.statistics();
report.counters = results.counters;
if (memory_iterations > 0) {
report.memory_result = memory_result;
report.allocs_per_iter =
memory_iterations != 0
? static_cast<double>(memory_result.num_allocs) /
static_cast<double>(memory_iterations)
: 0;
}
internal::Finish(&report.counters, results.iterations, seconds,
b.threads());
}
return report;
}
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
int thread_id, ThreadManager* manager,
PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager_) {
internal::ThreadTimer timer(
b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
State st = b->Run(iters, thread_id, &timer, manager,
perf_counters_measurement, profiler_manager_);
if (!(st.skipped() || st.iterations() >= st.max_iterations)) {
st.SkipWithError(
"The benchmark didn't run, nor was it explicitly skipped. Please call "
"'SkipWithXXX` in your benchmark as appropriate.");
}
{
MutexLock l(manager->GetBenchmarkMutex());
internal::ThreadManager::Result& results = manager->results;
results.iterations += st.iterations();
results.cpu_time_used += timer.cpu_time_used();
results.real_time_used += timer.real_time_used();
results.manual_time_used += timer.manual_time_used();
results.complexity_n += st.complexity_length_n();
internal::Increment(&results.counters, st.counters);
}
manager->NotifyThreadComplete();
}
double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (!IsZero(b.min_time())) {
return b.min_time();
}
// If the flag was used to specify number of iters, then return the default
// min_time.
if (iters_or_time.tag == BenchTimeType::ITERS) {
return kDefaultMinTime;
}
return iters_or_time.time;
}
IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (b.iterations() != 0) {
return b.iterations();
}
// We've already concluded that this flag is currently used to pass
// iters but do a check here again anyway.
BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
return iters_or_time.iters;
}
class ThreadRunnerDefault : public ThreadRunnerBase {
public:
explicit ThreadRunnerDefault(int num_threads)
: pool(static_cast<size_t>(num_threads - 1)) {}
void RunThreads(const std::function<void(int)>& fn) final {
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(fn, static_cast<int>(ti + 1));
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
fn(0);
// The main thread has finished. Now let's wait for the other threads.
for (std::thread& thread : pool) {
thread.join();
}
}
private:
std::vector<std::thread> pool;
};
std::unique_ptr<ThreadRunnerBase> GetThreadRunner(
const threadrunner_factory& userThreadRunnerFactory, int num_threads) {
return userThreadRunnerFactory
? userThreadRunnerFactory(num_threads)
: std::make_unique<ThreadRunnerDefault>(num_threads);
}
} // end namespace
BenchTimeType ParseBenchMinTime(const std::string& value) {
BenchTimeType ret = {};
if (value.empty()) {
ret.tag = BenchTimeType::TIME;
ret.time = 0.0;
return ret;
}
if (value.back() == 'x') {
char* p_end = nullptr;
// Reset errno before it's changed by strtol.
errno = 0;
IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
// After a valid parse, p_end should have been set to
// point to the 'x' suffix.
BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
<< "Malformed iters value passed to --benchmark_min_time: `" << value
<< "`. Expected --benchmark_min_time=<integer>x.";
ret.tag = BenchTimeType::ITERS;
ret.iters = num_iters;
return ret;
}
bool has_suffix = value.back() == 's';
if (!has_suffix) {
BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
"Eg., `30s` for 30-seconds.";
}
char* p_end = nullptr;
// Reset errno before it's changed by strtod.
errno = 0;
double min_time = std::strtod(value.c_str(), &p_end);
// After a successful parse, p_end should point to the suffix 's',
// or the end of the string if the suffix was omitted.
BM_CHECK(errno == 0 && p_end != nullptr &&
((has_suffix && *p_end == 's') || *p_end == '\0'))
<< "Malformed seconds value passed to --benchmark_min_time: `" << value
<< "`. Expected --benchmark_min_time=<float>x.";
ret.tag = BenchTimeType::TIME;
ret.time = min_time;
return ret;
}
BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
reports_for_family(reports_for_family_),
parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
min_time(FLAGS_benchmark_dry_run
? 0
: ComputeMinTime(b_, parsed_benchtime_flag)),
min_warmup_time(
FLAGS_benchmark_dry_run
? 0
: ((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
? b.min_warmup_time()
: FLAGS_benchmark_min_warmup_time)),
warmup_done(FLAGS_benchmark_dry_run ? true : !(min_warmup_time > 0.0)),
repeats(FLAGS_benchmark_dry_run
? 1
: (b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions)),
has_explicit_iteration_count(b.iterations() != 0 ||
parsed_benchtime_flag.tag ==
BenchTimeType::ITERS),
thread_runner(
GetThreadRunner(b.GetUserThreadRunnerFactory(), b.threads())),
iters(FLAGS_benchmark_dry_run
? 1
: (has_explicit_iteration_count
? ComputeIters(b_, parsed_benchtime_flag)
: 1)),
perf_counters_measurement_ptr(pcm_) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
run_results.file_report_aggregates_only =
FLAGS_benchmark_report_aggregates_only;
if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
((b.aggregation_report_mode() &
internal::ARM_DisplayReportAggregatesOnly) != 0u);
run_results.file_report_aggregates_only =
((b.aggregation_report_mode() &
internal::ARM_FileReportAggregatesOnly) != 0u);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
(perf_counters_measurement_ptr->num_counters() == 0))
<< "Perf counters were requested but could not be set up.";
}
}
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(b.threads()));
thread_runner->RunThreads([&](int thread_idx) {
RunInThread(&b, iters, thread_idx, manager.get(),
perf_counters_measurement_ptr, /*profiler_manager=*/nullptr);
});
IterationResults i;
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
{
MutexLock l(manager->GetBenchmarkMutex());
i.results = manager->results;
}
// And get rid of the manager.
manager.reset();
BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
<< i.results.real_time_used << "\n";
// By using KeepRunningBatch a benchmark can iterate more times than
// requested, so take the iteration count from i.results.
i.iters = i.results.iterations / b.threads();
// Base decisions off of real time if requested by this benchmark.
i.seconds = i.results.cpu_time_used;
if (b.use_manual_time()) {
i.seconds = i.results.manual_time_used;
} else if (b.use_real_time()) {
i.seconds = i.results.real_time_used;
}
return i;
}
IterationCount BenchmarkRunner::PredictNumItersNeeded(
const IterationResults& i) const {
// See how much iterations should be increased by.
// Note: Avoid division by zero with max(seconds, 1ns).
double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9);
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
// use the multiplier directly.
// Otherwise we use at most 10 times expansion.
// NOTE: When the last run was at least 10% of the min time the max
// expansion should be 14x.
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
multiplier = is_significant ? multiplier : 10.0;
// So what seems to be the sufficiently-large iteration count? Round up.
const IterationCount max_next_iters = static_cast<IterationCount>(
std::llround(std::max(multiplier * static_cast<double>(i.iters),
static_cast<double>(i.iters) + 1.0)));
// But we do have *some* limits though..
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
return next_iters; // round up before conversion to integer.
}
bool BenchmarkRunner::ShouldReportIterationResults(
const IterationResults& i) const {
// Determine if this run should be reported;
// Either it has run for a sufficient amount of time
// or because an error was reported.
return (i.results.skipped_ != 0u) || FLAGS_benchmark_dry_run ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >=
GetMinTimeToApply() || // The elapsed time is large enough.
// CPU time is specified but the elapsed real time greatly exceeds
// the minimum time.
// Note that user provided timers are except from this test.
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
!b.use_manual_time());
}
double BenchmarkRunner::GetMinTimeToApply() const {
// In order to re-use functionality to run and measure benchmarks for running
// a warmup phase of the benchmark, we need a way of telling whether to apply
// min_time or min_warmup_time. This function will figure out if we are in the
// warmup phase and therefore need to apply min_warmup_time or if we already
// in the benchmarking phase and min_time needs to be applied.
return warmup_done ? min_time : min_warmup_time;
}
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
warmup_done = true;
iters = i;
}
void BenchmarkRunner::RunWarmUp() {
// Use the same mechanisms for warming up the benchmark as used for actually
// running and measuring the benchmark.
IterationResults i_warmup;
// Dont use the iterations determined in the warmup phase for the actual
// measured benchmark phase. While this may be a good starting point for the
// benchmark and it would therefore get rid of the need to figure out how many
// iterations are needed if min_time is set again, this may also be a complete
// wrong guess since the warmup loops might be considerably slower (e.g
// because of caching effects).
const IterationCount i_backup = iters;
for (;;) {
b.Setup();
i_warmup = DoNIterations();
b.Teardown();
const bool finish = ShouldReportIterationResults(i_warmup);
if (finish) {
FinishWarmUp(i_backup);
break;
}
// Although we are running "only" a warmup phase where running enough
// iterations at once without measuring time isn't as important as it is for
// the benchmarking phase, we still do it the same way as otherwise it is
// very confusing for the user to know how to choose a proper value for
// min_warmup_time if a different approach on running it is used.
iters = PredictNumItersNeeded(i_warmup);
assert(iters > i_warmup.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
}
MemoryManager::Result BenchmarkRunner::RunMemoryManager(
IterationCount memory_iterations) {
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);
manager.reset();
b.Teardown();
MemoryManager::Result memory_result;
memory_manager->Stop(memory_result);
memory_result.memory_iterations = memory_iterations;
return memory_result;
}
void BenchmarkRunner::RunProfilerManager(IterationCount profile_iterations) {
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, profile_iterations, 0, manager.get(),
/*perf_counters_measurement_ptr=*/nullptr,
/*profiler_manager=*/profiler_manager);
manager.reset();
b.Teardown();
}
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");
const bool is_the_first_repetition = num_repetitions_done == 0;
// In case a warmup phase is requested by the benchmark, run it now.
// After running the warmup phase the BenchmarkRunner should be in a state as
// this warmup never happened except the fact that warmup_done is set. Every
// other manipulation of the BenchmarkRunner instance would be a bug! Please
// fix it.
if (!warmup_done) {
RunWarmUp();
}
IterationResults i;
// We *may* be gradually increasing the length (iteration count)
// of the benchmark until we decide the results are significant.
// And once we do, we report those last results and exit.
// Please do note that the if there are repetitions, the iteration count
// is *only* calculated for the *first* repetition, and other repetitions
// simply use that precomputed iteration count.
for (;;) {
b.Setup();
i = DoNIterations();
b.Teardown();
// Do we consider the results to be significant?
// If we are doing repetitions, and the first repetition was already done,
// it has calculated the correct iteration time, so we have run that very
// iteration count just now. No need to calculate anything. Just report.
// Else, the normal rules apply.
const bool results_are_significant = !is_the_first_repetition ||
has_explicit_iteration_count ||
ShouldReportIterationResults(i);
// Good, let's report them!
if (results_are_significant) {
break;
}
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
// iteration count, and run the benchmark again...
iters = PredictNumItersNeeded(i);
assert(iters > i.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
// Produce memory measurements if requested.
MemoryManager::Result memory_result;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters);
memory_result = RunMemoryManager(memory_iterations);
}
if (profiler_manager != nullptr) {
// We want to externally profile the benchmark for the same number of
// iterations because, for example, if we're tracing the benchmark then we
// want trace data to reasonably match PMU data.
RunProfilerManager(iters);
}
// Ok, now actually report.
BenchmarkReporter::Run report =
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
num_repetitions_done, repeats);
if (reports_for_family != nullptr) {
++reports_for_family->num_runs_done;
if (report.skipped == 0u) {
reports_for_family->Runs.push_back(report);
}
}
run_results.non_aggregates.push_back(report);
++num_repetitions_done;
}
RunResults&& BenchmarkRunner::GetResults() {
assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
// Calculate additional statistics over the repetitions of this instance.
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
return std::move(run_results);
}
} // end namespace internal
} // end namespace benchmark

View File

@@ -0,0 +1,127 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_RUNNER_H_
#define BENCHMARK_RUNNER_H_
#include <memory>
#include <thread>
#include <vector>
#include "benchmark_api_internal.h"
#include "perf_counters.h"
#include "thread_manager.h"
namespace benchmark {
namespace internal {
extern MemoryManager* memory_manager;
extern ProfilerManager* profiler_manager;
struct RunResults {
std::vector<BenchmarkReporter::Run> non_aggregates;
std::vector<BenchmarkReporter::Run> aggregates_only;
bool display_report_aggregates_only = false;
bool file_report_aggregates_only = false;
};
struct BENCHMARK_EXPORT BenchTimeType {
enum { UNSPECIFIED, ITERS, TIME } tag;
union {
IterationCount iters;
double time;
};
};
BENCHMARK_EXPORT
BenchTimeType ParseBenchMinTime(const std::string& value);
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
benchmark::internal::PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
int GetNumRepeats() const { return repeats; }
bool HasRepeatsRemaining() const {
return GetNumRepeats() != num_repetitions_done;
}
void DoOneRepetition();
RunResults&& GetResults();
BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
return reports_for_family;
}
double GetMinTime() const { return min_time; }
bool HasExplicitIters() const { return has_explicit_iteration_count; }
IterationCount GetIters() const { return iters; }
private:
RunResults run_results;
const benchmark::internal::BenchmarkInstance& b;
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
BenchTimeType parsed_benchtime_flag;
const double min_time;
const double min_warmup_time;
bool warmup_done;
const int repeats;
const bool has_explicit_iteration_count;
int num_repetitions_done = 0;
std::unique_ptr<ThreadRunnerBase> thread_runner;
IterationCount iters; // preserved between repetitions!
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
struct IterationResults {
internal::ThreadManager::Result results;
IterationCount iters;
double seconds;
};
IterationResults DoNIterations();
MemoryManager::Result RunMemoryManager(IterationCount memory_iterations);
void RunProfilerManager(IterationCount profile_iterations);
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
bool ShouldReportIterationResults(const IterationResults& i) const;
double GetMinTimeToApply() const;
void FinishWarmUp(const IterationCount& i);
void RunWarmUp();
};
} // namespace internal
} // end namespace benchmark
#endif // BENCHMARK_RUNNER_H_

View File

@@ -0,0 +1,14 @@
#include "check.h"
namespace benchmark {
namespace internal {
namespace {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
AbortHandlerT* handler = &std::abort;
} // namespace
BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }
} // namespace internal
} // namespace benchmark

View File

@@ -0,0 +1,112 @@
#ifndef CHECK_H_
#define CHECK_H_
#include <cmath>
#include <cstdlib>
#include <ostream>
#include <string_view>
#include "benchmark/export.h"
#include "internal_macros.h"
#include "log.h"
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#if _MSC_VER >= 1900
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
#define __func__ __FUNCTION__
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
namespace benchmark {
namespace internal {
typedef void(AbortHandlerT)();
BENCHMARK_EXPORT
AbortHandlerT*& GetAbortHandler();
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()();
std::flush(std::cout);
std::flush(std::cerr);
std::abort(); // fallback to enforce noreturn
}
// CheckHandler is the class constructed by failing BM_CHECK macros.
// CheckHandler will log information about the failures and abort when it is
// destructed.
class CheckHandler {
public:
CheckHandler(std::string_view check, std::string_view file,
std::string_view func, int line)
: log_(GetErrorLogInstance()) {
log_ << file << ":" << line << ": " << func << ": Check `" << check
<< "' failed. ";
}
LogType& GetLog() { return log_; }
#if defined(COMPILER_MSVC)
#pragma warning(push)
#pragma warning(disable : 4722)
#endif
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << '\n';
CallAbortHandler();
}
#if defined(COMPILER_MSVC)
#pragma warning(pop)
#endif
CheckHandler& operator=(const CheckHandler&) = delete;
CheckHandler(const CheckHandler&) = delete;
CheckHandler() = delete;
private:
LogType& log_;
};
} // end namespace internal
} // end namespace benchmark
// The BM_CHECK macro returns a std::ostream object that can have extra
// information written to it.
#ifndef NDEBUG
#define BM_CHECK(b) \
(b ? ::benchmark::internal::GetNullLogInstance() \
: ::benchmark::internal::CheckHandler( \
std::string_view(#b), std::string_view(__FILE__), \
std::string_view(__func__), __LINE__) \
.GetLog())
#else
#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance()
#endif
// clang-format off
// preserve whitespacing between operators for alignment
#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b))
#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b))
#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b))
#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b))
#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b))
#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b))
#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps))
#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps))
#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps))
#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps))
#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps))
#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps))
//clang-format on
#endif // CHECK_H_

View File

@@ -0,0 +1,222 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "colorprint.h"
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include "check.h"
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <io.h>
#include <windows.h>
#else
#include <unistd.h>
#endif // BENCHMARK_OS_WINDOWS
namespace benchmark {
namespace {
#ifdef BENCHMARK_OS_WINDOWS
typedef WORD PlatformColorCode;
#else
typedef const char* PlatformColorCode;
#endif
PlatformColorCode GetPlatformColorCode(LogColor color) {
#ifdef BENCHMARK_OS_WINDOWS
switch (color) {
case COLOR_RED:
return FOREGROUND_RED;
case COLOR_GREEN:
return FOREGROUND_GREEN;
case COLOR_YELLOW:
return FOREGROUND_RED | FOREGROUND_GREEN;
case COLOR_BLUE:
return FOREGROUND_BLUE;
case COLOR_MAGENTA:
return FOREGROUND_BLUE | FOREGROUND_RED;
case COLOR_CYAN:
return FOREGROUND_BLUE | FOREGROUND_GREEN;
case COLOR_WHITE: // fall through to default
default:
return 0;
}
#else
switch (color) {
case COLOR_RED:
return "1";
case COLOR_GREEN:
return "2";
case COLOR_YELLOW:
return "3";
case COLOR_BLUE:
return "4";
case COLOR_MAGENTA:
return "5";
case COLOR_CYAN:
return "6";
case COLOR_WHITE:
return "7";
default:
return nullptr;
};
#endif
}
} // end namespace
std::string FormatString(const char* msg, va_list args) {
// we might need a second shot at this, so pre-emptivly make a copy
va_list args_cp;
va_copy(args_cp, args);
std::size_t size = 256;
char local_buff[256];
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
#endif // __GNUC__
auto ret = vsnprintf(local_buff, size, msg, args_cp);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif // __GNUC__
va_end(args_cp);
// currently there is no error handling for failure, so this is hack.
BM_CHECK(ret >= 0);
if (ret == 0) { // handle empty expansion
return {};
}
if (static_cast<size_t>(ret) < size) {
return local_buff;
}
// we did not provide a long enough buffer on our first attempt.
size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
#endif // __GNUC__
ret = vsnprintf(buff.get(), size, msg, args);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif // __GNUC__
BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
return buff.get();
}
std::string FormatString(const char* msg, ...) {
va_list args;
va_start(args, msg);
auto tmp = FormatString(msg, args);
va_end(args);
return tmp;
}
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) {
va_list args;
va_start(args, fmt);
ColorPrintf(out, color, fmt, args);
va_end(args);
}
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt,
va_list args) {
#ifdef BENCHMARK_OS_WINDOWS
((void)out); // suppress unused warning
const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
// Gets the current text color.
CONSOLE_SCREEN_BUFFER_INFO buffer_info;
GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
const WORD original_color_attrs = buffer_info.wAttributes;
// We need to flush the stream buffers into the console before each
// SetConsoleTextAttribute call lest it affect the text that is already
// printed but has not yet reached the console.
out.flush();
const WORD original_background_attrs =
original_color_attrs & (BACKGROUND_RED | BACKGROUND_GREEN |
BACKGROUND_BLUE | BACKGROUND_INTENSITY);
SetConsoleTextAttribute(stdout_handle, GetPlatformColorCode(color) |
FOREGROUND_INTENSITY |
original_background_attrs);
out << FormatString(fmt, args);
out.flush();
// Restores the text and background color.
SetConsoleTextAttribute(stdout_handle, original_color_attrs);
#else
const char* color_code = GetPlatformColorCode(color);
if (color_code != nullptr) {
out << FormatString("\033[0;3%sm", color_code);
}
out << FormatString(fmt, args) << "\033[m";
#endif
}
bool IsColorTerminal() {
#if BENCHMARK_OS_WINDOWS
// On Windows the TERM variable is usually not set, but the
// console there does support colors.
return 0 != _isatty(_fileno(stdout));
#else
// On non-Windows platforms, we rely on the TERM variable. This list of
// supported TERM values is copied from Google Test:
// <https://github.com/google/googletest/blob/v1.13.0/googletest/src/gtest.cc#L3225-L3259>.
const char* const SUPPORTED_TERM_VALUES[] = {
"xterm",
"xterm-color",
"xterm-256color",
"screen",
"screen-256color",
"tmux",
"tmux-256color",
"rxvt-unicode",
"rxvt-unicode-256color",
"linux",
"cygwin",
"xterm-kitty",
"alacritty",
"foot",
"foot-extra",
"wezterm",
};
const char* const term = getenv("TERM");
bool term_supports_color = false;
for (const char* candidate : SUPPORTED_TERM_VALUES) {
if ((term != nullptr) && 0 == strcmp(term, candidate)) {
term_supports_color = true;
break;
}
}
return 0 != isatty(fileno(stdout)) && term_supports_color;
#endif // BENCHMARK_OS_WINDOWS
}
} // end namespace benchmark

View File

@@ -0,0 +1,33 @@
#ifndef BENCHMARK_COLORPRINT_H_
#define BENCHMARK_COLORPRINT_H_
#include <cstdarg>
#include <iostream>
#include <string>
namespace benchmark {
enum LogColor {
COLOR_DEFAULT,
COLOR_RED,
COLOR_GREEN,
COLOR_YELLOW,
COLOR_BLUE,
COLOR_MAGENTA,
COLOR_CYAN,
COLOR_WHITE
};
std::string FormatString(const char* msg, va_list args);
std::string FormatString(const char* msg, ...);
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt,
va_list args);
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...);
// Returns true if stdout appears to be a terminal that supports colored
// output, false otherwise.
bool IsColorTerminal();
} // end namespace benchmark
#endif // BENCHMARK_COLORPRINT_H_

View File

@@ -0,0 +1,321 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "commandlineflags.h"
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <map>
#include <utility>
#include "../src/string_util.h"
namespace benchmark {
namespace {
// Parses 'str' for a 32-bit signed integer. If successful, writes
// the result to *value and returns true; otherwise leaves *value
// unchanged and returns false.
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
// Parses the environment variable as a decimal integer.
char* end = nullptr;
const long long_value = strtol(str, &end, 10); // NOLINT
// Has strtol() consumed all characters in the string?
if (*end != '\0') {
// No - an invalid character was encountered.
std::cerr << src_text << " is expected to be a 32-bit integer, "
<< "but actually has value \"" << str << "\".\n";
return false;
}
// Is the parsed value in the range of an Int32?
const int32_t result = static_cast<int32_t>(long_value);
if (long_value == std::numeric_limits<long>::max() ||
long_value == std::numeric_limits<long>::min() ||
// The parsed value overflows as a long. (strtol() returns
// LONG_MAX or LONG_MIN when the input overflows.)
result != long_value
// The parsed value overflows as an Int32.
) {
std::cerr << src_text << " is expected to be a 32-bit integer, "
<< "but actually has value \"" << str << "\", "
<< "which overflows.\n";
return false;
}
*value = result;
return true;
}
// Parses 'str' for a double. If successful, writes the result to *value and
// returns true; otherwise leaves *value unchanged and returns false.
bool ParseDouble(const std::string& src_text, const char* str, double* value) {
// Parses the environment variable as a decimal integer.
char* end = nullptr;
const double double_value = strtod(str, &end); // NOLINT
// Has strtol() consumed all characters in the string?
if (*end != '\0') {
// No - an invalid character was encountered.
std::cerr << src_text << " is expected to be a double, "
<< "but actually has value \"" << str << "\".\n";
return false;
}
*value = double_value;
return true;
}
// Parses 'str' into KV pairs. If successful, writes the result to *value and
// returns true; otherwise leaves *value unchanged and returns false.
bool ParseKvPairs(const std::string& src_text, const char* str,
std::map<std::string, std::string>* value) {
std::map<std::string, std::string> kvs;
for (const auto& kvpair : StrSplit(str, ',')) {
const auto kv = StrSplit(kvpair, '=');
if (kv.size() != 2) {
std::cerr << src_text << " is expected to be a comma-separated list of "
<< "<key>=<value> strings, but actually has value \"" << str
<< "\".\n";
return false;
}
if (!kvs.emplace(kv[0], kv[1]).second) {
std::cerr << src_text << " is expected to contain unique keys but key \""
<< kv[0] << "\" was repeated.\n";
return false;
}
}
*value = kvs;
return true;
}
// Returns the name of the environment variable corresponding to the
// given flag. For example, FlagToEnvVar("foo") will return
// "BENCHMARK_FOO" in the open-source version.
std::string FlagToEnvVar(const char* flag) {
const std::string flag_str(flag);
std::string env_var;
for (size_t i = 0; i != flag_str.length(); ++i) {
env_var += static_cast<char>(::toupper(flag_str.c_str()[i]));
}
return env_var;
}
} // namespace
BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str);
}
BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
int32_t value = default_val;
if (value_str == nullptr ||
!ParseInt32(std::string("Environment variable ") + env_var, value_str,
&value)) {
return default_val;
}
return value;
}
BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
double value = default_val;
if (value_str == nullptr ||
!ParseDouble(std::string("Environment variable ") + env_var, value_str,
&value)) {
return default_val;
}
return value;
}
BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value = getenv(env_var.c_str());
return value == nullptr ? default_val : value;
}
BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
if (value_str == nullptr) {
return default_val;
}
std::map<std::string, std::string> value;
if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) {
return default_val;
}
return value;
}
// Parses a string as a command line flag. The string should have
// the format "--flag=value". When def_optional is true, the "=value"
// part can be omitted.
//
// Returns the value of the flag, or nullptr if the parsing failed.
const char* ParseFlagValue(const char* str, const char* flag,
bool def_optional) {
// str and flag must not be nullptr.
if (str == nullptr || flag == nullptr) {
return nullptr;
}
// The flag must start with "--".
const std::string flag_str = std::string("--") + std::string(flag);
const size_t flag_len = flag_str.length();
if (strncmp(str, flag_str.c_str(), flag_len) != 0) {
return nullptr;
}
// Skips the flag name.
const char* flag_end = str + flag_len;
// When def_optional is true, it's OK to not have a "=value" part.
if (def_optional && (flag_end[0] == '\0')) {
return flag_end;
}
// If def_optional is true and there are more characters after the
// flag name, or if def_optional is false, there must be a '=' after
// the flag name.
if (flag_end[0] != '=') {
return nullptr;
}
// Returns the string after "=".
return flag_end + 1;
}
BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, true);
// Aborts if the parsing failed.
if (value_str == nullptr) {
return false;
}
// Converts the string value to a bool.
*value = IsTruthyFlagValue(value_str);
return true;
}
BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) {
return false;
}
// Sets *value to the value of the flag.
return ParseInt32(std::string("The value of flag --") + flag, value_str,
value);
}
BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) {
return false;
}
// Sets *value to the value of the flag.
return ParseDouble(std::string("The value of flag --") + flag, value_str,
value);
}
BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) {
return false;
}
*value = value_str;
return true;
}
BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
if (value_str == nullptr) {
return false;
}
for (const auto& kvpair : StrSplit(value_str, ',')) {
const auto kv = StrSplit(kvpair, '=');
if (kv.size() != 2) {
return false;
}
value->emplace(kv[0], kv[1]);
}
return true;
}
BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag) {
return (ParseFlagValue(str, flag, true) != nullptr);
}
BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value) {
if (value.size() == 1) {
char v = value[0];
return isalnum(v) &&
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
}
if (!value.empty()) {
std::string value_lower(value);
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
[](char c) { return static_cast<char>(::tolower(c)); });
return !(value_lower == "false" || value_lower == "no" ||
value_lower == "off");
}
return true;
}
} // end namespace benchmark

View File

@@ -0,0 +1,137 @@
#ifndef BENCHMARK_COMMANDLINEFLAGS_H_
#define BENCHMARK_COMMANDLINEFLAGS_H_
#include <cstdint>
#include <map>
#include <string>
#include "benchmark/export.h"
// Macro for referencing flags.
#define FLAG(name) FLAGS_##name
// Macros for declaring flags.
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name)
#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name)
#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name)
#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name)
#define BM_DECLARE_kvpairs(name) \
BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name)
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
// Macros for defining flags.
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
#define BM_DEFINE_bool(name, default_val) \
BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
#define BM_DEFINE_int32(name, default_val) \
BENCHMARK_EXPORT int32_t FLAG(name) = \
benchmark::Int32FromEnv(#name, default_val)
#define BM_DEFINE_double(name, default_val) \
BENCHMARK_EXPORT double FLAG(name) = \
benchmark::DoubleFromEnv(#name, default_val)
#define BM_DEFINE_string(name, default_val) \
BENCHMARK_EXPORT std::string FLAG(name) = \
benchmark::StringFromEnv(#name, default_val)
#define BM_DEFINE_kvpairs(name, default_val) \
BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \
benchmark::KvPairsFromEnv(#name, default_val)
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
namespace benchmark {
// Parses a bool from the environment variable corresponding to the given flag.
//
// If the variable exists, returns IsTruthyFlagValue() value; if not,
// returns the given default value.
BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val);
// Parses an Int32 from the environment variable corresponding to the given
// flag.
//
// If the variable exists, returns ParseInt32() value; if not, returns
// the given default value.
BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val);
// Parses an Double from the environment variable corresponding to the given
// flag.
//
// If the variable exists, returns ParseDouble(); if not, returns
// the given default value.
BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val);
// Parses a string from the environment variable corresponding to the given
// flag.
//
// If variable exists, returns its value; if not, returns
// the given default value.
BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val);
// Parses a set of kvpairs from the environment variable corresponding to the
// given flag.
//
// If variable exists, returns its value; if not, returns
// the given default value.
BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val);
// Parses a string for a bool flag, in the form of either
// "--flag=value" or "--flag".
//
// In the former case, the value is taken as true if it passes IsTruthyValue().
//
// In the latter case, the value is taken as true.
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
// Parses a string for a Double flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
// Parses a string for a string flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value"
//
// On success, stores the value of the flag in *value and returns true. On
// failure returns false, though *value may have been mutated.
BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value);
// Returns true if the string matches the flag.
BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag);
// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or
// some non-alphanumeric character. Also returns false if the value matches
// one of 'no', 'false', 'off' (case-insensitive). As a special case, also
// returns true if value is the empty string.
BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value);
} // end namespace benchmark
#endif // BENCHMARK_COMMANDLINEFLAGS_H_

View File

@@ -0,0 +1,257 @@
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
#include "complexity.h"
#include <algorithm>
#include <cmath>
#include "benchmark/benchmark.h"
#include "check.h"
namespace benchmark {
// Internal function to calculate the different scalability forms
BigOFunc* FittingCurve(BigO complexity) {
switch (complexity) {
case oN:
return [](IterationCount n) -> double { return static_cast<double>(n); };
case oNSquared:
return [](IterationCount n) -> double { return std::pow(n, 2); };
case oNCubed:
return [](IterationCount n) -> double { return std::pow(n, 3); };
case oLogN:
return [](IterationCount n) -> double {
return std::log2(static_cast<double>(n));
};
case oNLogN:
return [](IterationCount n) -> double {
return static_cast<double>(n) * std::log2(static_cast<double>(n));
};
case o1:
default:
return [](IterationCount) { return 1.0; };
}
}
// Function to return an string for the calculated complexity
std::string GetBigOString(BigO complexity) {
switch (complexity) {
case oN:
return "N";
case oNSquared:
return "N^2";
case oNCubed:
return "N^3";
case oLogN:
return "lgN";
case oNLogN:
return "NlgN";
case o1:
return "(1)";
default:
return "f(N)";
}
}
// Find the coefficient for the high-order term in the running time, by
// minimizing the sum of squares of relative error, for the fitting curve
// given by the lambda expression.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - fitting_curve : lambda expression (e.g. [](ComplexityN n) {return n; };).
// For a deeper explanation on the algorithm logic, please refer to
// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics
LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
const std::vector<double>& time,
BigOFunc* fitting_curve) {
double sigma_gn_squared = 0.0;
double sigma_time = 0.0;
double sigma_time_gn = 0.0;
// Calculate least square fitting parameter
for (size_t i = 0; i < n.size(); ++i) {
double gn_i = fitting_curve(n[i]);
sigma_gn_squared += gn_i * gn_i;
sigma_time += time[i];
sigma_time_gn += time[i] * gn_i;
}
LeastSq result;
result.complexity = oLambda;
// Calculate complexity.
result.coef = sigma_time_gn / sigma_gn_squared;
// Calculate RMS
double rms = 0.0;
for (size_t i = 0; i < n.size(); ++i) {
double fit = result.coef * fitting_curve(n[i]);
rms += std::pow((time[i] - fit), 2);
}
// Normalized RMS by the mean of the observed values
double mean = sigma_time / static_cast<double>(n.size());
result.rms = std::sqrt(rms / static_cast<double>(n.size())) / mean;
return result;
}
// Find the coefficient for the high-order term in the running time, by
// minimizing the sum of squares of relative error.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - complexity : If different than oAuto, the fitting curve will stick to
// this one. If it is oAuto, it will be calculated the best
// fitting curve.
LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
const std::vector<double>& time, const BigO complexity) {
BM_CHECK_EQ(n.size(), time.size());
BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two
// benchmark runs are given
BM_CHECK_NE(complexity, oNone);
LeastSq best_fit;
if (complexity == oAuto) {
std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed};
// Take o1 as default best fitting curve
best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
best_fit.complexity = o1;
// Compute all possible fitting curves and stick to the best one
for (const auto& fit : fit_curves) {
LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit));
if (current_fit.rms < best_fit.rms) {
best_fit = current_fit;
best_fit.complexity = fit;
}
}
} else {
best_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
best_fit.complexity = complexity;
}
return best_fit;
}
std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports) {
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
if (reports.size() < 2) {
return results;
}
// Accumulators.
std::vector<ComplexityN> n;
std::vector<double> real_time;
std::vector<double> cpu_time;
// Populate the accumulators.
for (const Run& run : reports) {
BM_CHECK_GT(run.complexity_n, 0)
<< "Did you forget to call SetComplexityN?";
n.push_back(run.complexity_n);
real_time.push_back(run.real_accumulated_time /
static_cast<double>(run.iterations));
cpu_time.push_back(run.cpu_accumulated_time /
static_cast<double>(run.iterations));
}
LeastSq result_cpu;
LeastSq result_real;
if (reports[0].complexity == oLambda) {
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
} else {
const BigO* InitialBigO = &reports[0].complexity;
const bool use_real_time_for_initial_big_o =
reports[0].use_real_time_for_initial_big_o;
if (use_real_time_for_initial_big_o) {
result_real = MinimalLeastSq(n, real_time, *InitialBigO);
InitialBigO = &result_real.complexity;
// The Big-O complexity for CPU time must have the same Big-O function!
}
result_cpu = MinimalLeastSq(n, cpu_time, *InitialBigO);
InitialBigO = &result_cpu.complexity;
if (!use_real_time_for_initial_big_o) {
result_real = MinimalLeastSq(n, real_time, *InitialBigO);
}
}
// Drop the 'args' when reporting complexity.
auto run_name = reports[0].run_name;
run_name.args.clear();
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run big_o;
big_o.run_name = run_name;
big_o.family_index = reports[0].family_index;
big_o.per_family_instance_index = reports[0].per_family_instance_index;
big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
big_o.repetitions = reports[0].repetitions;
big_o.repetition_index = Run::no_repetition_index;
big_o.threads = reports[0].threads;
big_o.aggregate_name = "BigO";
big_o.aggregate_unit = StatisticUnit::kTime;
big_o.report_label = reports[0].report_label;
big_o.iterations = 0;
big_o.real_accumulated_time = result_real.coef;
big_o.cpu_accumulated_time = result_cpu.coef;
big_o.report_big_o = true;
big_o.complexity = result_cpu.complexity;
// All the time results are reported after being multiplied by the
// time unit multiplier. But since RMS is a relative quantity it
// should not be multiplied at all. So, here, we _divide_ it by the
// multiplier so that when it is multiplied later the result is the
// correct one.
double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);
// Only add label to mean/stddev if it is same for all runs
Run rms;
rms.run_name = run_name;
rms.family_index = reports[0].family_index;
rms.per_family_instance_index = reports[0].per_family_instance_index;
rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
rms.aggregate_name = "RMS";
rms.aggregate_unit = StatisticUnit::kPercentage;
rms.report_label = big_o.report_label;
rms.iterations = 0;
rms.repetition_index = Run::no_repetition_index;
rms.repetitions = reports[0].repetitions;
rms.threads = reports[0].threads;
rms.real_accumulated_time = result_real.rms / multiplier;
rms.cpu_accumulated_time = result_cpu.rms / multiplier;
rms.report_rms = true;
rms.complexity = result_cpu.complexity;
// don't forget to keep the time unit, or we won't be able to
// recover the correct value.
rms.time_unit = reports[0].time_unit;
results.push_back(big_o);
results.push_back(rms);
return results;
}
} // end namespace benchmark

View File

@@ -0,0 +1,55 @@
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
#ifndef COMPLEXITY_H_
#define COMPLEXITY_H_
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
namespace benchmark {
// Return a vector containing the bigO and RMS information for the specified
// list of reports. If 'reports.size() < 2' an empty vector is returned.
std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
// form has been provided to MinimalLeastSq this will return
// the same value. In case BigO::oAuto has been selected, this
// parameter will return the best fitting curve detected.
struct LeastSq {
LeastSq() : coef(0.0), rms(0.0), complexity(oNone) {}
double coef;
double rms;
BigO complexity;
};
// Function to return an string for the calculated complexity
std::string GetBigOString(BigO complexity);
} // end namespace benchmark
#endif // COMPLEXITY_H_

View File

@@ -0,0 +1,212 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
BENCHMARK_EXPORT
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
printed_header_ = false;
prev_counters_.clear();
PrintBasicContext(&GetErrorStream(), context);
#ifdef BENCHMARK_OS_WINDOWS
if ((output_options_ & OO_Color)) {
auto stdOutBuf = std::cout.rdbuf();
auto outStreamBuf = GetOutputStream().rdbuf();
if (stdOutBuf != outStreamBuf) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
}
}
#endif
return true;
}
BENCHMARK_EXPORT
void ConsoleReporter::PrintHeader(const Run& run) {
std::string str =
FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
"Benchmark", "Time", "CPU", "Iterations");
if (!run.counters.empty()) {
if ((output_options_ & OO_Tabular) != 0) {
for (auto const& c : run.counters) {
str += FormatString(" %10s", c.first.c_str());
}
} else {
str += " UserCounters...";
}
}
std::string line = std::string(str.length(), '-');
GetOutputStream() << line << "\n" << str << "\n" << line << "\n";
}
BENCHMARK_EXPORT
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& run : reports) {
// print the header:
// --- if none was printed yet
bool print_header = !printed_header_;
// --- or if the format is tabular and this run
// has different fields from the prev header
print_header |= ((output_options_ & OO_Tabular) != 0) &&
(!internal::SameNames(run.counters, prev_counters_));
if (print_header) {
printed_header_ = true;
prev_counters_ = run.counters;
PrintHeader(run);
}
// As an alternative to printing the headers like this, we could sort
// the benchmarks by header and then print. But this would require
// waiting for the full results before printing, or printing twice.
PrintRunData(run);
}
}
static void IgnoreColorPrint(std::ostream& out, LogColor /*unused*/,
const char* fmt, ...) {
va_list args;
va_start(args, fmt);
out << FormatString(fmt, args);
va_end(args);
}
static std::string FormatTime(double time) {
// For the time columns of the console printer 13 digits are reserved. One of
// them is a space and max two of them are the time unit (e.g ns). That puts
// us at 10 digits usable for the number.
// Align decimal places...
if (time < 1.0) {
return FormatString("%10.3f", time);
}
if (time < 10.0) {
return FormatString("%10.2f", time);
}
if (time < 100.0) {
return FormatString("%10.1f", time);
}
// Assuming the time is at max 9.9999e+99 and we have 10 digits for the
// number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
if (time > 9999999999 /*max 10 digit number*/) {
return FormatString("%1.4e", time);
}
return FormatString("%10.0f", time);
}
BENCHMARK_EXPORT
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
PrinterFn* printer = (output_options_ & OO_Color) != 0
? static_cast<PrinterFn*>(ColorPrintf)
: IgnoreColorPrint;
auto name_color =
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());
if (internal::SkippedWithError == result.skipped) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
if (internal::SkippedWithMessage == result.skipped) {
printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
const double real_time = result.GetAdjustedRealTime();
const double cpu_time = result.GetAdjustedCPUTime();
const std::string real_time_str = FormatTime(real_time);
const std::string cpu_time_str = FormatTime(cpu_time);
if (result.report_big_o) {
std::string big_o = GetBigOString(result.complexity);
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time,
big_o.c_str(), cpu_time, big_o.c_str());
} else if (result.report_rms) {
printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%",
cpu_time * 100, "%");
} else if (result.run_type != Run::RT_Aggregate ||
result.aggregate_unit == StatisticUnit::kTime) {
const char* timeLabel = GetTimeUnitString(result.time_unit);
printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(),
timeLabel, cpu_time_str.c_str(), timeLabel);
} else {
assert(result.aggregate_unit == StatisticUnit::kPercentage);
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ",
(100. * result.real_accumulated_time), "%",
(100. * result.cpu_accumulated_time), "%");
}
if (!result.report_big_o && !result.report_rms) {
printer(Out, COLOR_CYAN, "%10lld", result.iterations);
}
for (const auto& c : result.counters) {
const std::size_t cNameLen =
std::max(static_cast<std::size_t>(10), c.first.length());
std::string s;
const char* unit = "";
if (result.run_type == Run::RT_Aggregate &&
result.aggregate_unit == StatisticUnit::kPercentage) {
s = StrFormat("%.2f", 100. * c.second.value);
unit = "%";
} else {
s = HumanReadableNumber(c.second.value, c.second.oneK);
if ((c.second.flags & Counter::kIsRate) != 0) {
unit = (c.second.flags & Counter::kInvert) != 0 ? "s" : "/s";
}
}
if ((output_options_ & OO_Tabular) != 0) {
printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(),
unit);
} else {
printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), unit);
}
}
if (!result.report_label.empty()) {
printer(Out, COLOR_DEFAULT, " %s", result.report_label.c_str());
}
printer(Out, COLOR_DEFAULT, "\n");
}
} // end namespace benchmark

View File

@@ -0,0 +1,82 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "counter.h"
namespace benchmark {
namespace internal {
double Finish(Counter const& c, IterationCount iterations, double cpu_time,
double num_threads) {
double v = c.value;
if ((c.flags & Counter::kIsRate) != 0) {
v /= cpu_time;
}
if ((c.flags & Counter::kAvgThreads) != 0) {
v /= num_threads;
}
if ((c.flags & Counter::kIsIterationInvariant) != 0) {
v *= static_cast<double>(iterations);
}
if ((c.flags & Counter::kAvgIterations) != 0) {
v /= static_cast<double>(iterations);
}
if ((c.flags & Counter::kInvert) != 0) { // Invert is *always* last.
v = 1.0 / v;
}
return v;
}
void Finish(UserCounters* l, IterationCount iterations, double cpu_time,
double num_threads) {
for (auto& c : *l) {
c.second.value = Finish(c.second, iterations, cpu_time, num_threads);
}
}
void Increment(UserCounters* l, UserCounters const& r) {
// add counters present in both or just in *l
for (auto& c : *l) {
auto it = r.find(c.first);
if (it != r.end()) {
c.second.value = c.second + it->second;
}
}
// add counters present in r, but not in *l
for (auto const& tc : r) {
auto it = l->find(tc.first);
if (it == l->end()) {
(*l)[tc.first] = tc.second;
}
}
}
bool SameNames(UserCounters const& l, UserCounters const& r) {
if (&l == &r) {
return true;
}
if (l.size() != r.size()) {
return false;
}
for (auto const& c : l) {
if (r.find(c.first) == r.end()) {
return false;
}
}
return true;
}
} // end namespace internal
} // end namespace benchmark

View File

@@ -0,0 +1,32 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_COUNTER_H_
#define BENCHMARK_COUNTER_H_
#include "benchmark/benchmark.h"
namespace benchmark {
// these counter-related functions are hidden to reduce API surface.
namespace internal {
void Finish(UserCounters* l, IterationCount iterations, double time,
double num_threads);
void Increment(UserCounters* l, UserCounters const& r);
bool SameNames(UserCounters const& l, UserCounters const& r);
} // end namespace internal
} // end namespace benchmark
#endif // BENCHMARK_COUNTER_H_

View File

@@ -0,0 +1,175 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "complexity.h"
#include "string_util.h"
#include "timers.h"
// File format reference: http://edoceo.com/utilitas/csv-file-format.
namespace benchmark {
namespace {
std::vector<std::string> elements = {
"name", "iterations", "real_time", "cpu_time",
"time_unit", "bytes_per_second", "items_per_second", "label",
"error_occurred", "error_message"};
} // namespace
std::string CsvEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size() + 2);
for (char c : s) {
switch (c) {
case '"':
tmp += "\"\"";
break;
default:
tmp += c;
break;
}
}
return '"' + tmp + '"';
}
BENCHMARK_EXPORT
bool CSVReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
return true;
}
BENCHMARK_EXPORT
void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
std::ostream& Out = GetOutputStream();
if (!printed_header_) {
// save the names of all the user counters
for (const auto& run : reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" ||
cnt.first == "items_per_second") {
continue;
}
user_counter_names_.insert(cnt.first);
}
}
// print the header
for (auto B = elements.begin(); B != elements.end();) {
Out << *B++;
if (B != elements.end()) {
Out << ",";
}
}
for (auto B = user_counter_names_.begin();
B != user_counter_names_.end();) {
Out << ",\"" << *B++ << "\"";
}
Out << "\n";
printed_header_ = true;
} else {
// check that all the current counters are saved in the name set
for (const auto& run : reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" ||
cnt.first == "items_per_second") {
continue;
}
BM_CHECK(user_counter_names_.find(cnt.first) !=
user_counter_names_.end())
<< "All counters must be present in each run. "
<< "Counter named \"" << cnt.first
<< "\" was not in a run after being added to the header";
}
}
}
// print results for each run
for (const auto& run : reports) {
PrintRunData(run);
}
}
BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
if (run.skipped != 0u) {
Out << std::string(elements.size() - 3, ',');
Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
Out << CsvEscape(run.skip_message) << "\n";
return;
}
// Do not print iteration on bigO and RMS report
if (!run.report_big_o && !run.report_rms) {
Out << run.iterations;
}
Out << ",";
if (run.run_type != Run::RT_Aggregate ||
run.aggregate_unit == StatisticUnit::kTime) {
Out << run.GetAdjustedRealTime() << ",";
Out << run.GetAdjustedCPUTime() << ",";
} else {
assert(run.aggregate_unit == StatisticUnit::kPercentage);
Out << run.real_accumulated_time << ",";
Out << run.cpu_accumulated_time << ",";
}
// Do not print timeLabel on bigO and RMS report
if (run.report_big_o) {
Out << GetBigOString(run.complexity);
} else if (!run.report_rms &&
run.aggregate_unit != StatisticUnit::kPercentage) {
Out << GetTimeUnitString(run.time_unit);
}
Out << ",";
if (run.counters.find("bytes_per_second") != run.counters.end()) {
Out << run.counters.at("bytes_per_second");
}
Out << ",";
if (run.counters.find("items_per_second") != run.counters.end()) {
Out << run.counters.at("items_per_second");
}
Out << ",";
if (!run.report_label.empty()) {
Out << CsvEscape(run.report_label);
}
Out << ",,"; // for error_occurred and error_message
// Print user counters
for (const auto& ucn : user_counter_names_) {
auto it = run.counters.find(ucn);
if (it == run.counters.end()) {
Out << ",";
} else {
Out << "," << it->second;
}
}
Out << '\n';
}
} // end namespace benchmark

View File

@@ -0,0 +1,255 @@
// ----------------------------------------------------------------------
// CycleClock
// A CycleClock tells you the current time in Cycles. The "time"
// is actually time since power-on. This is like time() but doesn't
// involve a system call and is much more precise.
//
// NOTE: Not all cpu/platform/kernel combinations guarantee that this
// clock increments at a constant rate or is synchronized across all logical
// cpus in a system.
//
// If you need the above guarantees, please consider using a different
// API. There are efforts to provide an interface which provides a millisecond
// granularity and implemented as a memory read. A memory read is generally
// cheaper than the CycleClock for many architectures.
//
// Also, in some out of order CPU implementations, the CycleClock is not
// serializing. So if you're trying to count at cycles granularity, your
// data might be inaccurate due to out of order instruction execution.
// ----------------------------------------------------------------------
#ifndef BENCHMARK_CYCLECLOCK_H_
#define BENCHMARK_CYCLECLOCK_H_
#include <cstdint>
#include "benchmark/benchmark.h"
#include "internal_macros.h"
#if defined(BENCHMARK_OS_MACOSX)
#include <mach/mach_time.h>
#endif
// For MSVC, we want to use '_asm rdtsc' when possible (since it works
// with even ancient MSVC compilers), and when not possible the
// __rdtsc intrinsic, declared in <intrin.h>. Unfortunately, in some
// environments, <windows.h> and <intrin.h> have conflicting
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
!defined(_M_ARM64EC)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
#if !defined(BENCHMARK_OS_WINDOWS) || defined(BENCHMARK_OS_MINGW)
#include <sys/time.h>
#include <time.h>
#endif
#ifdef BENCHMARK_OS_EMSCRIPTEN
#include <emscripten.h>
#endif
namespace benchmark {
// NOTE: only i386 and x86_64 have been well tested.
// PPC, sparc, alpha, and ia64 are based on
// http://peter.kuscsik.com/wordpress/?p=14
// with modifications by m3b. See also
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
namespace cycleclock {
// This should return the number of cycles since power-on. Thread-safe.
inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
#if defined(BENCHMARK_OS_MACOSX)
// this goes at the top because we need ALL Macs, regardless of
// architecture, to return the number of "mach time units" that
// have passed since startup. See sysinfo.cc where
// InitializeSystemInfo() sets the supposed cpu clock frequency of
// macs to the number of mach time units per second, not actual
// CPU clock frequency (which can change in the face of CPU
// frequency scaling). Also note that when the Mac sleeps, this
// counter pauses; it does not continue counting, nor does it
// reset to zero.
return static_cast<int64_t>(mach_absolute_time());
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// this goes above x86-specific code because old versions of Emscripten
// define __x86_64__, although they have nothing to do with it.
return static_cast<int64_t>(emscripten_get_now() * 1e+6);
#elif defined(__i386__)
int64_t ret;
__asm__ volatile("rdtsc" : "=A"(ret));
return ret;
#elif defined(__x86_64__) || defined(__amd64__)
uint64_t low, high;
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
return static_cast<int64_t>((high << 32) | low);
#elif defined(__powerpc__) || defined(__ppc__)
// This returns a time-base, which is not always precisely a cycle-count.
#if defined(__powerpc64__) || defined(__ppc64__)
int64_t tb;
asm volatile("mfspr %0, 268" : "=r"(tb));
return tb;
#else
uint32_t tbl, tbu0, tbu1;
asm volatile(
"mftbu %0\n"
"mftb %1\n"
"mftbu %2"
: "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
tbl &= -static_cast<int32_t>(tbu0 == tbu1);
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed)
return (static_cast<uint64_t>(tbu1) << 32) | tbl;
#endif
#elif defined(__sparc__)
int64_t tick;
asm(".byte 0x83, 0x41, 0x00, 0x00");
asm("mov %%g1, %0" : "=r"(tick));
return tick;
#elif defined(__ia64__)
int64_t itc;
asm("mov %0 = ar.itc" : "=r"(itc));
return itc;
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
// Older MSVC compilers (like 7.x) don't seem to support the
// __rdtsc intrinsic properly, so I prefer to use _asm instead
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
// See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT);
return virtual_timer_value;
#elif defined(COMPILER_MSVC)
return __rdtsc();
#elif defined(BENCHMARK_OS_NACL)
// Native Client validator on x86/x86-64 allows RDTSC instructions,
// and this case is handled above. Native Client validator on ARM
// rejects MRC instructions (used in the ARM-specific sequence below),
// so we handle it here. Portable Native Client compiles to
// architecture-agnostic bytecode, which doesn't provide any
// cycle counter access mnemonics.
// Native Client does not provide any API to access cycle counter.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because is provides nanosecond resolution (which is noticeable at
// least for PNaCl modules running on x86 Mac & Linux).
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif defined(__aarch64__)
// System timer of ARMv8 runs at a different frequency than the CPU's.
// The frequency is fixed, typically in the range 1-50MHz. It can be
// read at CNTFRQ special register. We assume the OS has set up
// the virtual timer properly.
int64_t virtual_timer_value;
asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
return virtual_timer_value;
#elif defined(__ARM_ARCH)
// V6 is the earliest arch that has a standard cyclecount
// Native Client validator doesn't allow MRC instructions.
#if (__ARM_ARCH >= 6)
uint32_t pmccntr;
uint32_t pmuseren;
uint32_t pmcntenset;
// Read the user mode perf monitor counter access permissions.
asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
if (pmcntenset & 0x80000000ul) { // Is it counting?
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
// The counter is set up to count every 64th cycle
return static_cast<int64_t>(pmccntr) * 64; // Should optimize to << 6
}
}
#endif
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__mips__) || defined(__m68k__)
// mips apparently only allows rdtsc for superusers, so we fall
// back to gettimeofday. It's possible clock_gettime would be better.
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__loongarch__) || defined(__csky__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__s390__) // Covers both s390 and s390x.
// Return the CPU clock.
uint64_t tsc;
#if defined(BENCHMARK_OS_ZOS)
// z/OS HLASM syntax.
asm(" stck %0" : "=m"(tsc) : : "cc");
#else
// Linux on Z syntax.
asm("stck %0" : "=Q"(tsc) : : "cc");
#endif
return tsc;
#elif defined(__riscv) // RISC-V
// Use RDTIME (and RDTIMEH on riscv32).
// RDCYCLE is a privileged instruction since Linux 6.6.
#if __riscv_xlen == 32
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
// This asm also includes the PowerPC overflow handling strategy, as above.
// Implemented in assembly because Clang insisted on branching.
asm volatile(
"rdtimeh %0\n"
"rdtime %1\n"
"rdtimeh %2\n"
"sub %0, %0, %2\n"
"seqz %0, %0\n"
"sub %0, zero, %0\n"
"and %1, %1, %0\n"
: "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
return static_cast<int64_t>((static_cast<uint64_t>(cycles_hi1) << 32) |
cycles_lo);
#else
uint64_t cycles;
asm volatile("rdtime %0" : "=r"(cycles));
return static_cast<int64_t>(cycles);
#endif
#elif defined(__e2k__) || defined(__elbrus__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__hexagon__)
uint64_t pcycle;
asm volatile("%0 = C15:14" : "=r"(pcycle));
return static_cast<int64_t>(pcycle);
#elif defined(__alpha__)
// Alpha has a cycle counter, the PCC register, but it is an unsigned 32-bit
// integer and thus wraps every ~4s, making using it for tick counts
// unreliable beyond this time range. The real-time clock is low-precision,
// roughtly ~1ms, but it is the only option that can reasonable count
// indefinitely.
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__hppa__) || defined(__linux__)
// Fallback for all other architectures with a recent Linux kernel, e.g.:
// HP PA-RISC provides a user-readable clock counter (cr16), but
// it's not syncronized across CPUs and only 32-bit wide when programs
// are built as 32-bit binaries.
// Same for SH-4 and possibly others.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because is provides nanosecond resolution.
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use generic version if nothing better is
// available.
#error You need to define CycleTimer for your OS and CPU
#endif
}
} // end namespace cycleclock
} // end namespace benchmark
#endif // BENCHMARK_CYCLECLOCK_H_

View File

@@ -0,0 +1,111 @@
#ifndef BENCHMARK_INTERNAL_MACROS_H_
#define BENCHMARK_INTERNAL_MACROS_H_
/* Needed to detect STL */
#include <cstdlib>
// clang-format off
#ifndef __has_feature
#define __has_feature(x) 0
#endif
#if defined(__clang__)
#if !defined(COMPILER_CLANG)
#define COMPILER_CLANG
#endif
#elif defined(_MSC_VER)
#if !defined(COMPILER_MSVC)
#define COMPILER_MSVC
#endif
#elif defined(__GNUC__)
#if !defined(COMPILER_GCC)
#define COMPILER_GCC
#endif
#endif
#if __has_feature(cxx_attributes)
#define BENCHMARK_NORETURN [[noreturn]]
#elif defined(__GNUC__)
#define BENCHMARK_NORETURN __attribute__((noreturn))
#elif defined(COMPILER_MSVC)
#define BENCHMARK_NORETURN __declspec(noreturn)
#else
#define BENCHMARK_NORETURN
#endif
#if defined(__CYGWIN__)
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
// WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
// We include windows.h which implicitly includes winapifamily.h for compatibility.
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#if defined(WINAPI_FAMILY_PARTITION)
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define BENCHMARK_OS_WINDOWS_WIN32 1
#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
#define BENCHMARK_OS_WINDOWS_RT 1
#endif
#endif
#if defined(__MINGW32__)
#define BENCHMARK_OS_MINGW 1
#endif
#elif defined(__APPLE__)
#define BENCHMARK_OS_APPLE 1
#include "TargetConditionals.h"
#if defined(TARGET_OS_MAC)
#define BENCHMARK_OS_MACOSX 1
#if defined(TARGET_OS_IPHONE)
#define BENCHMARK_OS_IOS 1
#endif
#endif
#elif defined(__FreeBSD__)
#define BENCHMARK_OS_FREEBSD 1
#elif defined(__NetBSD__)
#define BENCHMARK_OS_NETBSD 1
#elif defined(__OpenBSD__)
#define BENCHMARK_OS_OPENBSD 1
#elif defined(__DragonFly__)
#define BENCHMARK_OS_DRAGONFLY 1
#elif defined(__linux__)
#define BENCHMARK_OS_LINUX 1
#elif defined(__native_client__)
#define BENCHMARK_OS_NACL 1
#elif defined(__EMSCRIPTEN__)
#define BENCHMARK_OS_EMSCRIPTEN 1
#elif defined(__rtems__)
#define BENCHMARK_OS_RTEMS 1
#elif defined(__Fuchsia__)
#define BENCHMARK_OS_FUCHSIA 1
#elif defined (__SVR4) && defined (__sun)
#define BENCHMARK_OS_SOLARIS 1
#elif defined(__QNX__)
#define BENCHMARK_OS_QNX 1
#elif defined(__MVS__)
#define BENCHMARK_OS_ZOS 1
#elif defined(__hexagon__)
#define BENCHMARK_OS_QURT 1
#endif
#if defined(__ANDROID__) && defined(__GLIBCXX__)
#define BENCHMARK_STL_ANDROID_GNUSTL 1
#endif
#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \
&& !defined(__EXCEPTIONS)
#define BENCHMARK_HAS_NO_EXCEPTIONS
#endif
#if defined(COMPILER_CLANG) || defined(COMPILER_GCC)
#define BENCHMARK_MAYBE_UNUSED __attribute__((unused))
#else
#define BENCHMARK_MAYBE_UNUSED
#endif
// clang-format on
#endif // BENCHMARK_INTERNAL_MACROS_H_

View File

@@ -0,0 +1,341 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iomanip> // for setprecision
#include <iostream>
#include <limits>
#include <string>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "complexity.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
namespace {
std::string StrEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size());
for (char c : s) {
switch (c) {
case '\b':
tmp += "\\b";
break;
case '\f':
tmp += "\\f";
break;
case '\n':
tmp += "\\n";
break;
case '\r':
tmp += "\\r";
break;
case '\t':
tmp += "\\t";
break;
case '\\':
tmp += "\\\\";
break;
case '"':
tmp += "\\\"";
break;
default:
tmp += c;
break;
}
}
return tmp;
}
std::string FormatKV(std::string const& key, std::string const& value) {
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, const char* value) {
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, bool value) {
return StrFormat("\"%s\": %s", StrEscape(key).c_str(),
value ? "true" : "false");
}
std::string FormatKV(std::string const& key, int64_t value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": " << value;
return ss.str();
}
std::string FormatKV(std::string const& key, int value) {
return FormatKV(key, static_cast<int64_t>(value));
}
std::string FormatKV(std::string const& key, double value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": ";
if (std::isnan(value)) {
ss << (value < 0 ? "-" : "") << "NaN";
} else if (std::isinf(value)) {
ss << (value < 0 ? "-" : "") << "Infinity";
} else {
const auto max_digits10 =
std::numeric_limits<decltype(value)>::max_digits10;
const auto max_fractional_digits10 = max_digits10 - 1;
ss << std::scientific << std::setprecision(max_fractional_digits10)
<< value;
}
return ss.str();
}
int64_t RoundDouble(double v) { return std::lround(v); }
} // end namespace
bool JSONReporter::ReportContext(const Context& context) {
std::ostream& out = GetOutputStream();
out << "{\n";
std::string inner_indent(2, ' ');
// Open context block and print context information.
out << inner_indent << "\"context\": {\n";
std::string indent(4, ' ');
std::string walltime_value = LocalDateTimeString();
out << indent << FormatKV("date", walltime_value) << ",\n";
out << indent << FormatKV("host_name", context.sys_info.name) << ",\n";
if (Context::executable_name != nullptr) {
out << indent << FormatKV("executable", Context::executable_name) << ",\n";
}
CPUInfo const& info = context.cpu_info;
out << indent << FormatKV("num_cpus", static_cast<int64_t>(info.num_cpus))
<< ",\n";
out << indent
<< FormatKV("mhz_per_cpu",
RoundDouble(info.cycles_per_second / 1000000.0))
<< ",\n";
if (CPUInfo::Scaling::UNKNOWN != info.scaling) {
out << indent
<< FormatKV("cpu_scaling_enabled",
info.scaling == CPUInfo::Scaling::ENABLED)
<< ",\n";
}
const SystemInfo& sysinfo = context.sys_info;
if (SystemInfo::ASLR::UNKNOWN != sysinfo.ASLRStatus) {
out << indent
<< FormatKV("aslr_enabled",
sysinfo.ASLRStatus == SystemInfo::ASLR::ENABLED)
<< ",\n";
}
out << indent << "\"caches\": [\n";
indent = std::string(6, ' ');
std::string cache_indent(8, ' ');
for (size_t i = 0; i < info.caches.size(); ++i) {
const auto& CI = info.caches[i];
out << indent << "{\n";
out << cache_indent << FormatKV("type", CI.type) << ",\n";
out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
<< ",\n";
out << cache_indent << FormatKV("size", static_cast<int64_t>(CI.size))
<< ",\n";
out << cache_indent
<< FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
<< "\n";
out << indent << "}";
if (i != info.caches.size() - 1) {
out << ",";
}
out << "\n";
}
indent = std::string(4, ' ');
out << indent << "],\n";
out << indent << "\"load_avg\": [";
for (auto it = info.load_avg.begin(); it != info.load_avg.end();) {
out << *it++;
if (it != info.load_avg.end()) {
out << ",";
}
}
out << "],\n";
out << indent << FormatKV("library_version", GetBenchmarkVersion());
out << ",\n";
#if defined(NDEBUG)
const char build_type[] = "release";
#else
const char build_type[] = "debug";
#endif
out << indent << FormatKV("library_build_type", build_type);
out << ",\n";
// NOTE: our json schema is not strictly tied to the library version!
out << indent << FormatKV("json_schema_version", 1);
std::map<std::string, std::string>* global_context =
internal::GetGlobalContext();
if (global_context != nullptr) {
for (const auto& kv : *global_context) {
out << ",\n";
out << indent << FormatKV(kv.first, kv.second);
}
}
out << "\n";
// Close context block and open the list of benchmarks.
out << inner_indent << "},\n";
out << inner_indent << "\"benchmarks\": [\n";
return true;
}
void JSONReporter::ReportRuns(std::vector<Run> const& reports) {
if (reports.empty()) {
return;
}
std::string indent(4, ' ');
std::ostream& out = GetOutputStream();
if (!first_report_) {
out << ",\n";
}
first_report_ = false;
for (auto it = reports.begin(); it != reports.end(); ++it) {
out << indent << "{\n";
PrintRunData(*it);
out << indent << '}';
auto it_cp = it;
if (++it_cp != reports.end()) {
out << ",\n";
}
}
}
void JSONReporter::Finalize() {
// Close the list of benchmarks and the top level object.
GetOutputStream() << "\n ]\n}\n";
}
void JSONReporter::PrintRunData(Run const& run) {
std::string indent(6, ' ');
std::ostream& out = GetOutputStream();
out << indent << FormatKV("name", run.benchmark_name()) << ",\n";
out << indent << FormatKV("family_index", run.family_index) << ",\n";
out << indent
<< FormatKV("per_family_instance_index", run.per_family_instance_index)
<< ",\n";
out << indent << FormatKV("run_name", run.run_name.str()) << ",\n";
out << indent << FormatKV("run_type", [&run]() -> const char* {
switch (run.run_type) {
case BenchmarkReporter::Run::RT_Iteration:
return "iteration";
case BenchmarkReporter::Run::RT_Aggregate:
return "aggregate";
}
BENCHMARK_UNREACHABLE();
}()) << ",\n";
out << indent << FormatKV("repetitions", run.repetitions) << ",\n";
if (run.run_type != BenchmarkReporter::Run::RT_Aggregate) {
out << indent << FormatKV("repetition_index", run.repetition_index)
<< ",\n";
}
out << indent << FormatKV("threads", run.threads) << ",\n";
if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) {
out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n";
out << indent << FormatKV("aggregate_unit", [&run]() -> const char* {
switch (run.aggregate_unit) {
case StatisticUnit::kTime:
return "time";
case StatisticUnit::kPercentage:
return "percentage";
}
BENCHMARK_UNREACHABLE();
}()) << ",\n";
}
if (internal::SkippedWithError == run.skipped) {
out << indent << FormatKV("error_occurred", true) << ",\n";
out << indent << FormatKV("error_message", run.skip_message) << ",\n";
} else if (internal::SkippedWithMessage == run.skipped) {
out << indent << FormatKV("skipped", true) << ",\n";
out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
if (run.run_type != Run::RT_Aggregate ||
run.aggregate_unit == StatisticUnit::kTime) {
out << indent << FormatKV("real_time", run.GetAdjustedRealTime())
<< ",\n";
out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
} else {
assert(run.aggregate_unit == StatisticUnit::kPercentage);
out << indent << FormatKV("real_time", run.real_accumulated_time)
<< ",\n";
out << indent << FormatKV("cpu_time", run.cpu_accumulated_time);
}
out << ",\n"
<< indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if (run.report_big_o) {
out << indent << FormatKV("cpu_coefficient", run.GetAdjustedCPUTime())
<< ",\n";
out << indent << FormatKV("real_coefficient", run.GetAdjustedRealTime())
<< ",\n";
out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n";
out << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if (run.report_rms) {
out << indent << FormatKV("rms", run.GetAdjustedCPUTime());
}
for (const auto& c : run.counters) {
out << ",\n" << indent << FormatKV(c.first, c.second);
}
if (run.memory_result.memory_iterations > 0) {
const auto& memory_result = run.memory_result;
out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter);
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
auto report_if_present = [&out, &indent](const std::string& label,
int64_t val) {
if (val != MemoryManager::TombstoneValue) {
out << ",\n" << indent << FormatKV(label, val);
}
};
report_if_present("total_allocated_bytes",
memory_result.total_allocated_bytes);
report_if_present("net_heap_growth", memory_result.net_heap_growth);
}
if (!run.report_label.empty()) {
out << ",\n" << indent << FormatKV("label", run.report_label);
}
out << '\n';
}
} // end namespace benchmark

View File

@@ -0,0 +1,76 @@
#ifndef BENCHMARK_LOG_H_
#define BENCHMARK_LOG_H_
#include <iostream>
#include <ostream>
namespace benchmark {
namespace internal {
typedef std::basic_ostream<char>&(EndLType)(std::basic_ostream<char>&);
class LogType {
friend LogType& GetNullLogInstance();
friend LogType& GetErrorLogInstance();
// FIXME: Add locking to output.
template <class Tp>
friend LogType& operator<<(LogType&, Tp const&);
friend LogType& operator<<(LogType&, EndLType*);
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;
// NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
// a dependency on benchmark.h from here.
LogType(const LogType&) = delete;
LogType& operator=(const LogType&) = delete;
};
template <class Tp>
LogType& operator<<(LogType& log, Tp const& value) {
if (log.out_) {
*log.out_ << value;
}
return log;
}
inline LogType& operator<<(LogType& log, EndLType* m) {
if (log.out_) {
*log.out_ << m;
}
return log;
}
inline int& LogLevel() {
static int log_level = 0;
return log_level;
}
inline LogType& GetNullLogInstance() {
static LogType null_log(static_cast<std::ostream*>(nullptr));
return null_log;
}
inline LogType& GetErrorLogInstance() {
static LogType error_log(&std::clog);
return error_log;
}
inline LogType& GetLogInstanceForLevel(int level) {
if (level <= LogLevel()) {
return GetErrorLogInstance();
}
return GetNullLogInstance();
}
} // end namespace internal
} // end namespace benchmark
// clang-format off
#define BM_VLOG(x) \
(::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \
" ")
// clang-format on
#endif

View File

@@ -0,0 +1,155 @@
#ifndef BENCHMARK_MUTEX_H_
#define BENCHMARK_MUTEX_H_
#include <condition_variable>
#include <mutex>
#include "check.h"
// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x))
#else
#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op
#endif
#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x))
#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable)
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x))
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x))
#define ACQUIRED_BEFORE(...) \
THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__))
#define ACQUIRED_AFTER(...) \
THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__))
#define REQUIRES(...) \
THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__))
#define REQUIRES_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__))
#define ACQUIRE(...) \
THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__))
#define ACQUIRE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__))
#define RELEASE(...) \
THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__))
#define RELEASE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__))
#define TRY_ACQUIRE(...) \
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__))
#define TRY_ACQUIRE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__))
#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__))
#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x))
#define ASSERT_SHARED_CAPABILITY(x) \
THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x))
#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x))
#define NO_THREAD_SAFETY_ANALYSIS \
THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis)
namespace benchmark {
typedef std::condition_variable Condition;
// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that
// we can annotate them with thread safety attributes and use the
// -Wthread-safety warning with clang. The standard library types cannot be
// used directly because they do not provide the required annotations.
class CAPABILITY("mutex") Mutex {
public:
Mutex() {}
void lock() ACQUIRE() { mut_.lock(); }
void unlock() RELEASE() { mut_.unlock(); }
std::mutex& native_handle() { return mut_; }
private:
std::mutex mut_;
};
class SCOPED_CAPABILITY MutexLock {
typedef std::unique_lock<std::mutex> MutexLockImp;
public:
MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle()) {}
~MutexLock() RELEASE() {}
MutexLockImp& native_handle() { return ml_; }
private:
MutexLockImp ml_;
};
class Barrier {
public:
Barrier(int num_threads) : running_threads_(num_threads) {}
// Called by each thread
bool wait() EXCLUDES(lock_) {
bool last_thread = false;
{
MutexLock ml(lock_);
last_thread = createBarrier(ml);
}
if (last_thread) phase_condition_.notify_all();
return last_thread;
}
void removeThread() EXCLUDES(lock_) {
MutexLock ml(lock_);
--running_threads_;
if (entered_ != 0) phase_condition_.notify_all();
}
private:
Mutex lock_;
Condition phase_condition_;
int running_threads_;
// State for barrier management
int phase_number_ = 0;
int entered_ = 0; // Number of threads that have entered this barrier
// Enter the barrier and wait until all other threads have also
// entered the barrier. Returns iff this is the last thread to
// enter the barrier.
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
BM_CHECK_LT(entered_, running_threads_);
entered_++;
if (entered_ < running_threads_) {
// Wait for all threads to enter
int phase_number_cp = phase_number_;
auto cb = [this, phase_number_cp]() {
return this->phase_number_ > phase_number_cp ||
entered_ == running_threads_; // A thread has aborted in error
};
phase_condition_.wait(ml.native_handle(), cb);
if (phase_number_ > phase_number_cp) return false;
// else (running_threads_ == entered_) and we are the last thread.
}
// Last thread has reached the barrier
phase_number_++;
entered_ = 0;
return true;
}
};
} // end namespace benchmark
#endif // BENCHMARK_MUTEX_H_

View File

@@ -0,0 +1,282 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "perf_counters.h"
#include <cstring>
#include <memory>
#include <vector>
#if defined HAVE_LIBPFM
#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif
namespace benchmark {
namespace internal {
#if defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
// Create a pointer for multiple reads
const size_t bufsize = values_.size() * sizeof(values_[0]);
char* ptr = reinterpret_cast<char*>(values_.data());
size_t size = bufsize;
for (int lead : leaders) {
auto read_bytes = ::read(lead, ptr, size);
if (read_bytes >= ssize_t(sizeof(uint64_t))) {
// Actual data bytes are all bytes minus initial padding
std::size_t data_bytes =
static_cast<std::size_t>(read_bytes) - sizeof(uint64_t);
// This should be very cheap since it's in hot cache
std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
// Increment our counters
ptr += data_bytes;
size -= data_bytes;
} else {
int err = errno;
GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
<< " " << ::strerror(err) << "\n";
return 0;
}
}
return (bufsize - size) / sizeof(uint64_t);
}
const bool PerfCounters::kSupported = true;
// Initializes libpfm only on the first call. Returns whether that single
// initialization was successful.
bool PerfCounters::Initialize() {
// Function-scope static gets initialized only once on first call.
static const bool success = []() {
return pfm_initialize() == PFM_SUCCESS;
}();
return success;
}
bool PerfCounters::IsCounterSupported(const std::string& name) {
Initialize();
perf_event_attr_t attr;
std::memset(&attr, 0, sizeof(attr));
pfm_perf_encode_arg_t arg;
std::memset(&arg, 0, sizeof(arg));
arg.attr = &attr;
const int mode = PFM_PLM3; // user mode only
int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
&arg);
return (ret == PFM_SUCCESS);
}
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
Initialize();
}
// Valid counters will populate these arrays but we start empty
std::vector<std::string> valid_names;
std::vector<int> counter_ids;
std::vector<int> leader_ids;
// Resize to the maximum possible
valid_names.reserve(counter_names.size());
counter_ids.reserve(counter_names.size());
const int kCounterMode = PFM_PLM3; // user mode only
// Group leads will be assigned on demand. The idea is that once we cannot
// create a counter descriptor, the reason is that this group has maxed out
// so we set the group_id again to -1 and retry - giving the algorithm a
// chance to create a new group leader to hold the next set of counters.
int group_id = -1;
// Loop through all performance counters
for (size_t i = 0; i < counter_names.size(); ++i) {
// we are about to push into the valid names vector
// check if we did not reach the maximum
if (valid_names.size() == PerfCounterValues::kMaxCounters) {
// Log a message if we maxed out and stop adding
GetErrorLogInstance()
<< counter_names.size() << " counters were requested. The maximum is "
<< PerfCounterValues::kMaxCounters << " and " << valid_names.size()
<< " were already added. All remaining counters will be ignored\n";
// stop the loop and return what we have already
break;
}
// Check if this name is empty
const auto& name = counter_names[i];
if (name.empty()) {
GetErrorLogInstance()
<< "A performance counter name was the empty string\n";
continue;
}
// Here first means first in group, ie the group leader
const bool is_first = (group_id < 0);
// This struct will be populated by libpfm from the counter string
// and then fed into the syscall perf_event_open
struct perf_event_attr attr {};
attr.size = sizeof(attr);
// This is the input struct to libpfm.
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;
const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
GetErrorLogInstance()
<< "Unknown performance counter name: " << name << "\n";
continue;
}
// We then proceed to populate the remaining fields in our attribute struct
// Note: the man page for perf_event_create suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
attr.disabled = is_first;
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
attr.exclude_hv = true;
// Read all counters in a group in one read.
attr.read_format = PERF_FORMAT_GROUP; //| PERF_FORMAT_TOTAL_TIME_ENABLED |
// PERF_FORMAT_TOTAL_TIME_RUNNING;
int id = -1;
while (id < 0) {
static constexpr size_t kNrOfSyscallRetries = 5;
// Retry syscall as it was interrupted often (b/64774091).
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
++num_retries) {
id = perf_event_open(&attr, 0, -1, group_id, 0);
if (id >= 0 || errno != EINTR) {
break;
}
}
if (id < 0) {
// If the file descriptor is negative we might have reached a limit
// in the current group. Set the group_id to -1 and retry
if (group_id >= 0) {
// Create a new group
group_id = -1;
} else {
// At this point we have already retried to set a new group id and
// failed. We then give up.
break;
}
}
}
// We failed to get a new file descriptor. We might have reached a hard
// hardware limit that cannot be resolved even with group multiplexing
if (id < 0) {
GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
"for performance counter "
<< name << ". Ignoring\n";
// We give up on this counter but try to keep going
// as the others would be fine
continue;
}
if (group_id < 0) {
// This is a leader, store and assign it to the current file descriptor
leader_ids.push_back(id);
group_id = id;
}
// This is a valid counter, add it to our descriptor's list
counter_ids.push_back(id);
valid_names.push_back(name);
}
// Loop through all group leaders activating them
// There is another option of starting ALL counters in a process but
// that would be far reaching an intrusion. If the user is using PMCs
// by themselves then this would have a side effect on them. It is
// friendlier to loop through all groups individually.
for (int lead : leader_ids) {
if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
// This should never happen but if it does, we give up on the
// entire batch as recovery would be a mess.
GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
"Claring out all counters.\n";
// Close all performance counters
for (int id : counter_ids) {
::close(id);
}
// Return an empty object so our internal state is still good and
// the process can continue normally without impact
return NoCounters();
}
}
return PerfCounters(std::move(valid_names), std::move(counter_ids),
std::move(leader_ids));
}
void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
for (int lead : leader_ids_) {
ioctl(lead, PERF_EVENT_IOC_DISABLE);
}
for (int fd : counter_ids_) {
close(fd);
}
}
#else // defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
const bool PerfCounters::kSupported = false;
bool PerfCounters::Initialize() { return false; }
bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
GetErrorLogInstance() << "Performance counters not supported.\n";
}
return NoCounters();
}
void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
PerfCountersMeasurement::PerfCountersMeasurement(
const std::vector<std::string>& counter_names)
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
counters_ = PerfCounters::Create(counter_names);
}
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
if (this != &other) {
CloseCounters();
counter_ids_ = std::move(other.counter_ids_);
leader_ids_ = std::move(other.leader_ids_);
counter_names_ = std::move(other.counter_names_);
}
return *this;
}
} // namespace internal
} // namespace benchmark

View File

@@ -0,0 +1,200 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_PERF_COUNTERS_H
#define BENCHMARK_PERF_COUNTERS_H
#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
#include "mutex.h"
#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif
#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif
namespace benchmark {
namespace internal {
// Typically, we can only read a small number of counters. There is also a
// padding preceding counter values, when reading multiple counters with one
// syscall (which is desirable). PerfCounterValues abstracts these details.
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
// to Snapshot(). The Read() method relocates individual reads, discarding
// the initial padding from each group leader in the values buffer such that
// all user accesses through the [] operator are correct.
class BENCHMARK_EXPORT PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
}
// We are reading correctly now so the values don't need to skip padding
uint64_t operator[](size_t pos) const { return values_[pos]; }
// Increased the maximum to 32 only since the buffer
// is std::array<> backed
static constexpr size_t kMaxCounters = 32;
private:
friend class PerfCounters;
// Get the byte buffer in which perf counters can be captured.
// This is used by PerfCounters::Read
std::pair<char*, size_t> get_data_buffer() {
return {reinterpret_cast<char*>(values_.data()),
sizeof(uint64_t) * (kPadding + nr_counters_)};
}
// This reading is complex and as the goal of this class is to
// abstract away the intrincacies of the reading process, this is
// a better place for it
size_t Read(const std::vector<int>& leaders);
// Move the padding to 2 due to the reading algorithm (1st padding plus a
// current read padding)
static constexpr size_t kPadding = 2;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
};
// Collect PMU counters. The object, once constructed, is ready to be used by
// calling read(). PMU counter collection is enabled from the time create() is
// called, to obtain the object, until the object's destructor is called.
class BENCHMARK_EXPORT PerfCounters final {
public:
// True iff this platform supports performance counters.
static const bool kSupported;
// Returns an empty object
static PerfCounters NoCounters() { return PerfCounters(); }
~PerfCounters() { CloseCounters(); }
PerfCounters() = default;
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
PerfCounters& operator=(PerfCounters&&) noexcept;
PerfCounters& operator=(const PerfCounters&) = delete;
// Platform-specific implementations may choose to do some library
// initialization here.
static bool Initialize();
// Check if the given counter is supported, if the app wants to
// check before passing
static bool IsCounterSupported(const std::string& name);
// Return a PerfCounters object ready to read the counters with the names
// specified. The values are user-mode only. The counter name format is
// implementation and OS specific.
// In case of failure, this method will in the worst case return an
// empty object whose state will still be valid.
static PerfCounters Create(const std::vector<std::string>& counter_names);
// Take a snapshot of the current value of the counters into the provided
// valid PerfCounterValues storage. The values are populated such that:
// names()[i]'s value is (*values)[i]
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
return values->Read(leader_ids_) == counter_ids_.size();
#else
(void)values;
return false;
#endif
}
const std::vector<std::string>& names() const { return counter_names_; }
size_t num_counters() const { return counter_names_.size(); }
private:
PerfCounters(const std::vector<std::string>& counter_names,
std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
: counter_ids_(std::move(counter_ids)),
leader_ids_(std::move(leader_ids)),
counter_names_(counter_names) {}
void CloseCounters() const;
std::vector<int> counter_ids_;
std::vector<int> leader_ids_;
std::vector<std::string> counter_names_;
};
// Typical usage of the above primitives.
class BENCHMARK_EXPORT PerfCountersMeasurement final {
public:
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
size_t num_counters() const { return counters_.num_counters(); }
std::vector<std::string> names() const { return counters_.names(); }
BENCHMARK_ALWAYS_INLINE bool Start() {
if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
return valid_read_;
}
BENCHMARK_ALWAYS_INLINE bool Stop(
std::vector<std::pair<std::string, double>>& measurements) {
if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&end_values_);
ClobberMemory();
for (size_t i = 0; i < counters_.names().size(); ++i) {
double measurement = static_cast<double>(end_values_[i]) -
static_cast<double>(start_values_[i]);
measurements.push_back({counters_.names()[i], measurement});
}
return valid_read_;
}
private:
PerfCounters counters_;
bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
};
} // namespace internal
} // namespace benchmark
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#endif // BENCHMARK_PERF_COUNTERS_H

View File

@@ -0,0 +1,156 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_RE_H_
#define BENCHMARK_RE_H_
#include "internal_macros.h"
// clang-format off
#if !defined(HAVE_STD_REGEX) && \
!defined(HAVE_GNU_POSIX_REGEX) && \
!defined(HAVE_POSIX_REGEX)
// No explicit regex selection; detect based on builtin hints.
#if defined(BENCHMARK_OS_LINUX) || defined(BENCHMARK_OS_APPLE)
#define HAVE_POSIX_REGEX 1
#elif __cplusplus >= 199711L
#define HAVE_STD_REGEX 1
#endif
#endif
// Prefer C regex libraries when compiling w/o exceptions so that we can
// correctly report errors.
#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
defined(HAVE_STD_REGEX) && \
(defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
#undef HAVE_STD_REGEX
#endif
#if defined(HAVE_STD_REGEX)
#include <regex>
#elif defined(HAVE_GNU_POSIX_REGEX)
#include <gnuregex.h>
#elif defined(HAVE_POSIX_REGEX)
#include <regex.h>
#else
#error No regular expression backend was found!
#endif
// clang-format on
#include <string>
#include "check.h"
namespace benchmark {
// A wrapper around the POSIX regular expression API that provides automatic
// cleanup
class Regex {
public:
Regex() : init_(false) {}
~Regex();
// Compile a regular expression matcher from spec. Returns true on success.
//
// On failure (and if error is not nullptr), error is populated with a human
// readable error message if an error occurs.
bool Init(const std::string& spec, std::string* error);
// Returns whether str matches the compiled regular expression.
bool Match(const std::string& str);
private:
bool init_;
// Underlying regular expression object
#if defined(HAVE_STD_REGEX)
std::regex re_;
#elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX)
regex_t re_;
#else
#error No regular expression backend implementation available
#endif
};
#if defined(HAVE_STD_REGEX)
inline bool Regex::Init(const std::string& spec, std::string* error) {
#ifdef BENCHMARK_HAS_NO_EXCEPTIONS
((void)error); // suppress unused warning
#else
try {
#endif
re_ = std::regex(spec, std::regex_constants::extended);
init_ = true;
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
}
catch (const std::regex_error& e) {
if (error) {
*error = e.what();
}
}
#endif
return init_;
}
inline Regex::~Regex() {}
inline bool Regex::Match(const std::string& str) {
if (!init_) {
return false;
}
return std::regex_search(str, re_);
}
#else
inline bool Regex::Init(const std::string& spec, std::string* error) {
int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB);
if (ec != 0) {
if (error) {
size_t needed = regerror(ec, &re_, nullptr, 0);
std::vector<char> errbuf(needed);
regerror(ec, &re_, errbuf.data(), needed);
// regerror returns the number of bytes necessary to null terminate
// the string, so we move that when assigning to error.
BM_CHECK_NE(needed, 0);
error->assign(errbuf.data(), needed - 1);
}
return false;
}
init_ = true;
return true;
}
inline Regex::~Regex() {
if (init_) {
regfree(&re_);
}
}
inline bool Regex::Match(const std::string& str) {
if (!init_) {
return false;
}
return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0;
}
#endif
} // end namespace benchmark
#endif // BENCHMARK_RE_H_

View File

@@ -0,0 +1,133 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
BenchmarkReporter::BenchmarkReporter()
: output_stream_(&std::cout), error_stream_(&std::cerr) {}
BenchmarkReporter::~BenchmarkReporter() {}
void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Context const &context) {
BM_CHECK(out) << "cannot be null";
auto &Out = *out;
#ifndef BENCHMARK_OS_QURT
// Date/time information is not available on QuRT.
// Attempting to get it via this call cause the binary to crash.
Out << LocalDateTimeString() << "\n";
#endif
if (benchmark::BenchmarkReporter::Context::executable_name != nullptr) {
Out << "Running " << benchmark::BenchmarkReporter::Context::executable_name
<< "\n";
}
const CPUInfo &info = context.cpu_info;
Out << "Run on (" << info.num_cpus << " X "
<< (info.cycles_per_second / 1000000.0) << " MHz CPU "
<< ((info.num_cpus > 1) ? "s" : "") << ")\n";
if (!info.caches.empty()) {
Out << "CPU Caches:\n";
for (const auto &CInfo : info.caches) {
Out << " L" << CInfo.level << " " << CInfo.type << " "
<< (CInfo.size / 1024) << " KiB";
if (CInfo.num_sharing != 0) {
Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")";
}
Out << "\n";
}
}
if (!info.load_avg.empty()) {
Out << "Load Average: ";
for (auto It = info.load_avg.begin(); It != info.load_avg.end();) {
Out << StrFormat("%.2f", *It++);
if (It != info.load_avg.end()) {
Out << ", ";
}
}
Out << "\n";
}
std::map<std::string, std::string> *global_context =
internal::GetGlobalContext();
if (global_context != nullptr) {
for (const auto &kv : *global_context) {
Out << kv.first << ": " << kv.second << "\n";
}
}
if (CPUInfo::Scaling::ENABLED == info.scaling) {
Out << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "
"overhead.\n";
}
const SystemInfo &sysinfo = context.sys_info;
if (SystemInfo::ASLR::ENABLED == sysinfo.ASLRStatus) {
Out << "***WARNING*** ASLR is enabled, the results may have unreproducible "
"noise in them.\n";
}
#ifndef NDEBUG
Out << "***WARNING*** Library was built as DEBUG. Timings may be "
"affected.\n";
#endif
}
// No initializer because it's already initialized to NULL.
const char *BenchmarkReporter::Context::executable_name;
BenchmarkReporter::Context::Context()
: cpu_info(CPUInfo::Get()), sys_info(SystemInfo::Get()) {}
std::string BenchmarkReporter::Run::benchmark_name() const {
std::string name = run_name.str();
if (run_type == RT_Aggregate) {
name += "_" + aggregate_name;
}
return name;
}
double BenchmarkReporter::Run::GetAdjustedRealTime() const {
double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) {
new_time /= static_cast<double>(iterations);
}
return new_time;
}
double BenchmarkReporter::Run::GetAdjustedCPUTime() const {
double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) {
new_time /= static_cast<double>(iterations);
}
return new_time;
}
} // end namespace benchmark

View File

@@ -0,0 +1,232 @@
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
// Copyright 2017 Roman Lebedev. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "statistics.h"
#include <algorithm>
#include <cmath>
#include <numeric>
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
namespace benchmark {
const auto StatisticsSum = [](const std::vector<double>& v) {
return std::accumulate(v.begin(), v.end(), 0.0);
};
double StatisticsMean(const std::vector<double>& v) {
if (v.empty()) {
return 0.0;
}
return StatisticsSum(v) * (1.0 / static_cast<double>(v.size()));
}
double StatisticsMedian(const std::vector<double>& v) {
if (v.size() < 3) {
return StatisticsMean(v);
}
std::vector<double> copy(v);
auto center = copy.begin() + v.size() / 2;
std::nth_element(copy.begin(), center, copy.end());
// Did we have an odd number of samples? If yes, then center is the median.
// If not, then we are looking for the average between center and the value
// before. Instead of resorting, we just look for the max value before it,
// which is not necessarily the element immediately preceding `center` Since
// `copy` is only partially sorted by `nth_element`.
if (v.size() % 2 == 1) {
return *center;
}
auto center2 = std::max_element(copy.begin(), center);
return (*center + *center2) / 2.0;
}
// Return the sum of the squares of this sample set
const auto SumSquares = [](const std::vector<double>& v) {
return std::inner_product(v.begin(), v.end(), v.begin(), 0.0);
};
const auto Sqr = [](const double dat) { return dat * dat; };
const auto Sqrt = [](const double dat) {
// Avoid NaN due to imprecision in the calculations
if (dat < 0.0) {
return 0.0;
}
return std::sqrt(dat);
};
double StatisticsStdDev(const std::vector<double>& v) {
const auto mean = StatisticsMean(v);
if (v.empty()) {
return mean;
}
// Sample standard deviation is undefined for n = 1
if (v.size() == 1) {
return 0.0;
}
const double avg_squares =
SumSquares(v) * (1.0 / static_cast<double>(v.size()));
return Sqrt(static_cast<double>(v.size()) /
(static_cast<double>(v.size()) - 1.0) *
(avg_squares - Sqr(mean)));
}
double StatisticsCV(const std::vector<double>& v) {
if (v.size() < 2) {
return 0.0;
}
const auto stddev = StatisticsStdDev(v);
const auto mean = StatisticsMean(v);
if (std::fpclassify(mean) == FP_ZERO) {
return 0.0;
}
return stddev / mean;
}
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports) {
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
auto error_count = std::count_if(reports.begin(), reports.end(),
[](Run const& run) { return run.skipped; });
if (reports.size() - static_cast<size_t>(error_count) < 2) {
// We don't report aggregated data if there was a single run.
return results;
}
// Accumulators.
std::vector<double> real_accumulated_time_stat;
std::vector<double> cpu_accumulated_time_stat;
real_accumulated_time_stat.reserve(reports.size());
cpu_accumulated_time_stat.reserve(reports.size());
// All repetitions should be run with the same number of iterations so we
// can take this information from the first benchmark.
const IterationCount run_iterations = reports.front().iterations;
// create stats for user counters
struct CounterStat {
Counter c;
std::vector<double> s;
};
std::map<std::string, CounterStat> counter_stats;
for (Run const& r : reports) {
for (auto const& cnt : r.counters) {
auto it = counter_stats.find(cnt.first);
if (it == counter_stats.end()) {
it = counter_stats
.emplace(cnt.first,
CounterStat{cnt.second, std::vector<double>{}})
.first;
it->second.s.reserve(reports.size());
} else {
BM_CHECK_EQ(it->second.c.flags, cnt.second.flags);
}
}
}
// Populate the accumulators.
for (Run const& run : reports) {
BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
BM_CHECK_EQ(run_iterations, run.iterations);
if (run.skipped != 0u) {
continue;
}
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
// user counters
for (auto const& cnt : run.counters) {
auto it = counter_stats.find(cnt.first);
BM_CHECK_NE(it, counter_stats.end());
it->second.s.emplace_back(cnt.second);
}
}
// Only add label if it is same for all runs
std::string report_label = reports[0].report_label;
for (std::size_t i = 1; i < reports.size(); i++) {
if (reports[i].report_label != report_label) {
report_label = "";
break;
}
}
const double iteration_rescale_factor =
static_cast<double>(reports.size()) / static_cast<double>(run_iterations);
for (const auto& Stat : *reports[0].statistics) {
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run data;
data.run_name = reports[0].run_name;
data.family_index = reports[0].family_index;
data.per_family_instance_index = reports[0].per_family_instance_index;
data.run_type = BenchmarkReporter::Run::RT_Aggregate;
data.threads = reports[0].threads;
data.repetitions = reports[0].repetitions;
data.repetition_index = Run::no_repetition_index;
data.aggregate_name = Stat.name_;
data.aggregate_unit = Stat.unit_;
data.report_label = report_label;
// It is incorrect to say that an aggregate is computed over
// run's iterations, because those iterations already got averaged.
// Similarly, if there are N repetitions with 1 iterations each,
// an aggregate will be computed over N measurements, not 1.
// Thus it is best to simply use the count of separate reports.
data.iterations = static_cast<IterationCount>(reports.size());
data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
if (data.aggregate_unit == StatisticUnit::kTime) {
// We will divide these times by data.iterations when reporting, but the
// data.iterations is not necessarily the scale of these measurements,
// because in each repetition, these timers are sum over all the iters.
// And if we want to say that the stats are over N repetitions and not
// M iterations, we need to multiply these by (N/M).
data.real_accumulated_time *= iteration_rescale_factor;
data.cpu_accumulated_time *= iteration_rescale_factor;
}
data.time_unit = reports[0].time_unit;
// user counters
for (auto const& kv : counter_stats) {
// Do *NOT* rescale the custom counters. They are already properly scaled.
const auto uc_stat = Stat.compute_(kv.second.s);
auto c = Counter(uc_stat, counter_stats[kv.first].c.flags,
counter_stats[kv.first].c.oneK);
data.counters[kv.first] = c;
}
results.push_back(data);
}
return results;
}
} // end namespace benchmark

View File

@@ -0,0 +1,44 @@
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
// Copyright 2017 Roman Lebedev. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef STATISTICS_H_
#define STATISTICS_H_
#include <vector>
#include "benchmark/benchmark.h"
namespace benchmark {
// Return a vector containing the mean, median and standard deviation
// information (and any user-specified info) for the specified list of reports.
// If 'reports' contains less than two non-errored runs an empty vector is
// returned
BENCHMARK_EXPORT
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports);
BENCHMARK_EXPORT
double StatisticsMean(const std::vector<double>& v);
BENCHMARK_EXPORT
double StatisticsMedian(const std::vector<double>& v);
BENCHMARK_EXPORT
double StatisticsStdDev(const std::vector<double>& v);
BENCHMARK_EXPORT
double StatisticsCV(const std::vector<double>& v);
} // end namespace benchmark
#endif // STATISTICS_H_

View File

@@ -0,0 +1,277 @@
#include "string_util.h"
#include <array>
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
#include <cerrno>
#endif
#include <cmath>
#include <cstdarg>
#include <cstdio>
#include <memory>
#include <sstream>
#include "arraysize.h"
#include "benchmark/benchmark.h"
namespace benchmark {
namespace {
// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
const char* const kBigSIUnits[] = {"k", "M", "G", "T", "P", "E", "Z", "Y"};
// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
const char* const kBigIECUnits[] = {"Ki", "Mi", "Gi", "Ti",
"Pi", "Ei", "Zi", "Yi"};
// milli, micro, nano, pico, femto, atto, zepto, yocto.
const char* const kSmallSIUnits[] = {"m", "u", "n", "p", "f", "a", "z", "y"};
// We require that all three arrays have the same size.
static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
"SI and IEC unit arrays must be the same size");
static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
"Small SI and Big SI unit arrays must be the same size");
const int64_t kUnitsSize = arraysize(kBigSIUnits);
void ToExponentAndMantissa(double val, int precision, double one_k,
std::string* mantissa, int64_t* exponent) {
std::stringstream mantissa_stream;
if (val < 0) {
mantissa_stream << "-";
val = -val;
}
// Adjust threshold so that it never excludes things which can't be rendered
// in 'precision' digits.
const double adjusted_threshold =
std::max(1.0, 1.0 / std::pow(10.0, precision));
const double big_threshold = (adjusted_threshold * one_k) - 1;
const double small_threshold = adjusted_threshold;
// Values in ]simple_threshold,small_threshold[ will be printed as-is
const double simple_threshold = 0.01;
if (val > big_threshold) {
// Positive powers
double scaled = val;
for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) {
scaled /= one_k;
if (scaled <= big_threshold) {
mantissa_stream << scaled;
*exponent = static_cast<int64_t>(i + 1);
*mantissa = mantissa_stream.str();
return;
}
}
mantissa_stream << val;
*exponent = 0;
} else if (val < small_threshold) {
// Negative powers
if (val < simple_threshold) {
double scaled = val;
for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) {
scaled *= one_k;
if (scaled >= small_threshold) {
mantissa_stream << scaled;
*exponent = -static_cast<int64_t>(i + 1);
*mantissa = mantissa_stream.str();
return;
}
}
}
mantissa_stream << val;
*exponent = 0;
} else {
mantissa_stream << val;
*exponent = 0;
}
*mantissa = mantissa_stream.str();
}
std::string ExponentToPrefix(int64_t exponent, bool iec) {
if (exponent == 0) {
return {};
}
const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
if (index >= kUnitsSize) {
return {};
}
const char* const* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
return std::string(array[index]);
}
std::string ToBinaryStringFullySpecified(double value, int precision,
Counter::OneK one_k) {
std::string mantissa;
int64_t exponent = 0;
ToExponentAndMantissa(value, precision,
one_k == Counter::kIs1024 ? 1024.0 : 1000.0, &mantissa,
&exponent);
return mantissa + ExponentToPrefix(exponent, one_k == Counter::kIs1024);
}
std::string StrFormatImp(const char* msg, va_list args) {
// we might need a second shot at this, so pre-emptivly make a copy
va_list args_cp;
va_copy(args_cp, args);
// TODO(ericwf): use std::array for first attempt to avoid one memory
// allocation guess what the size might be
std::array<char, 256> local_buff = {};
// 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation
// in the android-ndk
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
#endif // __GNUC__
auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif // __GNUC__
va_end(args_cp);
// handle empty expansion
if (ret == 0) {
return {};
}
if (static_cast<std::size_t>(ret) < local_buff.size()) {
return std::string(local_buff.data());
}
// we did not provide a long enough buffer on our first attempt.
// add 1 to size to account for null-byte in size cast to prevent overflow
std::size_t size = static_cast<std::size_t>(ret) + 1;
auto buff_ptr = std::unique_ptr<char[]>(new char[size]);
// 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation
// in the android-ndk
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
#endif // __GNUC__
vsnprintf(buff_ptr.get(), size, msg, args);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif // __GNUC__
return std::string(buff_ptr.get());
}
} // end namespace
std::string HumanReadableNumber(double n, Counter::OneK one_k) {
return ToBinaryStringFullySpecified(n, 1, one_k);
}
std::string StrFormat(const char* format, ...) {
va_list args;
va_start(args, format);
std::string tmp = StrFormatImp(format, args);
va_end(args);
return tmp;
}
std::vector<std::string> StrSplit(const std::string& str, char delim) {
if (str.empty()) {
return {};
}
std::vector<std::string> ret;
size_t first = 0;
size_t next = str.find(delim);
for (; next != std::string::npos;
first = next + 1, next = str.find(delim, first)) {
ret.push_back(str.substr(first, next - first));
}
ret.push_back(str.substr(first));
return ret;
}
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
* GNU STL in Android NDK lacks support for some C++11 functions, including
* stoul, stoi, stod. We reimplement them here using C functions strtoul,
* strtol, strtod. Note that reimplemented functions are in benchmark::
* namespace, not std:: namespace.
*/
unsigned long stoul(const std::string& str, size_t* pos, int base) {
/* Record previous errno */
const int oldErrno = errno;
errno = 0;
const char* strStart = str.c_str();
char* strEnd = const_cast<char*>(strStart);
const unsigned long result = strtoul(strStart, &strEnd, base);
const int strtoulErrno = errno;
/* Restore previous errno */
errno = oldErrno;
/* Check for errors and return */
if (strtoulErrno == ERANGE) {
throw std::out_of_range("stoul failed: " + str +
" is outside of range of unsigned long");
} else if (strEnd == strStart || strtoulErrno != 0) {
throw std::invalid_argument("stoul failed: " + str + " is not an integer");
}
if (pos != nullptr) {
*pos = static_cast<size_t>(strEnd - strStart);
}
return result;
}
int stoi(const std::string& str, size_t* pos, int base) {
/* Record previous errno */
const int oldErrno = errno;
errno = 0;
const char* strStart = str.c_str();
char* strEnd = const_cast<char*>(strStart);
const long result = strtol(strStart, &strEnd, base);
const int strtolErrno = errno;
/* Restore previous errno */
errno = oldErrno;
/* Check for errors and return */
if (strtolErrno == ERANGE || long(int(result)) != result) {
throw std::out_of_range("stoul failed: " + str +
" is outside of range of int");
} else if (strEnd == strStart || strtolErrno != 0) {
throw std::invalid_argument("stoul failed: " + str + " is not an integer");
}
if (pos != nullptr) {
*pos = static_cast<size_t>(strEnd - strStart);
}
return int(result);
}
double stod(const std::string& str, size_t* pos) {
/* Record previous errno */
const int oldErrno = errno;
errno = 0;
const char* strStart = str.c_str();
char* strEnd = const_cast<char*>(strStart);
const double result = strtod(strStart, &strEnd);
/* Restore previous errno */
const int strtodErrno = errno;
errno = oldErrno;
/* Check for errors and return */
if (strtodErrno == ERANGE) {
throw std::out_of_range("stoul failed: " + str +
" is outside of range of int");
} else if (strEnd == strStart || strtodErrno != 0) {
throw std::invalid_argument("stoul failed: " + str + " is not an integer");
}
if (pos != nullptr) {
*pos = static_cast<size_t>(strEnd - strStart);
}
return result;
}
#endif
} // end namespace benchmark

View File

@@ -0,0 +1,69 @@
#ifndef BENCHMARK_STRING_UTIL_H_
#define BENCHMARK_STRING_UTIL_H_
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "benchmark/benchmark.h"
#include "benchmark/export.h"
#include "check.h"
namespace benchmark {
BENCHMARK_EXPORT
std::string HumanReadableNumber(double n, Counter::OneK one_k);
BENCHMARK_EXPORT
#if defined(__MINGW32__)
__attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2)))
#elif defined(__GNUC__)
__attribute__((format(printf, 1, 2)))
#endif
std::string
StrFormat(const char* format, ...);
inline std::ostream& StrCatImp(std::ostream& out) BENCHMARK_NOEXCEPT {
return out;
}
template <class First, class... Rest>
inline std::ostream& StrCatImp(std::ostream& out, First&& f, Rest&&... rest) {
out << std::forward<First>(f);
return StrCatImp(out, std::forward<Rest>(rest)...);
}
template <class... Args>
inline std::string StrCat(Args&&... args) {
std::ostringstream ss;
StrCatImp(ss, std::forward<Args>(args)...);
return ss.str();
}
BENCHMARK_EXPORT
std::vector<std::string> StrSplit(const std::string& str, char delim);
// Disable lint checking for this block since it re-implements C functions.
// NOLINTBEGIN
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
* GNU STL in Android NDK lacks support for some C++11 functions, including
* stoul, stoi, stod. We reimplement them here using C functions strtoul,
* strtol, strtod. Note that reimplemented functions are in benchmark::
* namespace, not std:: namespace.
*/
unsigned long stoul(const std::string& str, size_t* pos = nullptr,
int base = 10);
int stoi(const std::string& str, size_t* pos = nullptr, int base = 10);
double stod(const std::string& str, size_t* pos = nullptr);
#else
using std::stod; // NOLINT(misc-unused-using-decls)
using std::stoi; // NOLINT(misc-unused-using-decls)
using std::stoul; // NOLINT(misc-unused-using-decls)
#endif
// NOLINTEND
} // end namespace benchmark
#endif // BENCHMARK_STRING_UTIL_H_

View File

@@ -0,0 +1,902 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#if !defined(WINVER) || WINVER < 0x0600
#undef WINVER
#define WINVER 0x0600
#endif // WINVER handling
#include <shlwapi.h>
#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA
#include <versionhelpers.h>
#include <windows.h>
#include <codecvt>
#else
#include <fcntl.h>
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
#include <unistd.h>
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || \
defined BENCHMARK_OS_DRAGONFLY
#define BENCHMARK_HAS_SYSCTL
#include <sys/sysctl.h>
#endif
#endif
#if defined(BENCHMARK_OS_SOLARIS)
#include <kstat.h>
#include <netdb.h>
#endif
#if defined(BENCHMARK_OS_QNX)
#include <sys/syspage.h>
#endif
#if defined(BENCHMARK_OS_QURT)
#include <qurt.h>
#endif
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
#include <pthread.h>
#endif
#if defined(BENCHMARK_OS_LINUX)
#include <sys/personality.h>
#endif
#include <algorithm>
#include <array>
#include <bitset>
#include <cerrno>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <locale>
#include <memory>
#include <random>
#include <sstream>
#include <utility>
#include "benchmark/benchmark.h"
#include "check.h"
#include "cycleclock.h"
#include "log.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
namespace {
void PrintImp(std::ostream& out) { out << '\n'; }
template <class First, class... Rest>
void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
out << std::forward<First>(f);
PrintImp(out, std::forward<Rest>(rest)...);
}
template <class... Args>
BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
PrintImp(std::cerr, std::forward<Args>(args)...);
std::cerr << std::flush;
std::exit(EXIT_FAILURE);
}
#ifdef BENCHMARK_HAS_SYSCTL
/// ValueUnion - A type used to correctly alias the byte-for-byte output of
/// `sysctl` with the result type it's to be interpreted as.
struct ValueUnion {
union DataT {
int32_t int32_value;
int64_t int64_value;
// For correct aliasing of union members from bytes.
char bytes[8];
};
using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
// The size of the data union member + its trailing array size.
std::size_t size;
DataPtr buff;
public:
ValueUnion() : size(0), buff(nullptr, &std::free) {}
explicit ValueUnion(std::size_t buff_size)
: size(sizeof(DataT) + buff_size),
buff(::new(std::malloc(size)) DataT(), &std::free) {}
ValueUnion(ValueUnion&& other) = default;
explicit operator bool() const { return bool(buff); }
char* data() const { return buff->bytes; }
std::string GetAsString() const { return std::string(data()); }
int64_t GetAsInteger() const {
if (size == sizeof(buff->int32_value))
return buff->int32_value;
else if (size == sizeof(buff->int64_value))
return buff->int64_value;
BENCHMARK_UNREACHABLE();
}
template <class T, int N>
std::array<T, N> GetAsArray() {
const int arr_size = sizeof(T) * N;
BM_CHECK_LE(arr_size, size);
std::array<T, N> arr;
std::memcpy(arr.data(), data(), arr_size);
return arr;
}
};
ValueUnion GetSysctlImp(std::string const& name) {
#if defined BENCHMARK_OS_OPENBSD
int mib[2];
mib[0] = CTL_HW;
if ((name == "hw.ncpuonline") || (name == "hw.cpuspeed")) {
ValueUnion buff(sizeof(int));
if (name == "hw.ncpuonline") {
mib[1] = HW_NCPUONLINE;
} else {
mib[1] = HW_CPUSPEED;
}
if (sysctl(mib, 2, buff.data(), &buff.size, nullptr, 0) == -1) {
return ValueUnion();
}
return buff;
}
return ValueUnion();
#else
std::size_t cur_buff_size = 0;
if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1)
return ValueUnion();
ValueUnion buff(cur_buff_size);
if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0)
return buff;
return ValueUnion();
#endif
}
BENCHMARK_MAYBE_UNUSED
bool GetSysctl(std::string const& name, std::string* out) {
out->clear();
auto buff = GetSysctlImp(name);
if (!buff) return false;
out->assign(buff.data());
return true;
}
template <class Tp,
class = typename std::enable_if<std::is_integral<Tp>::value>::type>
bool GetSysctl(std::string const& name, Tp* out) {
*out = 0;
auto buff = GetSysctlImp(name);
if (!buff) return false;
*out = static_cast<Tp>(buff.GetAsInteger());
return true;
}
template <class Tp, size_t N>
bool GetSysctl(std::string const& name, std::array<Tp, N>* out) {
auto buff = GetSysctlImp(name);
if (!buff) return false;
*out = buff.GetAsArray<Tp, N>();
return true;
}
#endif
template <class ArgT>
bool ReadFromFile(std::string const& fname, ArgT* arg) {
*arg = ArgT();
std::ifstream f(fname.c_str());
if (!f.is_open()) {
return false;
}
f >> *arg;
return f.good();
}
CPUInfo::Scaling CpuScaling(int num_cpus) {
// We don't have a valid CPU count, so don't even bother.
if (num_cpus <= 0) {
return CPUInfo::Scaling::UNKNOWN;
}
#if defined(BENCHMARK_OS_QNX)
return CPUInfo::Scaling::UNKNOWN;
#elif !defined(BENCHMARK_OS_WINDOWS)
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
// local file system. If reading the exported files fails, then we may not be
// running on Linux, so we silently ignore all the read errors.
std::string res;
for (int cpu = 0; cpu < num_cpus; ++cpu) {
std::string governor_file =
StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
if (ReadFromFile(governor_file, &res) && res != "performance") {
return CPUInfo::Scaling::ENABLED;
}
}
return CPUInfo::Scaling::DISABLED;
#else
return CPUInfo::Scaling::UNKNOWN;
#endif
}
int CountSetBitsInCPUMap(std::string val) {
auto CountBits = [](std::string part) {
using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
part = "0x" + part;
CPUMask mask(benchmark::stoul(part, nullptr, 16));
return static_cast<int>(mask.count());
};
std::size_t pos = 0;
int total = 0;
while ((pos = val.find(',')) != std::string::npos) {
total += CountBits(val.substr(0, pos));
val = val.substr(pos + 1);
}
if (!val.empty()) {
total += CountBits(val);
}
return total;
}
BENCHMARK_MAYBE_UNUSED
std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
std::vector<CPUInfo::CacheInfo> res;
std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
int idx = 0;
while (true) {
CPUInfo::CacheInfo info;
std::string fpath = StrCat(dir, "index", idx++, "/");
std::ifstream f(StrCat(fpath, "size").c_str());
if (!f.is_open()) {
break;
}
std::string suffix;
f >> info.size;
if (f.fail()) {
PrintErrorAndDie("Failed while reading file '", fpath, "size'");
}
if (f.good()) {
f >> suffix;
if (f.bad()) {
PrintErrorAndDie(
"Invalid cache size format: failed to read size suffix");
} else if (f && suffix != "K") {
PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
} else if (suffix == "K") {
info.size *= 1024;
}
}
if (!ReadFromFile(StrCat(fpath, "type"), &info.type)) {
PrintErrorAndDie("Failed to read from file ", fpath, "type");
}
if (!ReadFromFile(StrCat(fpath, "level"), &info.level)) {
PrintErrorAndDie("Failed to read from file ", fpath, "level");
}
std::string map_str;
if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str)) {
PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map");
}
info.num_sharing = CountSetBitsInCPUMap(map_str);
res.push_back(info);
}
return res;
}
#ifdef BENCHMARK_OS_MACOSX
std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
std::vector<CPUInfo::CacheInfo> res;
std::array<int, 4> cache_counts{{0, 0, 0, 0}};
GetSysctl("hw.cacheconfig", &cache_counts);
struct {
std::string name;
std::string type;
int level;
int num_sharing;
} cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]},
{"hw.l1icachesize", "Instruction", 1, cache_counts[1]},
{"hw.l2cachesize", "Unified", 2, cache_counts[2]},
{"hw.l3cachesize", "Unified", 3, cache_counts[3]}};
for (auto& c : cases) {
int val;
if (!GetSysctl(c.name, &val)) continue;
CPUInfo::CacheInfo info;
info.type = c.type;
info.level = c.level;
info.size = val;
info.num_sharing = c.num_sharing;
res.push_back(std::move(info));
}
return res;
}
#elif defined(BENCHMARK_OS_WINDOWS)
std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
std::vector<CPUInfo::CacheInfo> res;
DWORD buffer_size = 0;
using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
using CInfo = CACHE_DESCRIPTOR;
using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
GetLogicalProcessorInformation(nullptr, &buffer_size);
UPtr buff(static_cast<PInfo*>(std::malloc(buffer_size)), &std::free);
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) {
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
GetLastError());
}
PInfo* it = buff.get();
PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
for (; it != end; ++it) {
if (it->Relationship != RelationCache) {
continue;
}
using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
BitSet b(it->ProcessorMask);
// To prevent duplicates, only consider caches where CPU 0 is specified
if (!b.test(0)) continue;
const CInfo& cache = it->Cache;
CPUInfo::CacheInfo C;
C.num_sharing = static_cast<int>(b.count());
C.level = cache.Level;
C.size = static_cast<int>(cache.Size);
C.type = "Unknown";
switch (cache.Type) {
// Windows SDK version >= 10.0.26100.0
#ifdef NTDDI_WIN11_GE
case CacheUnknown:
break;
#endif
case CacheUnified:
C.type = "Unified";
break;
case CacheInstruction:
C.type = "Instruction";
break;
case CacheData:
C.type = "Data";
break;
case CacheTrace:
C.type = "Trace";
break;
}
res.push_back(C);
}
return res;
}
#elif BENCHMARK_OS_QNX
std::vector<CPUInfo::CacheInfo> GetCacheSizesQNX() {
std::vector<CPUInfo::CacheInfo> res;
struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr);
uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr);
int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize;
for (int i = 0; i < num; ++i) {
CPUInfo::CacheInfo info;
switch (cache->flags) {
case CACHE_FLAG_INSTR:
info.type = "Instruction";
info.level = 1;
break;
case CACHE_FLAG_DATA:
info.type = "Data";
info.level = 1;
break;
case CACHE_FLAG_UNIFIED:
info.type = "Unified";
info.level = 2;
break;
case CACHE_FLAG_SHARED:
info.type = "Shared";
info.level = 3;
break;
default:
continue;
break;
}
info.size = cache->line_size * cache->num_lines;
info.num_sharing = 0;
res.push_back(std::move(info));
cache = SYSPAGE_ARRAY_ADJ_OFFSET(cacheattr, cache, elsize);
}
return res;
}
#endif
std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
#ifdef BENCHMARK_OS_MACOSX
return GetCacheSizesMacOSX();
#elif defined(BENCHMARK_OS_WINDOWS)
return GetCacheSizesWindows();
#elif defined(BENCHMARK_OS_QNX)
return GetCacheSizesQNX();
#elif defined(BENCHMARK_OS_QURT) || defined(__EMSCRIPTEN__)
return std::vector<CPUInfo::CacheInfo>();
#elif defined(__FRC_ROBORIO__)
return std::vector<CPUInfo::CacheInfo>();
#else
return GetCacheSizesFromKVFS();
#endif
}
std::string GetSystemName() {
#if defined(BENCHMARK_OS_WINDOWS)
std::string str;
static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1;
TCHAR hostname[COUNT] = {'\0'};
DWORD DWCOUNT = COUNT;
if (!GetComputerName(hostname, &DWCOUNT)) return std::string("");
#ifndef UNICODE
str = std::string(hostname, DWCOUNT);
#else
// `WideCharToMultiByte` returns `0` when conversion fails.
int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname,
DWCOUNT, NULL, 0, NULL, NULL);
str.resize(len);
WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0],
str.size(), NULL, NULL);
#endif
return str;
#elif defined(BENCHMARK_OS_QURT)
std::string str = "Hexagon DSP";
qurt_arch_version_t arch_version_struct;
if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) {
str += " v";
str += std::to_string(arch_version_struct.arch_version);
}
return str;
#else
#ifndef HOST_NAME_MAX
#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_NACL)
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_QNX)
#define HOST_NAME_MAX 154
#elif defined(BENCHMARK_OS_RTEMS)
#define HOST_NAME_MAX 256
#elif defined(BENCHMARK_OS_SOLARIS)
#define HOST_NAME_MAX MAXHOSTNAMELEN
#elif defined(BENCHMARK_OS_ZOS)
#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
#else
#pragma message("HOST_NAME_MAX not defined. using 64")
#define HOST_NAME_MAX 64
#endif
#endif // def HOST_NAME_MAX
char hostname[HOST_NAME_MAX];
int retVal = gethostname(hostname, HOST_NAME_MAX);
return retVal != 0 ? std::string() : std::string(hostname);
#endif // Catch-all POSIX block.
}
SystemInfo::ASLR GetASLR() {
#ifdef BENCHMARK_OS_LINUX
const auto curr_personality = personality(0xffffffff);
return (curr_personality & ADDR_NO_RANDOMIZE) ? SystemInfo::ASLR::DISABLED
: SystemInfo::ASLR::ENABLED;
#else
// FIXME: support detecting ASLR on other OS.
return SystemInfo::ASLR::UNKNOWN;
#endif
}
int GetNumCPUsImpl() {
#ifdef BENCHMARK_OS_WINDOWS
SYSTEM_INFO sysinfo;
// Use memset as opposed to = {} to avoid GCC missing initializer false
// positives.
std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
GetSystemInfo(&sysinfo);
// number of logical processors in the current group
return static_cast<int>(sysinfo.dwNumberOfProcessors);
#elif defined(BENCHMARK_OS_QNX)
return static_cast<int>(_syspage_ptr->num_cpu);
#elif defined(BENCHMARK_OS_QURT)
qurt_sysenv_max_hthreads_t hardware_threads;
if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) {
hardware_threads.max_hthreads = 1;
}
return static_cast<int>(hardware_threads.max_hthreads);
#elif defined(BENCHMARK_HAS_SYSCTL)
// *BSD, macOS
int num_cpu = -1;
constexpr auto* hwncpu =
#if defined BENCHMARK_OS_MACOSX
"hw.logicalcpu";
#elif defined(HW_NCPUONLINE)
"hw.ncpuonline";
#else
"hw.ncpu";
#endif
if (GetSysctl(hwncpu, &num_cpu)) return num_cpu;
PrintErrorAndDie("Err: ", strerror(errno));
#elif defined(_SC_NPROCESSORS_ONLN)
// Linux, Solaris, AIX, Haiku, WASM, etc.
// Returns -1 in case of a failure.
int num_cpu = static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN));
if (num_cpu < 0) {
PrintErrorAndDie("sysconf(_SC_NPROCESSORS_ONLN) failed with error: ",
strerror(errno));
}
return num_cpu;
#else
// Fallback, no other API exists.
return -1;
#endif
BENCHMARK_UNREACHABLE();
}
int GetNumCPUs() {
int num_cpus = GetNumCPUsImpl();
if (num_cpus < 1) {
std::cerr << "Unable to extract number of CPUs.\n";
// There must be at least one CPU on which we're running.
num_cpus = 1;
}
return num_cpus;
}
class ThreadAffinityGuard final {
public:
ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
if (!reset_affinity) {
std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
"frequency may be incorrect.\n";
}
}
~ThreadAffinityGuard() {
if (!reset_affinity) {
return;
}
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
&previous_affinity);
if (ret == 0) {
return;
}
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
if (ret != 0) {
return;
}
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
PrintErrorAndDie("Failed to reset thread affinity");
}
ThreadAffinityGuard(ThreadAffinityGuard&&) = delete;
ThreadAffinityGuard(const ThreadAffinityGuard&) = delete;
ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete;
ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete;
private:
bool SetAffinity() {
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
int ret = 0;
self = pthread_self();
ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
&previous_affinity);
if (ret != 0) {
return false;
}
cpu_set_t affinity;
memcpy(&affinity, &previous_affinity, sizeof(affinity));
bool is_first_cpu = true;
for (int i = 0; i < CPU_SETSIZE; ++i) {
if (CPU_ISSET(i, &affinity)) {
if (is_first_cpu) {
is_first_cpu = false;
} else {
CPU_CLR(i, &affinity);
}
}
}
if (is_first_cpu) {
return false;
}
ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
return ret == 0;
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
self = GetCurrentThread();
DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber();
previous_affinity = SetThreadAffinityMask(self, mask);
return previous_affinity != 0;
#else
return false;
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
}
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
pthread_t self{};
cpu_set_t previous_affinity{};
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
HANDLE self;
DWORD_PTR previous_affinity;
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
bool reset_affinity;
};
double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
// Currently, scaling is only used on linux path here,
// suppress diagnostics about it being unused on other paths.
(void)scaling;
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
long freq = 0;
// If the kernel is exporting the tsc frequency use that. There are issues
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
// exporintg an invalid p-state (on x86) or p-states may be used to put the
// processor in a new mode (turbo mode). Essentially, those frequencies
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
// well.
if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
// If CPU scaling is disabled, use the *current* frequency.
// Note that we specifically don't want to read cpuinfo_cur_freq,
// because it is only readable by root.
|| (scaling == CPUInfo::Scaling::DISABLED &&
ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq",
&freq))
// Otherwise, if CPU scaling may be in effect, we want to use
// the *maximum* frequency, not whatever CPU speed some random processor
// happens to be using now.
|| ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
&freq)) {
// The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000".
return static_cast<double>(freq) * 1000.0;
}
const double error_value = -1;
double bogo_clock = error_value;
std::ifstream f("/proc/cpuinfo");
if (!f.is_open()) {
std::cerr << "failed to open /proc/cpuinfo\n";
return error_value;
}
auto StartsWithKey = [](std::string const& Value, std::string const& Key) {
if (Key.size() > Value.size()) {
return false;
}
auto Cmp = [&](char X, char Y) {
return std::tolower(X) == std::tolower(Y);
};
return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
};
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) {
continue;
}
std::size_t split_idx = ln.find(':');
std::string value;
if (split_idx != std::string::npos) {
value = ln.substr(split_idx + 1);
}
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept positive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
if (StartsWithKey(ln, "cpu MHz")) {
if (!value.empty()) {
double cycles_per_second = benchmark::stod(value) * 1000000.0;
if (cycles_per_second > 0) {
return cycles_per_second;
}
}
} else if (StartsWithKey(ln, "bogomips")) {
if (!value.empty()) {
bogo_clock = benchmark::stod(value) * 1000000.0;
if (bogo_clock < 0.0) {
bogo_clock = error_value;
}
}
}
}
if (f.bad()) {
std::cerr << "Failure reading /proc/cpuinfo\n";
return error_value;
}
if (!f.eof()) {
std::cerr << "Failed to read to end of /proc/cpuinfo\n";
return error_value;
}
f.close();
// If we found the bogomips clock, but nothing better, we'll use it (but
// we're not happy about it); otherwise, fallback to the rough estimation
// below.
if (bogo_clock >= 0.0) {
return bogo_clock;
}
#elif defined BENCHMARK_HAS_SYSCTL
constexpr auto* freqStr =
#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
"machdep.tsc_freq";
#elif defined BENCHMARK_OS_OPENBSD
"hw.cpuspeed";
#elif defined BENCHMARK_OS_DRAGONFLY
"hw.tsc_frequency";
#else
"hw.cpufrequency";
#endif
unsigned long long hz = 0;
#if defined BENCHMARK_OS_OPENBSD
if (GetSysctl(freqStr, &hz)) {
return static_cast<double>(hz * 1000000);
}
#else
if (GetSysctl(freqStr, &hz)) {
return static_cast<double>(hz);
}
#endif
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
freqStr, strerror(errno));
fprintf(stderr,
"This does not affect benchmark measurements, only the "
"metadata output.\n");
#elif defined BENCHMARK_OS_WINDOWS_WIN32
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
DWORD data, data_size = sizeof(data);
if (IsWindowsXPOrGreater() &&
SUCCEEDED(
SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", nullptr, &data, &data_size))) {
return static_cast<double>(static_cast<int64_t>(data) *
static_cast<int64_t>(1000 * 1000)); // was mhz
}
#elif defined(BENCHMARK_OS_SOLARIS)
kstat_ctl_t* kc = kstat_open();
if (!kc) {
std::cerr << "failed to open /dev/kstat\n";
return -1;
}
kstat_t* ksp = kstat_lookup(kc, const_cast<char*>("cpu_info"), -1,
const_cast<char*>("cpu_info0"));
if (!ksp) {
std::cerr << "failed to lookup in /dev/kstat\n";
return -1;
}
if (kstat_read(kc, ksp, NULL) < 0) {
std::cerr << "failed to read from /dev/kstat\n";
return -1;
}
kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup(
ksp, const_cast<char*>("current_clock_Hz"));
if (!knp) {
std::cerr << "failed to lookup data in /dev/kstat\n";
return -1;
}
if (knp->data_type != KSTAT_DATA_UINT64) {
std::cerr << "current_clock_Hz is of unexpected data type: "
<< knp->data_type << "\n";
return -1;
}
double clock_hz = knp->value.ui64;
kstat_close(kc);
return clock_hz;
#elif defined(BENCHMARK_OS_QNX)
return static_cast<double>(
static_cast<int64_t>(SYSPAGE_ENTRY(cpuinfo)->speed) *
static_cast<int64_t>(1000 * 1000));
#elif defined(BENCHMARK_OS_QURT)
// QuRT doesn't provide any API to query Hexagon frequency.
return 1000000000;
#endif
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
// Make sure to use the same cycle counter when starting and stopping the
// cycle timer. We just pin the current thread to a cpu in the previous
// affinity set.
ThreadAffinityGuard affinity_guard;
static constexpr double estimate_time_s = 1.0;
const double start_time = ChronoClockNow();
const auto start_ticks = cycleclock::Now();
// Impose load instead of calling sleep() to make sure the cycle counter
// works.
using PRNG = std::minstd_rand;
using Result = PRNG::result_type;
PRNG rng(static_cast<Result>(start_ticks));
Result state = 0;
do {
static constexpr size_t batch_size = 10000;
rng.discard(batch_size);
state += rng();
} while (ChronoClockNow() - start_time < estimate_time_s);
DoNotOptimize(state);
const auto end_ticks = cycleclock::Now();
const double end_time = ChronoClockNow();
return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time);
// Reset the affinity of current thread when the lifetime of affinity_guard
// ends.
}
std::vector<double> GetLoadAvg() {
#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \
defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \
defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \
!(defined(__ANDROID__) && __ANDROID_API__ < 29)
static constexpr int kMaxSamples = 3;
std::vector<double> res(kMaxSamples, 0.0);
const auto nelem = getloadavg(res.data(), kMaxSamples);
if (nelem < 1) {
res.clear();
} else {
res.resize(static_cast<size_t>(nelem));
}
return res;
#else
return {};
#endif
}
} // end namespace
const CPUInfo& CPUInfo::Get() {
static const CPUInfo* info = new CPUInfo();
return *info;
}
CPUInfo::CPUInfo()
: num_cpus(GetNumCPUs()),
scaling(CpuScaling(num_cpus)),
cycles_per_second(GetCPUCyclesPerSecond(scaling)),
caches(GetCacheSizes()),
load_avg(GetLoadAvg()) {}
const SystemInfo& SystemInfo::Get() {
static const SystemInfo* info = new SystemInfo();
return *info;
}
SystemInfo::SystemInfo() : name(GetSystemName()), ASLRStatus(GetASLR()) {}
} // end namespace benchmark

View File

@@ -0,0 +1,45 @@
#ifndef BENCHMARK_THREAD_MANAGER_H
#define BENCHMARK_THREAD_MANAGER_H
#include <atomic>
#include "benchmark/benchmark.h"
#include "mutex.h"
namespace benchmark {
namespace internal {
class ThreadManager {
public:
explicit ThreadManager(int num_threads) : start_stop_barrier_(num_threads) {}
Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) {
return benchmark_mutex_;
}
bool StartStopBarrier() { return start_stop_barrier_.wait(); }
void NotifyThreadComplete() { start_stop_barrier_.removeThread(); }
struct Result {
IterationCount iterations = 0;
double real_time_used = 0;
double cpu_time_used = 0;
double manual_time_used = 0;
int64_t complexity_n = 0;
std::string report_label_;
std::string skip_message_;
internal::Skipped skipped_ = internal::NotSkipped;
UserCounters counters;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
private:
mutable Mutex benchmark_mutex_;
Barrier start_stop_barrier_;
};
} // namespace internal
} // namespace benchmark
#endif // BENCHMARK_THREAD_MANAGER_H

View File

@@ -0,0 +1,86 @@
#ifndef BENCHMARK_THREAD_TIMER_H
#define BENCHMARK_THREAD_TIMER_H
#include "check.h"
#include "timers.h"
namespace benchmark {
namespace internal {
class ThreadTimer {
explicit ThreadTimer(bool measure_process_cpu_time_)
: measure_process_cpu_time(measure_process_cpu_time_) {}
public:
static ThreadTimer Create() {
return ThreadTimer(/*measure_process_cpu_time_=*/false);
}
static ThreadTimer CreateProcessCpuTime() {
return ThreadTimer(/*measure_process_cpu_time_=*/true);
}
// Called by each thread
void StartTimer() {
running_ = true;
start_real_time_ = ChronoClockNow();
start_cpu_time_ = ReadCpuTimerOfChoice();
}
// Called by each thread
void StopTimer() {
BM_CHECK(running_);
running_ = false;
real_time_used_ += ChronoClockNow() - start_real_time_;
// Floating point error can result in the subtraction producing a negative
// time. Guard against that.
cpu_time_used_ +=
std::max<double>(ReadCpuTimerOfChoice() - start_cpu_time_, 0);
}
// Called by each thread
void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
bool running() const { return running_; }
// REQUIRES: timer is not running
double real_time_used() const {
BM_CHECK(!running_);
return real_time_used_;
}
// REQUIRES: timer is not running
double cpu_time_used() const {
BM_CHECK(!running_);
return cpu_time_used_;
}
// REQUIRES: timer is not running
double manual_time_used() const {
BM_CHECK(!running_);
return manual_time_used_;
}
private:
double ReadCpuTimerOfChoice() const {
if (measure_process_cpu_time) return ProcessCPUUsage();
return ThreadCPUUsage();
}
// should the thread, or the process, time be measured?
const bool measure_process_cpu_time;
bool running_ = false; // Is the timer running
double start_real_time_ = 0; // If running_
double start_cpu_time_ = 0; // If running_
// Accumulated time so far (does not contain current slice if running_)
double real_time_used_ = 0;
double cpu_time_used_ = 0;
// Manually set iteration time. User sets this with SetIterationTime(seconds).
double manual_time_used_ = 0;
};
} // namespace internal
} // namespace benchmark
#endif // BENCHMARK_THREAD_TIMER_H

View File

@@ -0,0 +1,288 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "timers.h"
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <shlwapi.h>
#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA
#include <versionhelpers.h>
#include <windows.h>
#else
#include <fcntl.h>
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
#include <unistd.h>
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_DRAGONFLY || \
defined BENCHMARK_OS_MACOSX
#include <sys/sysctl.h>
#endif
#if defined(BENCHMARK_OS_MACOSX)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#endif
#if defined(BENCHMARK_OS_QURT)
#include <qurt.h>
#endif
#endif
#ifdef BENCHMARK_OS_EMSCRIPTEN
#include <emscripten.h>
#endif
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <limits>
#include <mutex>
#include "check.h"
#include "log.h"
#include "string_util.h"
namespace benchmark {
// Suppress unused warnings on helper functions.
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
#if defined(__NVCOMPILER)
#pragma diag_suppress declared_but_not_referenced
#endif
namespace {
#if defined(BENCHMARK_OS_WINDOWS)
double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
ULARGE_INTEGER kernel;
ULARGE_INTEGER user;
kernel.HighPart = kernel_time.dwHighDateTime;
kernel.LowPart = kernel_time.dwLowDateTime;
user.HighPart = user_time.dwHighDateTime;
user.LowPart = user_time.dwLowDateTime;
return (static_cast<double>(kernel.QuadPart) +
static_cast<double>(user.QuadPart)) *
1e-7;
}
#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
double MakeTime(struct rusage const& ru) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
}
#endif
#if defined(BENCHMARK_OS_MACOSX)
double MakeTime(thread_basic_info_data_t const& info) {
return (static_cast<double>(info.user_time.seconds) +
static_cast<double>(info.user_time.microseconds) * 1e-6 +
static_cast<double>(info.system_time.seconds) +
static_cast<double>(info.system_time.microseconds) * 1e-6);
}
#endif
#if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID)
double MakeTime(struct timespec const& ts) {
return static_cast<double>(ts.tv_sec) +
(static_cast<double>(ts.tv_nsec) * 1e-9);
}
#endif
BENCHMARK_NORETURN void DiagnoseAndExit(const char* msg) {
std::cerr << "ERROR: " << msg << '\n';
std::flush(std::cerr);
std::exit(EXIT_FAILURE);
}
} // end namespace
double ProcessCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
HANDLE proc = GetCurrentProcess();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time,
&user_time))
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
#elif defined(BENCHMARK_OS_QURT)
// Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0,
// and doesn't appear to work on at least some devices (eg Samsung S22),
// so let's use the actually-documented and apparently-equivalent
// qurt_sysclock_get_hw_ticks() call instead.
return static_cast<double>(
qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) *
1.0e-6;
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
// Use Emscripten-specific API. Reported CPU time would be exactly the
// same as total time, but this is ok because there aren't long-latency
// synchronous system calls in Emscripten.
return emscripten_get_now() * 1e-3;
#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
// See https://github.com/google/benchmark/pull/292
struct timespec spec {};
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) {
return MakeTime(spec);
}
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#else
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru);
DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed");
#endif
}
double ThreadCPUUsage() {
#if defined(BENCHMARK_OS_WINDOWS)
HANDLE this_thread = GetCurrentThread();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
&user_time);
return MakeTime(kernel_time, user_time);
#elif defined(BENCHMARK_OS_QURT)
// Note that qurt_timer_get_ticks() is no longer documented as of SDK 5.3.0,
// and doesn't appear to work on at least some devices (eg Samsung S22),
// so let's use the actually-documented and apparently-equivalent
// qurt_sysclock_get_hw_ticks() call instead.
return static_cast<double>(
qurt_timer_timetick_to_us(qurt_sysclock_get_hw_ticks())) *
1.0e-6;
#elif defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
// See https://github.com/google/benchmark/pull/292
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
thread_basic_info_data_t info;
mach_port_t thread = pthread_mach_thread_np(pthread_self());
if (thread_info(thread, THREAD_BASIC_INFO,
reinterpret_cast<thread_info_t>(&info),
&count) == KERN_SUCCESS) {
return MakeTime(info);
}
DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// Emscripten doesn't support traditional threads
return ProcessCPUUsage();
#elif defined(BENCHMARK_OS_RTEMS)
// RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. See
// https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c
return ProcessCPUUsage();
#elif defined(BENCHMARK_OS_ZOS)
// z/OS doesn't support CLOCK_THREAD_CPUTIME_ID.
return ProcessCPUUsage();
#elif defined(BENCHMARK_OS_SOLARIS)
struct rusage ru;
if (getrusage(RUSAGE_LWP, &ru) == 0) return MakeTime(ru);
DiagnoseAndExit("getrusage(RUSAGE_LWP, ...) failed");
#elif defined(CLOCK_THREAD_CPUTIME_ID)
struct timespec ts {};
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) {
return MakeTime(ts);
}
DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#else
#error Per-thread timing is not available on your system.
#endif
}
std::string LocalDateTimeString() {
// Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM.
typedef std::chrono::system_clock Clock;
std::time_t now = Clock::to_time_t(Clock::now());
const std::size_t kTzOffsetLen = 6;
const std::size_t kTimestampLen = 19;
std::size_t tz_len = 0;
std::size_t timestamp_len = 0;
long int offset_minutes = 0;
char tz_offset_sign = '+';
// tz_offset is set in one of three ways:
// * strftime with %z - This either returns empty or the ISO 8601 time. The
// maximum length an
// ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero).
// * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to
// 19 for %02li,
// one for :, up to 19 %02li, plus trailing zero).
// * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus
// trailing zero).
//
// Thus, the maximum size this needs to be is 41.
char tz_offset[41];
// Long enough buffer to avoid format-overflow warnings
char storage[128];
#if defined(BENCHMARK_OS_WINDOWS)
std::tm* timeinfo_p = ::localtime(&now);
#else
std::tm timeinfo{};
std::tm* timeinfo_p = &timeinfo;
::localtime_r(&now, &timeinfo);
#endif
tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p);
if (tz_len < kTzOffsetLen && tz_len > 1) {
// Timezone offset was written. strftime writes offset as +HHMM or -HHMM,
// RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse
// the offset as an integer, then reprint it to a string.
offset_minutes = ::strtol(tz_offset, NULL, 10);
if (offset_minutes < 0) {
offset_minutes *= -1;
tz_offset_sign = '-';
}
tz_len = static_cast<size_t>(
::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li",
tz_offset_sign, offset_minutes / 100, offset_minutes % 100));
BM_CHECK(tz_len == kTzOffsetLen);
((void)tz_len); // Prevent unused variable warning in optimized build.
} else {
// Unknown offset. RFC3339 specifies that unknown local offsets should be
// written as UTC time with -00:00 timezone.
#if defined(BENCHMARK_OS_WINDOWS)
// Potential race condition if another thread calls localtime or gmtime.
timeinfo_p = ::gmtime(&now);
#else
::gmtime_r(&now, &timeinfo);
#endif
strncpy(tz_offset, "-00:00", kTzOffsetLen + 1);
}
timestamp_len =
std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", timeinfo_p);
BM_CHECK(timestamp_len == kTimestampLen);
// Prevent unused variable warning in optimized build.
((void)kTimestampLen);
std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1);
return std::string(storage);
}
} // end namespace benchmark

View File

@@ -0,0 +1,75 @@
#ifndef BENCHMARK_TIMERS_H
#define BENCHMARK_TIMERS_H
#include <chrono>
#include <string>
namespace benchmark {
// Return the CPU usage of the current process
double ProcessCPUUsage();
// Return the CPU usage of the children of the current process
double ChildrenCPUUsage();
// Return the CPU usage of the current thread
double ThreadCPUUsage();
#if defined(BENCHMARK_OS_QURT)
// std::chrono::now() can return 0 on some Hexagon devices;
// this reads the value of a 56-bit, 19.2MHz hardware counter
// and converts it to seconds. Unlike std::chrono, this doesn't
// return an absolute time, but since ChronoClockNow() is only used
// to compute elapsed time, this shouldn't matter.
struct QuRTClock {
typedef uint64_t rep;
typedef std::ratio<1, 19200000> period;
typedef std::chrono::duration<rep, period> duration;
typedef std::chrono::time_point<QuRTClock> time_point;
static const bool is_steady = false;
static time_point now() {
unsigned long long count;
asm volatile(" %0 = c31:30 " : "=r"(count));
return time_point(static_cast<duration>(count));
}
};
#else
#if defined(HAVE_STEADY_CLOCK)
template <bool HighResIsSteady = std::chrono::high_resolution_clock::is_steady>
struct ChooseSteadyClock {
typedef std::chrono::high_resolution_clock type;
};
template <>
struct ChooseSteadyClock<false> {
typedef std::chrono::steady_clock type;
};
#endif // HAVE_STEADY_CLOCK
#endif
struct ChooseClockType {
#if defined(BENCHMARK_OS_QURT)
typedef QuRTClock type;
#elif defined(HAVE_STEADY_CLOCK)
typedef ChooseSteadyClock<>::type type;
#else
typedef std::chrono::high_resolution_clock type;
#endif
};
inline double ChronoClockNow() {
typedef ChooseClockType::type ClockType;
using FpSeconds = std::chrono::duration<double, std::chrono::seconds::period>;
return FpSeconds(ClockType::now().time_since_epoch()).count();
}
std::string LocalDateTimeString();
} // end namespace benchmark
#endif // BENCHMARK_TIMERS_H

View File

@@ -13,7 +13,7 @@ plugins {
apply plugin: 'edu.wpi.first.NativeUtils'
apply plugin: 'edu.wpi.first.DeployUtils'
apply from: '../shared/config.gradle'
apply from: "${rootDir}/shared/config.gradle"
application {
if (OperatingSystem.current().isMacOsX()) {

View File

@@ -2,6 +2,8 @@
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#include "main.h"
#include <poll.h>
#include <spawn.h>
#include <sys/syscall.h>
@@ -11,7 +13,6 @@
#include <climits>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <string>
int main(int argc, char* argv[]) {
@@ -42,6 +43,22 @@ int main(int argc, char* argv[]) {
return 1;
}
if (exePath.stem() == (L"AdvantageScope")) {
std::filesystem::path AdvantageScopePath{
exePath.parent_path().parent_path() / L"advantagescope" /
L"advantagescope-wpilib"};
return StartExeTool(AdvantageScopePath);
} else if (exePath.stem() == (L"Elastic")) {
std::filesystem::path ElasticPath{exePath.parent_path().parent_path() /
L"elastic" / L"elastic_dashboard"};
return StartExeTool(ElasticPath);
} else {
return StartJavaTool(exePath);
}
}
int StartJavaTool(std::filesystem::path& exePath) {
pid_t pid = 0;
std::filesystem::path jarPath{exePath};
jarPath.replace_extension("jar");
std::filesystem::path parentPath{exePath.parent_path()};
@@ -53,10 +70,10 @@ int main(int argc, char* argv[]) {
std::filesystem::path Java = toolsFolder / "jdk" / "bin" / "java";
pid = 0;
std::string data = jarPath;
std::string jarArg = "-jar";
char* const arguments[] = {jarArg.data(), data.data(), nullptr};
char* const arguments[] = {Java.generic_string().data(), jarArg.data(),
data.data(), nullptr};
int status =
posix_spawn(&pid, Java.c_str(), nullptr, nullptr, arguments, environ);
@@ -85,3 +102,22 @@ int main(int argc, char* argv[]) {
struct pollfd pfd = {childPid, POLLIN, 0};
return poll(&pfd, 1, 3000);
}
int StartExeTool(std::filesystem::path& exePath) {
char* const arguments[] = {nullptr};
pid_t pid = 0;
int status =
posix_spawn(&pid, exePath.c_str(), nullptr, nullptr, arguments, environ);
if (status != 0) {
return 1;
}
int childPid = syscall(SYS_pidfd_open, pid, 0);
if (childPid <= 0) {
return 1;
}
struct pollfd pfd = {childPid, POLLIN, 0};
return poll(&pfd, 1, 5000);
}

View File

@@ -0,0 +1,11 @@
// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#pragma once
#include <filesystem>
int StartExeTool(std::filesystem::path& exePath);
int StartJavaTool(std::filesystem::path& exePath);
int main(int argc, char* argv[]);

View File

@@ -0,0 +1,11 @@
// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#pragma once
#include <filesystem>
int StartExeTool(std::filesystem::path& exePath);
int StartJavaTool(std::filesystem::path& exePath);
int main(int argc, char* argv[]);

View File

@@ -3,8 +3,10 @@
// the WPILib BSD license file in the root directory of this project.
#import <Foundation/Foundation.h>
#include "main.h"
#include <iostream>
#include <string>
#include <filesystem>
int main(int argc, char* argv[]) {
(void)argc;
@@ -28,6 +30,24 @@ int main(int argc, char* argv[]) {
return 1;
}
if (exePath.stem() == (L"AdvantageScope")) {
std::filesystem::path AdvantageScopePath{
exePath.parent_path().parent_path() / L"advantagescope" /
L"Advantagescope (WPILib).app" / L"Contents" / L"MacOS" /
L"advantagescope"};
return StartExeTool(AdvantageScopePath);
} else if (exePath.stem() == (L"Elastic")) {
std::filesystem::path ElasticPath{exePath.parent_path().parent_path() /
L"elastic" / L"elastic_dashboard.app" /
L"Contents" / L"MacOS" /
L"elastic_dashboard"};
return StartExeTool(ElasticPath);
} else {
return StartJavaTool(exePath);
}
}
int StartJavaTool(std::filesystem::path& exePath) {
std::filesystem::path jarPath{exePath};
jarPath.replace_extension("jar");
std::filesystem::path parentPath{exePath.parent_path()};
@@ -78,3 +98,22 @@ int main(int argc, char* argv[]) {
return task.running ? 0 : 1;
}
int StartExeTool(std::filesystem::path& exePath) {
std::cout << "exePath: " << exePath.c_str() << std::endl;
NSTask* task = [[NSTask alloc] init];
task.launchPath = [NSString stringWithFormat:@"%s", exePath.c_str()];
// task.arguments = Arguments;
task.terminationHandler = ^(NSTask* t) {
(void)t;
CFRunLoopStop(CFRunLoopGetMain());
};
if (![task launchAndReturnError:nil]) {
return 1;
}
CFRunLoopRunInMode(kCFRunLoopDefaultMode, 3, false);
return task.running ? 0 : 1;
}

View File

@@ -2,9 +2,7 @@
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#include <filesystem>
#include "Windows.h"
#include "main.h"
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
LPTSTR pCmdLine, int nCmdShow) {
@@ -29,6 +27,24 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
return 1;
}
if (ExePath.stem() == (L"AdvantageScope")) {
std::filesystem::path AdvantageScopePath{
ExePath.parent_path().parent_path() / L"advantagescope" /
L"AdvantageScope (WPILib).exe"};
return StartExeTool(AdvantageScopePath);
} else if (ExePath.stem() == (L"Elastic")) {
std::filesystem::path ElasticPath{ExePath.parent_path().parent_path() /
L"elastic" / L"elastic_dashboard.exe"};
return StartExeTool(ElasticPath);
} else {
return StartJavaTool(ExePath);
}
}
int StartJavaTool(std::filesystem::path& ExePath) {
WCHAR ExePathRaw[1024];
DWORD Status;
std::filesystem::path JarPath{ExePath};
JarPath.replace_extension(L"jar");
std::filesystem::path ParentPath{ExePath.parent_path()};
@@ -83,3 +99,25 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
return Status == WAIT_TIMEOUT ? 0 : 1;
}
int StartExeTool(std::filesystem::path& ExePath) {
STARTUPINFOW StartupInfo;
PROCESS_INFORMATION ProcessInfo;
DWORD Status;
ZeroMemory(&StartupInfo, sizeof(StartupInfo));
StartupInfo.cb = sizeof(StartupInfo);
ZeroMemory(&ProcessInfo, sizeof(ProcessInfo));
if (!CreateProcessW(ExePath.c_str(), NULL, NULL, NULL, FALSE, 0, NULL, NULL,
&StartupInfo, &ProcessInfo)) {
return 1;
}
Status =
WaitForSingleObject(ProcessInfo.hProcess, 5000); // Wait for 5 seconds
CloseHandle(ProcessInfo.hProcess);
CloseHandle(ProcessInfo.hThread);
return Status == WAIT_TIMEOUT ? 0 : 1;
}

View File

@@ -0,0 +1,15 @@
// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#pragma once
#include <windows.h>
#include <filesystem>
#include <string>
int StartExeTool(std::filesystem::path& ExePath);
int StartJavaTool(std::filesystem::path& ExePath);
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
LPTSTR pCmdLine, int nCmdShow);

View File

@@ -49,6 +49,7 @@ include 'wpilibNewCommands'
include 'romiVendordep'
include 'xrpVendordep'
include 'developerRobot'
include 'benchmark'
include 'docs'
include 'msvcruntime'
include 'ntcoreffi'

45
upstream_utils/benchmark.py Executable file
View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python3
import os
import shutil
from pathlib import Path
from upstream_utils import Lib, walk_cwd_and_copy_if
def copy_upstream_src(wpilib_root: Path):
upstream_root = Path(".").absolute()
benchmark = wpilib_root / "benchmark"
# Delete old install
shutil.rmtree(
benchmark / "src/main/native/thirdparty/benchmark",
ignore_errors=True,
)
# Copy benchmark source files into allwpilib
os.chdir(upstream_root / "src")
walk_cwd_and_copy_if(
lambda dp, f: f != "benchmark_main.cc" and f != "CMakeLists.txt",
benchmark / "src/main/native/thirdparty/benchmark/src",
)
# Copy benchmark header files into allwpilib
os.chdir(upstream_root / "include")
walk_cwd_and_copy_if(
lambda dp, f: f.endswith(".h"),
benchmark / "src/main/native/thirdparty/benchmark/include",
)
def main():
name = "benchmark"
url = "https://github.com/google/benchmark.git"
tag = "v1.9.4"
benchmark = Lib(name, url, tag, copy_upstream_src)
benchmark.main()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,22 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Joseph Eng <91924258+KangarooKoala@users.noreply.github.com>
Date: Wed, 25 Jun 2025 17:39:54 -0700
Subject: [PATCH 1/2] Add roboRIO benchmark support
---
src/sysinfo.cc | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/sysinfo.cc b/src/sysinfo.cc
index 60e9e5c219a470944609f36773b4d8effa019059..86922c0da6303e1c35b4f7cb92a751fb84ba6f95 100644
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
@@ -441,6 +441,8 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
return GetCacheSizesQNX();
#elif defined(BENCHMARK_OS_QURT) || defined(__EMSCRIPTEN__)
return std::vector<CPUInfo::CacheInfo>();
+#elif defined(__FRC_ROBORIO__)
+ return std::vector<CPUInfo::CacheInfo>();
#else
return GetCacheSizesFromKVFS();
#endif

View File

@@ -0,0 +1,78 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Joseph Eng <91924258+KangarooKoala@users.noreply.github.com>
Date: Wed, 25 Jun 2025 18:37:33 -0700
Subject: [PATCH 2/2] Suppress -Wformat-nonliteral
---
src/colorprint.cc | 14 ++++++++++++++
src/string_util.cc | 14 ++++++++++++++
2 files changed, 28 insertions(+)
diff --git a/src/colorprint.cc b/src/colorprint.cc
index c90232f20ff7b6dfdc09ee2df02a6d735b1d99ea..c4c48d15a049f39c77aeee47ae46741ec195a7d1 100644
--- a/src/colorprint.cc
+++ b/src/colorprint.cc
@@ -89,7 +89,14 @@ std::string FormatString(const char* msg, va_list args) {
std::size_t size = 256;
char local_buff[256];
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif // __GNUC__
auto ret = vsnprintf(local_buff, size, msg, args_cp);
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif // __GNUC__
va_end(args_cp);
@@ -105,7 +112,14 @@ std::string FormatString(const char* msg, va_list args) {
// we did not provide a long enough buffer on our first attempt.
size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif // __GNUC__
ret = vsnprintf(buff.get(), size, msg, args);
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif // __GNUC__
BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
return buff.get();
}
diff --git a/src/string_util.cc b/src/string_util.cc
index 420de4cf259ce0bf3087b004b1f0d0b1c78028a7..698129453cd6ea1baf5f12ae58db467008b6ce8c 100644
--- a/src/string_util.cc
+++ b/src/string_util.cc
@@ -123,7 +123,14 @@ std::string StrFormatImp(const char* msg, va_list args) {
// 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation
// in the android-ndk
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif // __GNUC__
auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp);
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif // __GNUC__
va_end(args_cp);
@@ -141,7 +148,14 @@ std::string StrFormatImp(const char* msg, va_list args) {
auto buff_ptr = std::unique_ptr<char[]>(new char[size]);
// 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation
// in the android-ndk
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif // __GNUC__
vsnprintf(buff_ptr.get(), size, msg, args);
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif // __GNUC__
return std::string(buff_ptr.get());
}

View File

@@ -25,6 +25,7 @@
"sharedLibrary": true,
"skipInvalidPlatforms": true,
"binaryPlatforms": [
"linuxsystemcore",
"linuxathena",
"linuxarm32",
"linuxarm64",

View File

@@ -96,7 +96,7 @@ void {{ ConsoleName }}Controller::InitSendable(wpi::SendableBuilder& builder) {
builder.SetSmartDashboardType("HID");
builder.PublishConstString("ControllerType", "{{ ConsoleName }}");
{%- for trigger in triggers %}
builder.AddDoubleProperty("{{ capitalize_first(trigger.name) }}", [this] { return Get{{ capitalize_first(trigger.name) }}Axis(); }, nullptr);
builder.AddDoubleProperty("{{ capitalize_first(trigger.name) }} Axis", [this] { return Get{{ capitalize_first(trigger.name) }}Axis(); }, nullptr);
{%- endfor -%}
{% for stick in sticks %}
builder.AddDoubleProperty("{{ stick.NameParts|map("capitalize")|join }}", [this] { return Get{{ stick.NameParts|map("capitalize")|join }}(); }, nullptr);

View File

@@ -280,8 +280,8 @@ bool PS4Controller::GetTouchpadReleased() {
void PS4Controller::InitSendable(wpi::SendableBuilder& builder) {
builder.SetSmartDashboardType("HID");
builder.PublishConstString("ControllerType", "PS4");
builder.AddDoubleProperty("L2", [this] { return GetL2Axis(); }, nullptr);
builder.AddDoubleProperty("R2", [this] { return GetR2Axis(); }, nullptr);
builder.AddDoubleProperty("L2 Axis", [this] { return GetL2Axis(); }, nullptr);
builder.AddDoubleProperty("R2 Axis", [this] { return GetR2Axis(); }, nullptr);
builder.AddDoubleProperty("LeftX", [this] { return GetLeftX(); }, nullptr);
builder.AddDoubleProperty("LeftY", [this] { return GetLeftY(); }, nullptr);
builder.AddDoubleProperty("RightX", [this] { return GetRightX(); }, nullptr);

View File

@@ -280,8 +280,8 @@ bool PS5Controller::GetTouchpadReleased() {
void PS5Controller::InitSendable(wpi::SendableBuilder& builder) {
builder.SetSmartDashboardType("HID");
builder.PublishConstString("ControllerType", "PS5");
builder.AddDoubleProperty("L2", [this] { return GetL2Axis(); }, nullptr);
builder.AddDoubleProperty("R2", [this] { return GetR2Axis(); }, nullptr);
builder.AddDoubleProperty("L2 Axis", [this] { return GetL2Axis(); }, nullptr);
builder.AddDoubleProperty("R2 Axis", [this] { return GetR2Axis(); }, nullptr);
builder.AddDoubleProperty("LeftX", [this] { return GetLeftX(); }, nullptr);
builder.AddDoubleProperty("LeftY", [this] { return GetLeftY(); }, nullptr);
builder.AddDoubleProperty("RightX", [this] { return GetRightX(); }, nullptr);

View File

@@ -244,8 +244,8 @@ bool XboxController::GetRightBumperReleased() {
void XboxController::InitSendable(wpi::SendableBuilder& builder) {
builder.SetSmartDashboardType("HID");
builder.PublishConstString("ControllerType", "Xbox");
builder.AddDoubleProperty("LeftTrigger", [this] { return GetLeftTriggerAxis(); }, nullptr);
builder.AddDoubleProperty("RightTrigger", [this] { return GetRightTriggerAxis(); }, nullptr);
builder.AddDoubleProperty("LeftTrigger Axis", [this] { return GetLeftTriggerAxis(); }, nullptr);
builder.AddDoubleProperty("RightTrigger Axis", [this] { return GetRightTriggerAxis(); }, nullptr);
builder.AddDoubleProperty("LeftX", [this] { return GetLeftX(); }, nullptr);
builder.AddDoubleProperty("RightX", [this] { return GetRightX(); }, nullptr);
builder.AddDoubleProperty("LeftY", [this] { return GetLeftY(); }, nullptr);

View File

@@ -308,7 +308,7 @@ public class {{ ConsoleName }}Controller extends GenericHID implements Sendable
builder.setSmartDashboardType("HID");
builder.publishConstString("ControllerType", "{{ ConsoleName }}");
{%- for trigger in triggers %}
builder.addDoubleProperty("{{ capitalize_first(trigger.name) }}", this::get{{ capitalize_first(trigger.name) }}Axis, null);
builder.addDoubleProperty("{{ capitalize_first(trigger.name) }} Axis", this::get{{ capitalize_first(trigger.name) }}Axis, null);
{%- endfor -%}
{% for stick in sticks %}
builder.addDoubleProperty("{{ stick.NameParts|map("capitalize")|join }}", this::get{{ stick.NameParts|map("capitalize")|join }}, null);

View File

@@ -757,8 +757,8 @@ public class PS4Controller extends GenericHID implements Sendable {
public void initSendable(SendableBuilder builder) {
builder.setSmartDashboardType("HID");
builder.publishConstString("ControllerType", "PS4");
builder.addDoubleProperty("L2", this::getL2Axis, null);
builder.addDoubleProperty("R2", this::getR2Axis, null);
builder.addDoubleProperty("L2 Axis", this::getL2Axis, null);
builder.addDoubleProperty("R2 Axis", this::getR2Axis, null);
builder.addDoubleProperty("LeftX", this::getLeftX, null);
builder.addDoubleProperty("LeftY", this::getLeftY, null);
builder.addDoubleProperty("RightX", this::getRightX, null);

View File

@@ -757,8 +757,8 @@ public class PS5Controller extends GenericHID implements Sendable {
public void initSendable(SendableBuilder builder) {
builder.setSmartDashboardType("HID");
builder.publishConstString("ControllerType", "PS5");
builder.addDoubleProperty("L2", this::getL2Axis, null);
builder.addDoubleProperty("R2", this::getR2Axis, null);
builder.addDoubleProperty("L2 Axis", this::getL2Axis, null);
builder.addDoubleProperty("R2 Axis", this::getR2Axis, null);
builder.addDoubleProperty("LeftX", this::getLeftX, null);
builder.addDoubleProperty("LeftY", this::getLeftY, null);
builder.addDoubleProperty("RightX", this::getRightX, null);

View File

@@ -687,8 +687,8 @@ public class XboxController extends GenericHID implements Sendable {
public void initSendable(SendableBuilder builder) {
builder.setSmartDashboardType("HID");
builder.publishConstString("ControllerType", "Xbox");
builder.addDoubleProperty("LeftTrigger", this::getLeftTriggerAxis, null);
builder.addDoubleProperty("RightTrigger", this::getRightTriggerAxis, null);
builder.addDoubleProperty("LeftTrigger Axis", this::getLeftTriggerAxis, null);
builder.addDoubleProperty("RightTrigger Axis", this::getRightTriggerAxis, null);
builder.addDoubleProperty("LeftX", this::getLeftX, null);
builder.addDoubleProperty("RightX", this::getRightX, null);
builder.addDoubleProperty("LeftY", this::getLeftY, null);

View File

@@ -8,7 +8,9 @@ package edu.wpi.first.math.util;
public final class Units {
private static final double kInchesPerFoot = 12.0;
private static final double kMetersPerInch = 0.0254;
private static final double kMetersPerMile = 1609.344;
private static final double kSecondsPerMinute = 60;
private static final double kMinutesPerHour = 60;
private static final double kMillisecondsPerSecond = 1000;
private static final double kKilogramsPerLb = 0.453592;
@@ -137,6 +139,26 @@ public final class Units {
return radiansPerSecond * (kSecondsPerMinute / 2) / Math.PI;
}
/**
* Converts miles per hour to meters per second.
*
* @param mph The miles per hour to convert to meters per second.
* @return Meters per second converted from miles per hour.
*/
public static double milesPerHourToMetersPerSecond(double mph) {
return mph * kMetersPerMile / (kSecondsPerMinute * kMinutesPerHour);
}
/**
* Converts meters per second to miles per hour.
*
* @param metersPerSecond The meters per second to convert to from miles per hour.
* @return Miles per hour converted from meters per second.
*/
public static double metersPerSecondToMilesPerHour(double metersPerSecond) {
return metersPerSecond / kMetersPerMile * (kSecondsPerMinute * kMinutesPerHour);
}
/**
* Converts given milliseconds to seconds.
*

View File

@@ -50,17 +50,27 @@ class UnitsTest extends UtilityClassTest<Units> {
}
@Test
void radiansPerSecondToRotationsPerMinute() {
void radiansPerSecondToRotationsPerMinuteTest() {
assertEquals(76.39, Units.radiansPerSecondToRotationsPerMinute(8), 1e-2);
}
@Test
void millisecondsToSeconds() {
void milesPerHourToMetersPerSecondTest() {
assertEquals(0.44704, Units.milesPerHourToMetersPerSecond(1), 1e-2);
}
@Test
void metersPerSecondToMilesPerHourTest() {
assertEquals(2.2369, Units.metersPerSecondToMilesPerHour(1), 1e-2);
}
@Test
void millisecondsToSecondsTest() {
assertEquals(0.5, Units.millisecondsToSeconds(500), 1e-2);
}
@Test
void secondsToMilliseconds() {
void secondsToMillisecondsTest() {
assertEquals(1500, Units.secondsToMilliseconds(1.5), 1e-2);
}

View File

@@ -47,7 +47,7 @@ TEST(UvGetAddrInfoTest, BothNull) {
ASSERT_EQ(fail_cb_called, 1);
}
TEST(UvGetAddrInfoTest, FailedLookup) {
TEST(UvGetAddrInfoTest, DISABLED_FailedLookup) {
int fail_cb_called = 0;
auto loop = Loop::Create();