aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-07-07 05:16:29 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-07-07 05:16:29 +0000
commit8e8d611820a02a62d0fb4439c5e61825b55efab7 (patch)
tree963ff30204aa75d49afd4ab80365ce5ab54744b7
parent57b8940af53bceb02dcbe41640d2d9e4de3c3210 (diff)
parent9ae8a3b36dc387f23a6f4b4a9d1f71c8fb4415e5 (diff)
downloadtrace-cmd-android14-mainline-sdkext-release.tar.gz
Snap for 10453563 from 9ae8a3b36dc387f23a6f4b4a9d1f71c8fb4415e5 to mainline-sdkext-releaseaml_sdk_341510000aml_sdk_341410000aml_sdk_341110080aml_sdk_341110000aml_sdk_341010000aml_sdk_340912010android14-mainline-sdkext-release
Change-Id: I8928a0a251308b2f52bf607910083b341ef46792
-rw-r--r--.gitattributes2
-rw-r--r--.gitignore29
-rw-r--r--Android.bp160
-rw-r--r--CODING_STYLE287
-rw-r--r--CONTRIBUTE103
-rw-r--r--COPYING4
-rw-r--r--COPYING.LIB510
-rw-r--r--DCO47
-rw-r--r--Documentation/.gitignore3
-rw-r--r--Documentation/Makefile101
-rw-r--r--Documentation/README.PythonPlugin127
-rw-r--r--Documentation/asciidoc.conf120
-rw-r--r--Documentation/libtracecmd/Makefile112
-rw-r--r--Documentation/libtracecmd/libtracecmd-files.txt169
-rw-r--r--Documentation/libtracecmd/libtracecmd-instances.txt129
-rw-r--r--Documentation/libtracecmd/libtracecmd-log.txt78
-rw-r--r--Documentation/libtracecmd/libtracecmd-peer.txt137
-rw-r--r--Documentation/libtracecmd/libtracecmd-record.txt138
-rw-r--r--Documentation/libtracecmd/libtracecmd.txt86
-rw-r--r--Documentation/manpage-1.72.xsl14
-rw-r--r--Documentation/manpage-base.xsl35
-rw-r--r--Documentation/manpage-bold-literal.xsl17
-rw-r--r--Documentation/manpage-normal.xsl13
-rw-r--r--Documentation/manpage-suppress-sp.xsl21
-rw-r--r--Documentation/trace-cmd/Makefile132
-rw-r--r--Documentation/trace-cmd/trace-cmd-agent.1.txt62
-rw-r--r--Documentation/trace-cmd/trace-cmd-check-events.1.txt55
-rw-r--r--Documentation/trace-cmd/trace-cmd-clear.1.txt51
-rw-r--r--Documentation/trace-cmd/trace-cmd-convert.1.txt65
-rw-r--r--Documentation/trace-cmd/trace-cmd-dump.1.txt142
-rw-r--r--Documentation/trace-cmd/trace-cmd-extract.1.txt98
-rw-r--r--Documentation/trace-cmd/trace-cmd-hist.1.txt50
-rw-r--r--Documentation/trace-cmd/trace-cmd-list.1.txt95
-rw-r--r--Documentation/trace-cmd/trace-cmd-listen.1.txt71
-rw-r--r--Documentation/trace-cmd/trace-cmd-mem.1.txt74
-rw-r--r--Documentation/trace-cmd/trace-cmd-options.1.txt35
-rw-r--r--Documentation/trace-cmd/trace-cmd-profile.1.txt686
-rw-r--r--Documentation/trace-cmd/trace-cmd-record.1.txt519
-rw-r--r--Documentation/trace-cmd/trace-cmd-report.1.txt518
-rw-r--r--Documentation/trace-cmd/trace-cmd-reset.1.txt116
-rw-r--r--Documentation/trace-cmd/trace-cmd-restore.1.txt105
-rw-r--r--Documentation/trace-cmd/trace-cmd-set.1.txt273
-rw-r--r--Documentation/trace-cmd/trace-cmd-show.1.txt100
-rw-r--r--Documentation/trace-cmd/trace-cmd-snapshot.1.txt65
-rw-r--r--Documentation/trace-cmd/trace-cmd-split.1.txt107
-rw-r--r--Documentation/trace-cmd/trace-cmd-stack.1.txt50
-rw-r--r--Documentation/trace-cmd/trace-cmd-start.1.txt51
-rw-r--r--Documentation/trace-cmd/trace-cmd-stat.1.txt80
-rw-r--r--Documentation/trace-cmd/trace-cmd-stop.1.txt63
-rw-r--r--Documentation/trace-cmd/trace-cmd-stream.1.txt50
-rw-r--r--Documentation/trace-cmd/trace-cmd.1.txt109
-rw-r--r--Documentation/trace-cmd/trace-cmd.dat.v6.5.txt266
-rw-r--r--Documentation/trace-cmd/trace-cmd.dat.v7.5.txt451
-rw-r--r--LICENSE865
-rw-r--r--LICENSES/GPL-2.0359
-rw-r--r--LICENSES/LGPL-2.1503
-rw-r--r--METADATA13
-rw-r--r--MODULE_LICENSE_GPL0
-rw-r--r--Makefile591
-rw-r--r--OWNERS2
-rw-r--r--PACKAGING30
-rw-r--r--README68
-rw-r--r--features.mk55
-rw-r--r--include/linux/time64.h14
-rw-r--r--include/trace-cmd/trace-cmd.h53
-rw-r--r--include/version.h12
-rw-r--r--lib/trace-cmd/Makefile129
-rw-r--r--lib/trace-cmd/include/private/trace-cmd-private.h635
-rw-r--r--lib/trace-cmd/include/private/trace-filter-hash.h64
-rw-r--r--lib/trace-cmd/include/private/trace-hash.h55
-rw-r--r--lib/trace-cmd/include/private/trace-msg.h16
-rw-r--r--lib/trace-cmd/include/trace-cmd-local.h98
-rw-r--r--lib/trace-cmd/include/trace-hash-local.h46
-rw-r--r--lib/trace-cmd/include/trace-tsync-local.h78
-rw-r--r--lib/trace-cmd/include/trace-write-local.h43
-rw-r--r--lib/trace-cmd/plugins/Makefile58
-rw-r--r--lib/trace-cmd/test.c7
-rw-r--r--lib/trace-cmd/trace-blk-hack.c148
-rw-r--r--lib/trace-cmd/trace-compress-zlib.c116
-rw-r--r--lib/trace-cmd/trace-compress-zstd.c120
-rw-r--r--lib/trace-cmd/trace-compress.c991
-rw-r--r--lib/trace-cmd/trace-filter-hash.c211
-rw-r--r--lib/trace-cmd/trace-ftrace.c397
-rw-r--r--lib/trace-cmd/trace-hash.c84
-rw-r--r--lib/trace-cmd/trace-hooks.c168
-rw-r--r--lib/trace-cmd/trace-input.c5886
-rw-r--r--lib/trace-cmd/trace-msg.c1404
-rw-r--r--lib/trace-cmd/trace-output.c2819
-rw-r--r--lib/trace-cmd/trace-perf.c105
-rw-r--r--lib/trace-cmd/trace-plugin.c314
-rw-r--r--lib/trace-cmd/trace-recorder.c601
-rw-r--r--lib/trace-cmd/trace-timesync-kvm.c559
-rw-r--r--lib/trace-cmd/trace-timesync-ptp.c718
-rw-r--r--lib/trace-cmd/trace-timesync.c1079
-rw-r--r--lib/trace-cmd/trace-util.c692
-rw-r--r--libtracecmd.pc.template11
-rwxr-xr-xmake-trace-cmd.sh39
-rw-r--r--python/Makefile32
-rw-r--r--python/ctracecmd.i250
-rwxr-xr-xpython/event-viewer.py272
-rw-r--r--python/tracecmd.py255
-rw-r--r--python/tracecmdgui.py239
-rw-r--r--scripts/debug/tsync_hist.py57
-rw-r--r--scripts/debug/tsync_readme12
-rw-r--r--scripts/debug/tsync_res.py46
-rw-r--r--scripts/utils.mk210
-rw-r--r--tracecmd/.gitignore1
-rw-r--r--tracecmd/Makefile98
-rw-r--r--tracecmd/include/bug.h15
-rw-r--r--tracecmd/include/list.h68
-rw-r--r--tracecmd/include/trace-local.h437
-rw-r--r--tracecmd/trace-agent.c384
-rw-r--r--tracecmd/trace-check-events.c70
-rw-r--r--tracecmd/trace-clear.c125
-rw-r--r--tracecmd/trace-cmd.bash345
-rw-r--r--tracecmd/trace-cmd.c176
-rw-r--r--tracecmd/trace-convert.c109
-rw-r--r--tracecmd/trace-dump.c1355
-rw-r--r--tracecmd/trace-hist.c1076
-rw-r--r--tracecmd/trace-list.c760
-rw-r--r--tracecmd/trace-listen.c1201
-rw-r--r--tracecmd/trace-mem.c564
-rw-r--r--tracecmd/trace-profile.c2455
-rw-r--r--tracecmd/trace-read.c1984
-rw-r--r--tracecmd/trace-record.c7322
-rw-r--r--tracecmd/trace-restore.c164
-rw-r--r--tracecmd/trace-setup-guest.c252
-rw-r--r--tracecmd/trace-show.c172
-rw-r--r--tracecmd/trace-snapshot.c114
-rw-r--r--tracecmd/trace-split.c556
-rw-r--r--tracecmd/trace-stack.c216
-rw-r--r--tracecmd/trace-stat.c926
-rw-r--r--tracecmd/trace-stream.c147
-rw-r--r--tracecmd/trace-usage.c492
-rw-r--r--tracecmd/trace-vm.c388
-rw-r--r--tracecmd/trace-vsock.c176
-rw-r--r--utest/Makefile41
-rw-r--r--utest/README15
-rw-r--r--utest/trace-utest.c83
-rw-r--r--utest/trace-utest.h11
-rw-r--r--utest/tracefs-utest.c630
141 files changed, 51538 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..59661532
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+.gitattributes export-ignore
+.gitignore export-ignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..eb1b0dbe
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,29 @@
+*.o
+*.so
+*.a
+*.dat
+*.data
+*.patch
+.*.d
+*.orig
+*.rej
+.pc
+*~
+*.pyc
+*.swp
+\#*\#
+patches/
+tc_version.h
+ks_version.h
+ctracecmd_wrap.c
+ctracecmdgui_wrap.c
+tags
+TAGS
+cscope*
+trace_python_dir
+tracecmd_plugin_dir
+libtracecmd.pc
+build_prefix
+build_install
+build_libs_install
+ltc_version.h
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 00000000..112786b5
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,160 @@
+package {
+ default_applicable_licenses: ["external_trace-cmd_license"],
+}
+
+// See: http://go/android-license-faq
+license {
+ name: "external_trace-cmd_license",
+
+ visibility: [":__subpackages__"],
+
+ license_kinds: [
+ "SPDX-license-identifier-GPL-2.0",
+ "SPDX-license-identifier-GPL-2.0-only",
+ "SPDX-license-identifier-GPL-2.0+",
+ "SPDX-license-identifier-GPL-2.0-or-later",
+ "SPDX-license-identifier-LGPL-2.1+",
+ ],
+
+ license_text: [
+ "LICENSE",
+ ],
+}
+
+genrule {
+ name: "tc_version_header",
+ srcs: ["Makefile"],
+ out: ["tc_version.h"],
+ cmd: "(" +
+ "VERSION=$$(grep '\\bTC_VERSION =' <$(in) | awk '{ print $$3 }') " +
+ "&& PATCHLEVEL=$$(grep '\\bTC_PATCHLEVEL =' <$(in) | awk '{ print $$3 }') " +
+ "&& VERSION_CODE=$$(expr $${VERSION} \\* 256 + $${PATCHLEVEL}) " +
+ "&& EXTRAVERSION=$$(grep '\\bTC_EXTRAVERSION =' <$(in) | awk '{ print $$3 }') " +
+ "&& echo '/* This file is automatically generated. Do not modify */' " +
+ "&& echo \"#define VERSION_CODE $${VERSION_CODE}\" " +
+ "&& echo \"#define EXTRAVERSION $${EXTRAVERSION}\" " +
+ "&& echo '#define VERSION_STRING \"'$${VERSION}.$${PATCHLEVEL}.$${EXTRAVERSION}'\"' " +
+ "&& echo '#define FILE_VERSION ' " +
+ "&& echo '#define VERSION_GIT \"not-a-git-repo\"' " +
+ ") > $(out)",
+}
+
+cc_library {
+ name: "libtracecmd",
+
+ // Restrict visibility due to GPL license
+ visibility: [
+ "//external/trace-cmd:__subpackages__",
+ ],
+
+ local_include_dirs: [
+ "lib/trace-cmd/include/private",
+ "lib/trace-cmd/include",
+ "include/trace-cmd",
+ "tracecmd/include",
+ "include",
+ ],
+
+ export_include_dirs: [
+ "lib/trace-cmd/include",
+ ],
+
+ srcs: [
+ "lib/trace-cmd/test.c",
+ "lib/trace-cmd/trace-blk-hack.c",
+ "lib/trace-cmd/trace-compress.c",
+ "lib/trace-cmd/trace-compress-zlib.c",
+ "lib/trace-cmd/trace-filter-hash.c",
+ "lib/trace-cmd/trace-ftrace.c",
+ "lib/trace-cmd/trace-hash.c",
+ "lib/trace-cmd/trace-hooks.c",
+ "lib/trace-cmd/trace-input.c",
+ "lib/trace-cmd/trace-msg.c",
+ "lib/trace-cmd/trace-output.c",
+ "lib/trace-cmd/trace-perf.c",
+ "lib/trace-cmd/trace-plugin.c",
+ "lib/trace-cmd/trace-recorder.c",
+ "lib/trace-cmd/trace-timesync.c",
+ "lib/trace-cmd/trace-timesync-kvm.c",
+ "lib/trace-cmd/trace-timesync-ptp.c",
+ "lib/trace-cmd/trace-util.c",
+ ],
+
+ shared: {
+ shared_libs: [
+ "libtraceevent",
+ "libtracefs",
+ "libz",
+ ],
+ export_shared_lib_headers: [
+ "libtraceevent",
+ "libtracefs",
+ "libz",
+ ],
+ },
+
+ static: {
+ static_libs: [
+ "libtraceevent",
+ "libtracefs",
+ "libz",
+ ],
+ export_static_lib_headers: [
+ "libtraceevent",
+ "libtracefs",
+ "libz",
+ ],
+ },
+
+ generated_headers: ["tc_version_header"],
+
+ export_generated_headers: ["tc_version_header"],
+
+ cflags: [
+ "-D__bswap_64=__swap64",
+ "-D_GNU_SOURCE",
+ "-DPERF",
+ "-DVSOCK",
+ "-Wno-unused-parameter",
+ "-Wno-macro-redefined",
+ "-Wno-unused-but-set-variable",
+ "-Wno-user-defined-warnings",
+ "-Wno-visibility",
+ "-Wno-pointer-arith",
+ ],
+
+ c_std: "gnu99",
+}
+
+cc_binary {
+ name: "trace-cmd",
+
+ local_include_dirs: [
+ "lib/trace-cmd/include/private",
+ "include/trace-cmd",
+ "tracecmd/include",
+ "include",
+ ],
+
+ srcs: ["tracecmd/*.c"],
+
+ static_libs: [
+ "libtraceevent",
+ "libtracecmd",
+ "libtracefs",
+ ],
+
+ static_executable: true,
+
+ cflags: [
+ "-D_GNU_SOURCE",
+ "-DNO_AUDIT",
+ "-DVSOCK",
+ "-Wno-unused-parameter",
+ "-Wno-macro-redefined",
+ "-Wno-visibility",
+ "-Wno-pointer-arith",
+ ],
+
+ c_std: "gnu99",
+}
diff --git a/CODING_STYLE b/CODING_STYLE
new file mode 100644
index 00000000..24fb10ec
--- /dev/null
+++ b/CODING_STYLE
@@ -0,0 +1,287 @@
+
+trace-cmd coding-style
+======================
+
+The coding style of trace-cmd and the tracing libraries (libtracefs and
+libtraceevent) is very similar to the Linux kernel coding style:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst
+
+Indentation
+===========
+
+Tabs are used for the start of indentation (the '\t' character), and should be
+set to 8 characters. Spaces may be used at the end for continued lines where
+having the start of text line up to braces in the previous line is not
+divisible by 8.
+
+Max line width
+==============
+
+No line should be more than 100 characters in length.
+
+This is a guide, as readability is more important than breaking lines up into a
+hard limit. Ideally, strings should never be broken up except for where a new
+line is added.
+
+ printf("This is a line that may continue for a very long string.\n"
+ "This is another line, but after a new line\n");
+
+But line breaks should not be:
+
+ printf("This is a line that may continue for a very"
+ "long string.\n This is another line,"
+ "but after a new line\n");
+
+Not only is the above not as readable as the first version, it is not
+even equivalent, because it is missing spaces between the line breaks.
+For this reason, finish the string on the same line, even if that string
+breaks the 100 character limit.
+
+Brackets and braces
+===================
+
+For all conditionals, the braces start on the same line:
+
+ if (cond) {
+ }
+
+And the ending brace is at the same indentation as the conditional.
+
+ while (cond) {
+ }
+
+ do {
+ } while (cond);
+
+ for (i = 0; i < 10; i++) {
+ }
+
+The same is true for structures:
+
+ struct my_struct {
+ int field;
+ };
+
+But for functions, the braces should start on the following line:
+
+ void my_function(void)
+ {
+ }
+
+
+It is also fine to not use braces for simple conditionals and loops.
+
+ if (!x)
+ y = x;
+ else
+ y = 1;
+
+ for (i = 0; i < 10; i++)
+ foo(i);
+
+ while (getline(&line, &size, fp) > 0)
+ printf("%s", line);
+
+But any complex or multiline conditional or loop should have braces even if it
+is allowed not to by the C language.
+
+ if (x) {
+ for (i = 0; i < 10; i++)
+ foo(i);
+ } else {
+ foo(1);
+ }
+
+Notice above that even though the else portion is simple, it too has braces as
+the else and if blocks should match. If one is required to have braces, they
+both should have braces.
+
+
+Spaces
+======
+
+A single space should be used between C commands and their starting
+parenthesis.
+
+ if (x)
+ for (i = 0; i < 10; i++)
+ while (getline(&line, &size, fp) > 0)
+
+There should be no space between functions or macros and the starting
+parenthesis.
+
+ foo(x)
+ IS_VALID(y)
+
+This includes prototypes and declarations.
+
+ void foo(int x)
+
+A space should be before and after assignment, comparison and arithmetic
+operators.
+
+ i = 0;
+ if (i < 10)
+ if (i == 5)
+
+ y = i + 10;
+
+ i += 5;
+
+For structures, use tabs to make all the fields line up nicely.
+
+ struct {
+ int foo;
+ int bar;
+ unsigned long long time;
+ };
+
+Variable declarations
+=====================
+
+The order of variables that are declared, should first keep the same types
+together, but also should be ordered by their length such that the variables
+are ordered in an "upside-down Christmas tree" fashion where the length gets
+smaller.
+
+ int tracecmd_count_cpus(void)
+ {
+ static int once;
+ char buf[1024];
+ int cpus = 0;
+ char *pbuf;
+ size_t *pn;
+ FILE *fp;
+ size_t n;
+ int r;
+
+The above shows that the order is done by length, and in the above example it
+also shows that "int cpus = 0;" is not grouped next to "int r;". As this is more
+of a guideline and made to be more aesthetic to the eye of the reader, both the
+above and the below are acceptable.
+
+ int tracecmd_count_cpus(void)
+ {
+ static int once;
+ char buf[1024];
+ char *pbuf;
+ size_t *pn;
+ FILE *fp;
+ size_t n;
+ int cpus = 0;
+ int r;
+
+
+Unless variables are tightly related, it is expected that each variable be on
+its own line and not grouped by type. That is,
+
+ int r, cpus = 0;
+
+is to be discouraged, as the two variables are not related to each other.
+But if you had a bunch of counters:
+
+ int i, j, k;
+
+That would be fine, as the variables are all related as they are all for the
+same purpose (arbitrary counters). The same may go with pointers;
+
+
+ char *begin, *end;
+
+Comments
+========
+
+Comments will use the "/* */" format and the C++ "//" style is discouraged.
+If a comment is on one line, keep the "/*" and "*/" on the same line:
+
+ /* This is a single line comment. */
+
+If a comment spans more than one line, then have the "/*" on a separate line
+before the comment and the "*/" on a separate line at the end of the comment,
+and each line starts with a "*" where all the "*" line up with each other.
+
+ /*
+ * This is a multi line comment, where all the '*'
+ * will line up, and the text is on a separate line
+ * as the start and end markers.
+ */
+
+
+Function documentation
+======================
+
+All global functions (and especially any APIs) should have a function
+description in the form of "kernel doc":
+
+ https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html
+
+The form is:
+
+ /**
+ * function_name() - Brief description of function.
+ * @arg1: Describe the first argument.
+ * @arg2: Describe the second argument.
+ * One can provide multiple line descriptions
+ * for arguments.
+ *
+ * A longer description, with more discussion of the function function_name()
+ * that might be useful to those using or modifying it. Begins with an
+ * empty comment line, and may include additional embedded empty
+ * comment lines.
+ *
+ * The longer description may have multiple paragraphs.
+ *
+ * Context: Describes whether the function can sleep, what locks it takes,
+ * releases, or expects to be held. It can extend over multiple
+ * lines.
+ * Return: Describe the return value of function_name.
+ *
+ * The return value description can also have multiple paragraphs, and should
+ * be placed at the end of the comment block.
+ */
+
+Structure layout
+================
+
+This is more about compaction than coding style. When creating structures, be
+aware that if the fields are placed together without regard to their size and
+alignment, the compiler will create "holes" in them.
+
+ struct {
+ int x;
+ char y;
+ unsigned long long f;
+ };
+
+An int is 4 bytes in length, a char is one byte, and an unsigned long long is 8
+bytes. The compiler will try to naturally align them by their size, and will
+include padding (holes) inside the structure to do so. The above is equivalent
+to:
+
+ struct {
+ int x;
+ char y;
+ char padding[3];
+ unsigned long long f;
+ };
+
+It is best to try to organize the structure where there are no holes within
+them.
+
+ struct {
+ unsigned long long f;
+ int x;
+ char y;
+ };
+
+The above is better formatting, even if there may be padding outside the
+structure, but the compiler will still have more flexibility to utilize the
+space outside the structure than what it can do within it.
+
+General
+=======
+
+As stated, this is a guide and may not be strictly enforced. The goal is to
+have consistent and readable code. In general, try to have the coding style
+match the surrounding code.
diff --git a/CONTRIBUTE b/CONTRIBUTE
new file mode 100644
index 00000000..0440b186
--- /dev/null
+++ b/CONTRIBUTE
@@ -0,0 +1,103 @@
+If you would like to become part of the community and submit patches, here's how
+to do so for trace-cmd.
+
+If you only want to report a bug, or suggest an enhancement, you may do
+so at:
+
+ https://bugzilla.kernel.org/buglist.cgi?component=Trace-cmd%2FKernelshark
+
+All development is done via a mailing list:
+
+ http://vger.kernel.org/vger-lists.html#linux-trace-devel
+
+Patches should be sent to linux-trace-devel@vger.kernel.org
+
+Start by cloning the official repository:
+
+ git clone git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git
+
+Make your changes. When you are satisfied with them, commit them into git.
+Here are some helpful hints for your git commits.
+
+1) When making changes, please follow the coding style defined by the file
+ called CODING_STYLE in this directory.
+
+2) Every commit should only do one thing.
+ That is, if your work requires some cleaning up of code, do that
+ clean up as a separate commit and not with your functional changes.
+ Find ways to take "steps" in modifying code. If you can break up
+ your changes in a series of steps, do so.
+
+3) The commit log should start with a title. Like the below
+
+ trace-cmd: Add CONTRIBUTE file
+
+ Even though this repo is for trace-cmd, start the topic with
+ "trace-cmd:" because the commits will end up as patches to a mailing
+ list that handles other tracing repos, differentiating them with the subject
+ is useful. You can be more specific as well. If the change only affects the
+ "record" command, you may start the title with "trace-cmd record:".
+
+4) The body of the commit (with a blank line from the title), should be self
+ contained, and explain why you are making the change. The title should hold
+ the "what" is changing, but the body contains the rationale for the change.
+ It should stand alone, and not state things like "See the next patch",
+ because when it is in git history, there's no knowing what the next patch
+ is. You can make statements like "This is needed for a <future-feature>
+ that will come later". Where "<future-feature>" is something that you are
+ working on and the current commit is one of the steps required to get there.
+
+5) Add your Developer Certificate of Origin (DCO) at the bottom of the commit
+ log. That is "Signed-off-by: Full Name <email>" where your full name is your
+ real name (no pseudonyms). Optionally, if you are making the change on
+ behalf of your company, you may also add your company name, if you are not
+ using your company's email. "Signed-off-by: Full Name (Company) <email>".
+ Please note, the DCO is your statement that you have the legal right to
+ make these changes for the project you are submitting to.
+
+You can use the Linux kernel "checkpatch.pl" script to help verify the formatting
+of your patch:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/checkpatch.pl
+
+Please note that checkpatch.pl is a guide and not a hard rule. If it reports a
+fix that makes the code harder to read, that fix can probably be ignored.
+
+ git format-patch --stdout HEAD~1..HEAD | ./checkpatch.pl
+
+Finally, you can use the git "send-email" functionality:
+
+ git send-email --from='<your-email>' --to='linux-trace-devel@vger.kernel.org' HEAD~1..HEAD
+
+The above is for sending a single patch. If you are sending more than one patch,
+also include a cover letter:
+
+ git send-email --cover-letter --annotate --from='<your-email>' --to='linux-trace-devel@vger.kernel.org' <first-commit>~1..HEAD
+
+If you receive feedback on your patches, and plan on sending another version,
+please use the '-v' option to mark your patches that they are a new version.
+For example, if you add "-v2" to the above commands, instead of having:
+"[PATCH]" in the subject, it will have "[PATCH v2]", letting the reviewers know
+that this is a new version. If you send another version, use "-v3" and so on.
+
+For more information about git send-email:
+
+ https://git-scm.com/docs/git-send-email
+
+To keep track of the status of patches that have been submitted, check out:
+
+ https://patchwork.kernel.org/project/linux-trace-devel/list/
+
+If you would like to apply patches from the mailing list, you can use
+the "b4" utility.
+
+ $ pip install b4
+
+Then from the mailing list archive, find a message id from a patch or patch
+series. For example, to get the patch from:
+
+ https://lore.kernel.org/linux-trace-devel/20210205173713.132051-1-tz.stoyanov@gmail.com/
+
+ $ b4 am -o - 20210205173713.132051-1-tz.stoyanov@gmail.com > /tmp/p.mbox
+ $ git am /tmp/p.mbox
+
diff --git a/COPYING b/COPYING
new file mode 100644
index 00000000..9d46b791
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,4 @@
+There are two main licenses that the tools in this directory are covered
+under. For the applications themselves, they are covered under GPL-2.0 (see
+LICENSES/GPL-2.0). As for the exported headers and libraries, they are covered
+under LGPL-2.1 (see LICENSES/LGPL-2.1).
diff --git a/COPYING.LIB b/COPYING.LIB
new file mode 100644
index 00000000..da52742b
--- /dev/null
+++ b/COPYING.LIB
@@ -0,0 +1,510 @@
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+^L
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard. To achieve this, non-free programs must
+be allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+^L
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+^L
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+^L
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least
+ three years, to give the same user the materials specified in
+ Subsection 6a, above, for a charge no more than the cost of
+ performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+^L
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+^L
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+^L
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+^L
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James
+ Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/DCO b/DCO
new file mode 100644
index 00000000..775c1108
--- /dev/null
+++ b/DCO
@@ -0,0 +1,47 @@
+
+(Copied from the Linux Kernel's Documentation/process/submitting-patches.rst)
+
+Sign your work - the Developer's Certificate of Origin
+------------------------------------------------------
+
+The sign-off is a simple line at the end of the explanation for the
+patch, which certifies that you wrote it or otherwise have the right to
+pass it on as an open-source patch. The rules are pretty simple: if you
+can certify the below:
+
+Developer's Certificate of Origin 1.1
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+ (b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+ (c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+ (d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
+
+then you just add a line saying::
+
+ Signed-off-by: Random J Developer <random@developer.example.org>
+
+using your real name (sorry, no pseudonyms or anonymous contributions.)
+
+Some people also put extra tags at the end. They'll just be ignored for
+now, but you can do this to mark internal company procedures or just
+point out some special detail about the sign-off.
diff --git a/Documentation/.gitignore b/Documentation/.gitignore
new file mode 100644
index 00000000..8a38b2ea
--- /dev/null
+++ b/Documentation/.gitignore
@@ -0,0 +1,3 @@
+*.[1-9]
+*.m
+*.html
diff --git a/Documentation/Makefile b/Documentation/Makefile
new file mode 100644
index 00000000..ec364916
--- /dev/null
+++ b/Documentation/Makefile
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+doc_dir:=$(src)/Documentation
+
+export doc_dir
+
+SUBDIR += trace-cmd
+SUBDIR += libtracecmd
+
+.PHONY: $(SUBDIR)
+
+DOCDIR = $(src)/Documentation
+ASCIIDOC=asciidoc
+ASCIIDOC_CONF = $(DOCDIR)/asciidoc.conf
+ASCIIDOC_EXTRA = --unsafe -f $(ASCIIDOC_CONF)
+ASCIIDOC_HTML = xhtml11
+MANPAGE_XSL = $(DOCDIR)/manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+
+ASCIIDOC_INSTALLED := $(shell command -v $(ASCIIDOC) 2> /dev/null)
+ifndef ASCIIDOC_INSTALLED
+ missing_tools += $(ASCIIDOC)
+endif
+
+XMLTO=xmlto
+XMLTO_INSTALLED := $(shell command -v $(XMLTO) 2> /dev/null)
+ifndef XMLTO_INSTALLED
+ missing_tools += $(XMLTO)
+endif
+
+#
+# For asciidoc ...
+# -7.1.2, no extra settings are needed.
+# 8.0-, set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+# 1.69.0, no extra settings are needed?
+# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
+# 1.71.1, no extra settings are needed?
+# 1.72.0, set DOCBOOK_XSL_172.
+# 1.73.0-, set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a libtracecmd-asciidoc-no-roff
+MANPAGE_XSL = $(DOCDIR)/manpage-1.72.xsl
+else
+ ifdef ASCIIDOC_NO_ROFF
+ # docbook-xsl after 1.72 needs the regular XSL, but will not
+ # pass-thru raw roff codes from asciidoc.conf, so turn them off.
+ ASCIIDOC_EXTRA += -a libtracecmd-asciidoc-no-roff
+ endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m $(DOCDIR)/manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m $(DOCDIR)/manpage-suppress-sp.xsl
+endif
+
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC = asciidoctor
+ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
+ASCIIDOC_HTML = xhtml5
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+export ASCIIDOC ASCIIDOC_CONF ASCIIDOC_EXTRA ASCIIDOC_HTML
+export MANPAGE_XSL
+export XMLTO XMLTO_INSTALLED XMLTO_EXTRA
+export missing_tools
+export RM
+
+all: $(SUBDIR)
+clean: $(SUBDIR)
+install: $(SUBDIR)
+
+$(SUBDIR):
+ make -C $@ $(MAKECMDGOALS)
+
diff --git a/Documentation/README.PythonPlugin b/Documentation/README.PythonPlugin
new file mode 100644
index 00000000..3de05647
--- /dev/null
+++ b/Documentation/README.PythonPlugin
@@ -0,0 +1,127 @@
+ PYTHON PLUGIN DOCUMENTATION
+=============================
+
+With the python plugin (make python-plugin) you can now
+write plugins in python. The API exported by the python
+plugin itself (written in C) allows you to access most
+information about a record from python.
+
+To write a python plugin, put a new .py file into a new
+~/.trace-cmd/python/ directory.
+
+The most basic python plugin is this:
+
+--- %< ---
+def register(pevent):
+ pass
+--- >% ---
+
+which obviously does nothing at all.
+
+To register a callback, use the pevent.register_event_handler
+function:
+
+--- %< ---
+import tracecmd
+
+def my_event_handler(trace_seq, event):
+ pass
+
+def register(pevent):
+ pevent.register_event_handler("subsys", "event_name",
+ my_event_handler)
+--- >% ---
+
+
+There are four object types that you get, described below.
+
+ tracecmd.PEvent
+-----------------
+
+This is the class of the 'pevent' object above,
+you get one of those via your register callback.
+It has one method and one property:
+ * register_event_handler() - example above, to register
+ an event handler function
+ * file_endian - either '<' or '>' indicating
+ which endianness the file has,
+ to be used with struct.unpack()
+
+ tracecmd.TraceSeq
+-------------------
+
+This is the class of the 'trace_seq' parameter to your callback
+function. It has only one method, puts(), to put data into the
+buffer. Formatting must be done in python.
+
+ tracecmd.Event
+----------------------
+
+This is the class of the 'event' parameter to your callback
+function. Note that it doesn't just contain the format, but
+also the event data. As such, you can do much with this, and
+this is what you'll usually use. Each instance of this allows
+access to record items via the dict protocol, and you can get
+the items via its keys() method. So for example, your
+callback could be
+
+--- %< ---
+def my_callback(trace_seq, event):
+ for fieldname in event.keys():
+ field = event[fieldname]
+--- >% ---
+
+Each field returned from the dict protocol is an instance of
+the next (and last) class:
+
+ tracecmd.Field
+----------------------
+
+This is an instance of a field, including its data. It affords
+numerous use cases and is what you'll be using most.
+
+ * If this is an integer field, i.e. 1, 2, 4 or 8 bytes long,
+ you can convert it to the number contained, according to
+ the file's endianness, by simply casting it to a long:
+
+ field = event['myint']
+ value = long(field)
+
+ * You can access the field's data, as field.data, and if the
+ data is really a "__data_loc" type that will be resolved
+ automatically. (If you don't know what this means, don't
+ worry about it and just use field.data)
+
+
+This is it. It's pretty simple. A fully-featured plugin could
+look like this:
+
+--- %< ---
+def my_event_handler(trace_seq, event):
+ trace_seq.puts("myev: %u" % long(event['myfield']))
+
+def register(pevent):
+ pevent.register_event_handler("subsys", "event_name",
+ my_event_handler)
+--- >% ---
+
+
+ Tips and tricks
+-----------------
+
+Be familiar with the struct module and use it, always
+checking endianness and potentially using pevent.file_endian.
+
+
+If you need access to pevent in your callbacks, simply
+pass it in yourself:
+
+--- %< ---
+def my_event_handler(pevent, trace_seq, event):
+ pass
+
+def register(pevent):
+ pevent.register_event_handler("subsys", "event_name",
+ lambda *args: my_event_handler(pevent, *args)
+ )
+--- >% ---
diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf
new file mode 100644
index 00000000..c15aa13b
--- /dev/null
+++ b/Documentation/asciidoc.conf
@@ -0,0 +1,120 @@
+## linktep: macro
+#
+# Usage: linktep:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show TEP link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linktep-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::tep-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::tep-asciidoc-no-roff[]
+
+ifdef::tep-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::tep-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">libtracefs</refmiscinfo>
+<refmiscinfo class="version">{libtracefs_version}</refmiscinfo>
+<refmiscinfo class="manual">libtracefs Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+ <refname>{manname1}</refname>
+ <refname>{manname2}</refname>
+ <refname>{manname3}</refname>
+ <refname>{manname4}</refname>
+ <refname>{manname5}</refname>
+ <refname>{manname6}</refname>
+ <refname>{manname7}</refname>
+ <refname>{manname8}</refname>
+ <refname>{manname9}</refname>
+ <refname>{manname10}</refname>
+ <refname>{manname11}</refname>
+ <refname>{manname12}</refname>
+ <refname>{manname13}</refname>
+ <refname>{manname14}</refname>
+ <refname>{manname15}</refname>
+ <refname>{manname16}</refname>
+ <refname>{manname17}</refname>
+ <refname>{manname18}</refname>
+ <refname>{manname19}</refname>
+ <refname>{manname20}</refname>
+ <refname>{manname21}</refname>
+ <refname>{manname22}</refname>
+ <refname>{manname23}</refname>
+ <refname>{manname24}</refname>
+ <refname>{manname25}</refname>
+ <refname>{manname26}</refname>
+ <refname>{manname27}</refname>
+ <refname>{manname28}</refname>
+ <refname>{manname29}</refname>
+ <refname>{manname30}</refname>
+ <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linktep-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/Documentation/libtracecmd/Makefile b/Documentation/libtracecmd/Makefile
new file mode 100644
index 00000000..48334525
--- /dev/null
+++ b/Documentation/libtracecmd/Makefile
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Include the utils
+include $(src)/scripts/utils.mk
+
+# This Makefile and manpage XSL files were taken from libtracefs
+# and modified for libtracecmd
+
+MAN3_TXT= \
+ $(wildcard libtracecmd-*.txt) \
+ libtracecmd.txt
+
+MAN_TXT = $(MAN3_TXT)
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+_DOC_MAN3=$(patsubst %.txt,%.m,$(MAN3_TXT))
+
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
+DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3))
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/libtracecmd-doc
+pdfdir?=$(prefix)/share/doc/libtracecmd-doc
+mandir?=$(prefix)/share/man
+man3dir=$(mandir)/man3
+
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC_EXTRA += -a mansource="libtracecmd" -a manmanual="libtracecmd Manual"
+endif
+
+all: check-man-tools html man
+
+man: man3
+man3: $(DOC_MAN3)
+
+html: $(MAN_HTML)
+
+$(MAN_HTML) $(DOC_MAN3): $(ASCIIDOC_CONF)
+
+install: check-man-tools install-man install-html
+
+check-man-tools:
+ifdef missing_tools
+ $(error "You need to install $(missing_tools) for man pages")
+endif
+
+install-%.3: $(OUTPUT)%.3
+ $(Q)$(call do_install_docs,$<,$(man3dir),644);
+
+do-install-man: man $(addprefix install-,$(wildcard $(OUTPUT)*.3))
+
+install-man: man
+ $(Q)$(MAKE) -C . do-install-man
+
+install-%.txt: $(OUTPUT)%.html
+ $(Q)$(call do_install_docs,$<,$(htmldir),644);
+
+do-install-html: html $(addprefix install-,$(wildcard *.txt))
+
+install-html: html do-install-html
+
+uninstall: uninstall-man uninstall-html
+
+uninstall-man:
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3))
+
+uninstall-html:
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(htmldir)/,$(MAN_HTML))
+
+ifdef missing_tools
+ DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
+else
+ DO_INSTALL_MAN = do-install-man
+endif
+
+CLEAN_FILES = \
+ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \
+ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
+ $(DOC_MAN3) *.3 *.m
+
+clean:
+ $(Q) $(RM) $(CLEAN_FILES)
+
+ifdef USE_ASCIIDOCTOR
+$(OUTPUT)%.m : $(OUTPUT)%.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b manpage -d manpage \
+ $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \
+ mv $@+ $@
+endif
+
+$(OUTPUT)%.m : $(OUTPUT)%.xml
+ $(QUIET_XMLTO)$(RM) $@ && \
+ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \
+ touch $@
+
+$(OUTPUT)%.xml : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b docbook -d manpage \
+ $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
+ $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \
+ mv $@+ $@
diff --git a/Documentation/libtracecmd/libtracecmd-files.txt b/Documentation/libtracecmd/libtracecmd-files.txt
new file mode 100644
index 00000000..2de5d6df
--- /dev/null
+++ b/Documentation/libtracecmd/libtracecmd-files.txt
@@ -0,0 +1,169 @@
+libtracecmd(3)
+=============
+
+NAME
+----
+tracecmd_open, tracecmd_open_fd, tracecmd_open_head, tracecmd_init_data,
+tracecmd_close - Open and close a trace file.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <trace-cmd.h>*
+
+struct tracecmd_input pass:[*]*tracecmd_open*(const char pass:[*]_file_, int _flags_);
+struct tracecmd_input pass:[*]*tracecmd_open_fd*(int _fd_, int _flags_);
+struct tracecmd_input pass:[*]*tracecmd_open_head*(const char pass:[*]_file_, int _flags_);
+int *tracecmd_init_data*(struct tracecmd_input pass:[*]_handle_);
+void *tracecmd_close*(struct tracecmd_input pass:[*]_handle_);
+--
+
+DESCRIPTION
+-----------
+This set of APIs can be used to open and close a trace file recorded by
+_trace-cmd(1)_ and containing tracing information from ftrace, the official
+Linux kernel tracer. The opened file is represented by a _tracecmd_input_
+structure, all other library APIs that work with the file require a pointer
+to the structure. The APIs for opening a trace file have a _flags_ input
+parameter, which controls how the file will be opened and parsed. The _flags_
+value is a combination of these options:
+
+ TRACECMD_FL_LOAD_NO_PLUGINS - Do not load any plugins
+ TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS - Do not load system wide plugins, load only "local only"
+ plugins from user's home directory.
+
+The _tracecmd_open()_ function opens a given trace _file_, parses the
+metadata headers from the file, allocates and initializes a _tracecmd_input_
+handler structure representing the file. It also initializes the handler
+for reading trace data from the file. The returned handler is ready to be
+used with _tracecmd_read__ APIs.
+
+The _tracecmd_open_fd()_ function does the same as _tracecmd_open()_, but
+works with a file descriptor to a trace file, opened for reading.
+
+The _tracecmd_open_head()_ function is the same as _tracecmd_open()_, but
+does not initialize the handler for reading trace data. It reads and parses
+the metadata headers only. The _tracecmd_init_data()_ should be used before
+using the _tracecmd_read__ APIs.
+
+The _tracecmd_init_data()_ function initializes a _handle_, allocated with
+_tracecmd_open_head()_, for reading trace data from the file associated with
+it. This API must be called before any of the _tracecmd_read__ APIs.
+
+The _tracecmd_close()_ function frees a _handle_, pointer to tracecmd_input
+structure, previously allocated with _tracecmd_open()_, _tracecmd_open_fd()_
+or _tracecmd_open_head()_ APIs.
+
+RETURN VALUE
+------------
+The _tracecmd_open()_, _tracecmd_open_fd()_ and _tracecmd_open_head()_
+functions return a pointer to tracecmd_input structure or NULL in case of
+an error. The returned structure must be freed with _tracecmd_close()_.
+Note that if _tracecmd_open_fd()_ is used to allocate a tracecmd_input handler,
+when _tracecmd_close()_ is called to close it, that fd will be closed also.
+
+The _tracecmd_init_data()_ function returns -1 in case of an error or
+0 otherwise.
+
+EXAMPLE
+-------
+[source,c]
+--
+There are two different use patterns for opening and reading trace data from
+a trace file, which can be used depending on the use case.
+
+1. Open and initialise the trace file in a single step:
+
+#include <trace-cmd.h>
+...
+struct tracecmd_input *handle = tracecmd_open("trace.dat", 0);
+ if (!handle) {
+ /* Failed to open trace.dat file */
+ }
+...
+ /* Read tracing data from the file, using the handle */
+...
+ tracecmd_close(handle);
+...
+int fd;
+ fd = open("trace.dat", O_RDONLY);
+ if (fd < 0) {
+ /* Failed to open trace file for reading */
+ }
+ handle = tracecmd_open_fd(fd, 0);
+ if (!handle) {
+ close(fd);
+ /* Failed to initialise handler for reading the trace file */
+ }
+...
+ /* Read tracing data from the file, using the handle */
+...
+ tracecmd_close(handle);
+...
+
+2. Open and initialise the trace file in two steps. This allows some processing
+based on metadata read from the file to be performed before initialising
+the trace data for reading. An example of such a use case is opening multiple
+trace files recorded in the same trace session. In that case the timestamps of all
+trace events must be adjusted based on the information from the file's metadata
+before reading the trace data.
+
+#include <trace-cmd.h>
+...
+struct tracecmd_input *handle = tracecmd_open_head("trace.dat", 0);
+ if (!handle) {
+ /* Failed to open trace.dat file */
+ }
+...
+ /* do some processing, before initialising the trace data for reading */
+...
+ if (tracecmd_init_data(handle) < 0) {
+ /* Failed to initialize handle for reading the trace data */
+ }
+...
+ /* Read tracing data from the file, using the handle */
+...
+ tracecmd_close(handle);
+...
+--
+FILES
+-----
+[verse]
+--
+*trace-cmd.h*
+ Header file to include in order to have access to the library APIs.
+*-ltracecmd*
+ Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtracefs(3)_,
+_libtraceevent(3)_,
+_trace-cmd(1)_,
+_trace-cmd.dat(5)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracecmd is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/libtracecmd/libtracecmd-instances.txt b/Documentation/libtracecmd/libtracecmd-instances.txt
new file mode 100644
index 00000000..df8fdc4e
--- /dev/null
+++ b/Documentation/libtracecmd/libtracecmd-instances.txt
@@ -0,0 +1,129 @@
+libtracecmd(3)
+=============
+
+NAME
+----
+tracecmd_buffer_instances, tracecmd_buffer_instance_name, tracecmd_buffer_instance_handle
+- Read tracing instances from a trace file.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <trace-cmd.h>*
+
+int *tracecmd_buffer_instances*(struct tracecmd_input pass:[*]_handle_);
+const char pass:[*]*tracecmd_buffer_instance_name*(struct tracecmd_input pass:[*]_handle_, int _indx_);
+struct tracecmd_input pass:[*]*tracecmd_buffer_instance_handle*(struct tracecmd_input pass:[*]_handle_, int _indx_);
+--
+
+DESCRIPTION
+-----------
+This set of APIs can be used to get information and read tracing data
+from tracing instances stored in a trace file.
+
+The _tracecmd_buffer_instances()_ function gets the number of tracing
+instances recorded in a trace file. The top instance is not counted.
+The _handle_ is a tracecmd_input handler returned by
+_tracecmd_open_head()_.
+
+The _tracecmd_buffer_instance_name()_ function gets the name of the
+tracing instance with given index _indx_, recorded in a trace file.
+The _indx_ is a number in the interval [0 .. count-1], where count
+is the number returned by _tracecmd_buffer_instances()_. The _handle_
+is a tracecmd_input handler returned by _tracecmd_open_head()_.
+
+The _tracecmd_buffer_instance_handle()_ allocates and initializes a
+tracecmd_input handle, associated with trace instance with index
+_indx_ from a trace file. The _handle_ is a tracecmd_input handler
+returned by _tracecmd_open_head()_. The _indx_ is a number in the
+interval [0 .. count-1], where count is the number returned by
+_tracecmd_buffer_instances()_.
+
+RETURN VALUE
+------------
+The _tracecmd_buffer_instances()_ function returns the number of tracing
+instances recorded in a trace file.
+
+The _tracecmd_buffer_instance_name()_ function returns a string, the name
+of a tracing instance, or NULL in case of an error. The string must *not*
+be freed.
+
+The _tracecmd_buffer_instance_handle()_ function returns a pointer to
+newly allocated tracecmd_input handler or NULL in case of an error. The
+returned handler must be closed by _tracecmd_close()(3)_.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <trace-cmd.h>
+...
+struct tracecmd_input *handle = tracecmd_open_head("trace.dat", 0);
+ if (!handle) {
+ /* Failed to open trace.dat file */
+ }
+...
+int num = tracecmd_buffer_instances(handle);
+
+ while(num) {
+ struct tracecmd_input *h;
+ const char *name;
+
+ /* Valid instance indexes are in the interval [0 .. count-1] */
+ name = tracecmd_buffer_instance_name(handle, num - 1);
+ if (!name) {
+ /* Failed to get name of instance num - 1 */
+ }
+ h = tracecmd_buffer_instance_handle(handle, num - 1);
+ if (!h) {
+ /* Failed to initialize handler for instance num */
+ }
+
+ ...
+ tracecmd_close(h);
+ num--;
+ }
+...
+ tracecmd_close(handle);
+
+--
+FILES
+-----
+[verse]
+--
+*trace-cmd.h*
+ Header file to include in order to have access to the library APIs.
+*-ltracecmd*
+ Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtracefs(3)_,
+_libtraceevent(3)_,
+_trace-cmd(1)_,
+_trace-cmd.dat(5)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracecmd is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/libtracecmd/libtracecmd-log.txt b/Documentation/libtracecmd/libtracecmd-log.txt
new file mode 100644
index 00000000..de5c2776
--- /dev/null
+++ b/Documentation/libtracecmd/libtracecmd-log.txt
@@ -0,0 +1,78 @@
+libtracecmd(3)
+=============
+
+NAME
+----
+tracecmd_set_loglevel - Set log level of the library
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <trace-cmd.h>*
+
+int *tracecmd_set_loglevel*(enum tep_loglevel _level_);
+--
+
+DESCRIPTION
+-----------
+The _tracecmd_set_loglevel()_ function sets the level of the library logs that will be printed on
+the console. See _libtraceevent(3)_ for a detailed description of the log levels. Setting the log
+level to a specific value means that logs from the previous levels will be printed too. For example
+_TEP_LOG_WARNING_ will print any logs with severity _TEP_LOG_WARNING_, _TEP_LOG_ERROR_ and
+_TEP_LOG_CRITICAL_. The default log level is _TEP_LOG_CRITICAL_. When a new level is set, it is
+also propagated to the libtracefs and libtraceevent.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <trace-cmd.h>
+...
+tracecmd_set_loglevel(TEP_LOG_ALL);
+...
+/* call libtracecmd, libtracefs or libtraceevent APIs and observe any logs they produce */
+...
+tracecmd_set_loglevel(TEP_LOG_CRITICAL);
+--
+
+FILES
+-----
+[verse]
+--
+*trace-cmd.h*
+ Header file to include in order to have access to the library APIs.
+*-ltracecmd*
+ Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtracefs(3)_,
+_libtraceevent(3)_,
+_trace-cmd(1)_,
+_trace-cmd.dat(5)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracecmd is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2021 VMware, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/libtracecmd/libtracecmd-peer.txt b/Documentation/libtracecmd/libtracecmd-peer.txt
new file mode 100644
index 00000000..2e3232c6
--- /dev/null
+++ b/Documentation/libtracecmd/libtracecmd-peer.txt
@@ -0,0 +1,137 @@
+libtracecmd(3)
+=============
+
+NAME
+----
+tracecmd_get_traceid, tracecmd_get_guest_cpumap - Manage trace session with multiple trace peers,
+recorded in multiple trace files.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <trace-cmd.h>*
+
+unsigned long long *tracecmd_get_traceid*(struct tracecmd_input pass:[*]_handle_);
+int *tracecmd_get_guest_cpumap*(struct tracecmd_input pass:[*]_handle_, unsigned long long _trace_id_, const char pass:[*]pass:[*]_name_, int pass:[*]_vcpu_count_, const int pass:[*]pass:[*]_cpu_pid_);
+--
+
+DESCRIPTION
+-----------
+This set of APIs can be used to manage a trace session with multiple trace
+peers, for example, tracing both a host and one or more guest virtual machines.
+The trace data of each peer from the session is recorded in separate trace files.
+Information about peers from the session is stored in the metadata of each
+trace file. These APIs use that information to extract and synchronize
+the trace data.
+
+The _tracecmd_get_traceid()_ function returns the trace ID stored in
+the trace file metadata associated with _handle_. Each peer from a trace
+session has an ID unique for that peer and that trace session only.
+This ID is used to match multiple trace files recorded in the same trace
+session.
+
+The _tracecmd_get_guest_cpumap()_ function gets the mapping of guest
+virtual CPUs (VCPU) to the host process that represents those VCPUs and is
+stored in the metadata of the trace file associated with _handle_. This
+information is gathered during a host-guest trace session and is stored
+in the host trace file. The _trace_id_ parameter is the trace ID of the guest
+in this particular trace session. If a guest with that ID was part of that
+session, its VCPU to host process mapping is in the host trace file and the
+information is returned in _name_, _vcpu_count_ and _cpu_pid_ parameters.
+The _name_ parameter contains the name of the guest, the _vcpu_count_ contains
+the count of VCPUs of that guest and the _cpu_pid_ array contains the VCPU to
+host process mapping. The array is of size _vcpu_count_ where the index is VCPU
+and the value is the process ID (PID) of the host process, running that VCPU.
+The _name_, _vcpu_count_ and _cpu_pid_ values must *not* be freed.
+
+RETURN VALUE
+------------
+The _tracecmd_get_traceid()_ function returns a 64 bit trace ID.
+
+The _tracecmd_get_guest_cpumap()_ function returns -1 in case of
+an error or 0 otherwise. If 0 is returned, then the _name_, _vcpu_count_
+and _cpu_pid_ parameters contain the requested information.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <trace-cmd.h>
+...
+struct tracecmd_input *host = tracecmd_open("trace.dat", 0);
+ if (!host) {
+ /* Failed to open host trace file */
+ }
+
+struct tracecmd_input *guest1 = tracecmd_open_head("trace-Guest1.dat", 0);
+ if (!guest1) {
+ /* Failed to open guest1 trace file */
+ }
+struct tracecmd_input *guest2 = tracecmd_open_head("trace-Guest2.dat", 0);
+ if (!guest2) {
+ /* Failed to open guest2 trace file */
+ }
+
+unsigned long long guest_id_1 = tracecmd_get_traceid(guest1);
+unsigned long long guest_id_2 = tracecmd_get_traceid(guest2);
+const int *cpu_pid_1, *cpu_pid_2;
+int vcount_1, vcount_2;
+const char *name_1, *name_2;
+
+ if (!tracecmd_get_guest_cpumap(host, guest_id_1, &name_1, &vcount_1, &cpu_pid_1)) {
+ /* The Host and a guest1 with name_1 are part of the same trace session.
+ * Got guest1 VCPU to host PID mapping.
+ */
+ }
+ if (!tracecmd_get_guest_cpumap(host, guest_id_2, &name_2, &vcount_2, &cpu_pid_2)) {
+ /* The Host and a guest2 with name_2 are part of the same trace session.
+ * Got guest2 VCPU to host PID mapping.
+ */
+ }
+...
+ tracecmd_close(guest1);
+ tracecmd_close(guest2);
+ tracecmd_close(host);
+
+--
+FILES
+-----
+[verse]
+--
+*trace-cmd.h*
+ Header file to include in order to have access to the library APIs.
+*-ltracecmd*
+ Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtracefs(3)_,
+_libtraceevent(3)_,
+_trace-cmd(1)_,
+_trace-cmd.dat(5)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracecmd is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/libtracecmd/libtracecmd-record.txt b/Documentation/libtracecmd/libtracecmd-record.txt
new file mode 100644
index 00000000..aa1a4a66
--- /dev/null
+++ b/Documentation/libtracecmd/libtracecmd-record.txt
@@ -0,0 +1,138 @@
+libtracecmd(3)
+=============
+
+NAME
+----
+tracecmd_read_cpu_first, tracecmd_read_data, tracecmd_read_at,
+tracecmd_free_record, tracecmd_get_tep - Read recorded events from a trace file.
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <trace-cmd.h>*
+
+struct tep_record pass:[*]*tracecmd_read_cpu_first*(struct tracecmd_input pass:[*]_handle_, int _cpu_);
+struct tep_record pass:[*]*tracecmd_read_data*(struct tracecmd_input pass:[*]_handle_, int _cpu_);
+struct tep_record pass:[*]*tracecmd_read_at*(struct tracecmd_input pass:[*]_handle_, unsigned long long _offset_, int pass:[*]_cpu_);
+void *tracecmd_free_record*(struct tep_record pass:[*]_record_);
+struct tep_handle pass:[*]*tracecmd_get_tep*(struct tracecmd_input pass:[*]_handle_);
+--
+
+DESCRIPTION
+-----------
+This set of APIs can be used to read tracing data from a trace file opened
+with _tracecmd_open()(3)_, _tracecmd_open_fd()(3)_ or _tracecmd_open_head()(3)_.
+
+The _tracecmd_read_cpu_first()_ function reads the first trace record
+for a given _cpu_ from a trace file associated with _handle_. The returned
+record must be freed with _tracecmd_free_record()_.
+
+The _tracecmd_read_data()_ function reads the next trace record for
+a given _cpu_ from a trace file associated with _handle_ and increments
+the read location pointer, so that the next call to _tracecmd_read_data()_
+will not read the same record again. The returned record must be freed
+with _tracecmd_free_record()_.
+
+The _tracecmd_read_at()_ function reads a trace record from a specific
+_offset_ within the file associated with _handle_. The CPU on which the
+recorded event occurred is stored in the _cpu_. The function does not
+change the current read location pointer. The returned record must be
+freed with _tracecmd_free_record()_.
+
+The _tracecmd_free_record()_ function frees a _record_ returned by any
+of the _tracecmd_read__ APIs.
+
+The _tracecmd_get_tep()_ function returns a tep context for a given
+_handle_.
+
+RETURN VALUE
+------------
+The _tracecmd_read_cpu_first()_, _tracecmd_read_data()_ and
+_tracecmd_read_at()_ functions return a pointer to struct tep_record or
+NULL in case of an error. The returned record must be freed with
+_tracecmd_free_record()_.
+
+The _tracecmd_get_tep()_ function returns a pointer to tep context or
+NULL if there is no tep context for the given _handle_. The returned
+tep pointer must *not* be freed.
+
+EXAMPLE
+-------
+[source,c]
+--
+#include <trace-cmd.h>
+...
+struct tracecmd_input *handle = tracecmd_open("trace.dat", 0);
+ if (!handle) {
+ /* Failed to open trace.dat file */
+ }
+...
+unsigned long long offset = 0;
+struct tep_record *rec;
+int cpu = 0;
+ rec = tracecmd_read_cpu_first(handle, cpu);
+ while (rec) {
+ ...
+ if ( /* some interesting record noticed */) {
+ /* store the offset of the interesting record */
+ offset = rec->offset;
+ }
+ ...
+ tracecmd_free_record(rec);
+ rec = tracecmd_read_data(handle, cpu);
+ }
+ ...
+ if (offset) {
+ rec = tracecmd_read_at(handle, offset, &cpu);
+ if (rec) {
+ /* Got record at offset on cpu */
+ ...
+ tracecmd_free_record(rec);
+ }
+ }
+
+...
+ tracecmd_close(handle);
+
+--
+FILES
+-----
+[verse]
+--
+*trace-cmd.h*
+ Header file to include in order to have access to the library APIs.
+*-ltracecmd*
+ Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtracefs(3)_,
+_libtraceevent(3)_,
+_trace-cmd(1)_,
+_trace-cmd.dat(5)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracecmd is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/libtracecmd/libtracecmd.txt b/Documentation/libtracecmd/libtracecmd.txt
new file mode 100644
index 00000000..dc528ce0
--- /dev/null
+++ b/Documentation/libtracecmd/libtracecmd.txt
@@ -0,0 +1,86 @@
+libtracecmd(3)
+=============
+
+NAME
+----
+libtracecmd - trace-cmd library APIs
+
+SYNOPSIS
+--------
+[verse]
+--
+*#include <trace-cmd.h>*
+
+Open and close trace file:
+ struct tracecmd_input pass:[*]*tracecmd_open*(const char pass:[*]_file_, int _flags_);
+ struct tracecmd_input pass:[*]*tracecmd_open_fd*(int _fd_, int _flags_);
+ struct tracecmd_input pass:[*]*tracecmd_open_head*(const char pass:[*]_file_, int _flags_);
+ void *tracecmd_close*(struct tracecmd_input pass:[*]_handle_);
+
+Read tracing records from a trace file:
+ int *tracecmd_init_data*(struct tracecmd_input pass:[*]_handle_);
+ struct tep_record pass:[*]*tracecmd_read_cpu_first*(struct tracecmd_input pass:[*]_handle_, int _cpu_);
+ struct tep_record pass:[*]*tracecmd_read_data*(struct tracecmd_input pass:[*]_handle_, int _cpu_);
+ struct tep_record pass:[*]*tracecmd_read_at*(struct tracecmd_input pass:[*]_handle_, unsigned long long _offset_, int pass:[*]_cpu_);
+ void *tracecmd_free_record*(struct tep_record pass:[*]_record_);
+ struct tep_handle pass:[*]*tracecmd_get_tep*(struct tracecmd_input pass:[*]_handle_);
+
+Read tracing instances from a trace file:
+ int *tracecmd_buffer_instances*(struct tracecmd_input pass:[*]_handle_);
+ const char pass:[*]*tracecmd_buffer_instance_name*(struct tracecmd_input pass:[*]_handle_, int _indx_);
+ struct tracecmd_input pass:[*]*tracecmd_buffer_instance_handle*(struct tracecmd_input pass:[*]_handle_, int _indx_);
+
+Get tracing peer information from a trace file:
+ unsigned long long *tracecmd_get_traceid*(struct tracecmd_input pass:[*]_handle_);
+ int *tracecmd_get_guest_cpumap*(struct tracecmd_input pass:[*]_handle_, unsigned long long _trace_id_, const char pass:[*]pass:[*]_name_, int pass:[*]_vcpu_count_, const int pass:[*]pass:[*]_cpu_pid_);
+
+Control library logs:
+ int *tracecmd_set_loglevel*(enum tep_loglevel _level_);
+--
+
+DESCRIPTION
+-----------
+The libtracecmd(3) library provides APIs to read, parse and write
+_trace-cmd.dat(5)_ files, recorded with the _trace-cmd(1)_ application and containing
+tracing information from ftrace, the official Linux kernel tracer.
+
+FILES
+-----
+[verse]
+--
+*trace-cmd.h*
+ Header file to include in order to have access to the library APIs.
+*-ltracecmd*
+ Linker switch to add when building a program that uses the library.
+--
+
+SEE ALSO
+--------
+_libtraceevent(3)_
+_libtracefs(3)_
+_trace-cmd(1)_
+_trace-cmd.dat(5)_
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+libtracecmd is Free Software licensed under the GNU LGPL 2.1
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/manpage-1.72.xsl b/Documentation/manpage-1.72.xsl
new file mode 100644
index 00000000..b4d315cb
--- /dev/null
+++ b/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+ needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-base.xsl b/Documentation/manpage-base.xsl
new file mode 100644
index 00000000..a264fa61
--- /dev/null
+++ b/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+ git.docbook.backslash and git.docbook.dot params
+ must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB(',
+ substring-after(@id,'-'),')',
+ $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>sp&#10;</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB',
+ substring-after(@arearefs,'-'),
+ '. ',$git.docbook.backslash,'fR')"/>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-bold-literal.xsl b/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 00000000..608eb5df
--- /dev/null
+++ b/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+ this makes literal text easier to distinguish in manpages
+ viewed on a tty -->
+<xsl:template match="literal">
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fB</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-normal.xsl b/Documentation/manpage-normal.xsl
new file mode 100644
index 00000000..a48f5b11
--- /dev/null
+++ b/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot" >.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-suppress-sp.xsl b/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 00000000..a63c7632
--- /dev/null
+++ b/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles erroneous, inline .sp in manpage output of some
+ versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+ that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+ <xsl:variable name="content">
+ <xsl:apply-templates/>
+ </xsl:variable>
+ <xsl:value-of select="normalize-space($content)"/>
+ <xsl:if test="not(ancestor::authorblurb) and
+ not(ancestor::personblurb)">
+ <xsl:text>&#10;&#10;</xsl:text>
+ </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/trace-cmd/Makefile b/Documentation/trace-cmd/Makefile
new file mode 100644
index 00000000..1568af53
--- /dev/null
+++ b/Documentation/trace-cmd/Makefile
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Include the utils
+include $(src)/scripts/utils.mk
+
+# This Makefile and manpage XSL files were taken from libtracefs
+# and modified for libtracecmd
+
+MAN1_TXT= \
+ $(wildcard trace-cmd*.1.txt)
+
+MAN5_TXT= \
+ $(wildcard trace-cmd*.5.txt)
+
+MAN_TXT = $(MAN1_TXT) $(MAN5_TXT)
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+_DOC_MAN1=$(patsubst %.1.txt,%.1,$(MAN1_TXT))
+_DOC_MAN5=$(patsubst %.5.txt,%.5,$(MAN5_TXT))
+
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
+DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
+DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/trace-cmd
+pdfdir?=$(prefix)/share/doc/trace-cmd
+mandir?=$(prefix)/share/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC_EXTRA += -a mansource="libtracecmd" -a manmanual="libtracecmd Manual"
+endif
+
+all: check-man-tools html man
+
+man: man1 man5
+man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
+
+html: $(MAN_HTML)
+
+$(MAN_HTML) $(DOC_MAN1) $(DOC_MAN5): $(ASCIIDOC_CONF)
+
+install: check-man-tools install-man install-html
+
+check-man-tools:
+ifdef missing_tools
+ $(error "You need to install $(missing_tools) for man pages")
+endif
+
+install-%.1: $(OUTPUT)%.1
+ $(Q)$(call do_install_docs,$<,$(man1dir),644);
+
+install-%.5: $(OUTPUT)%.5
+ $(Q)$(call do_install_docs,$<,$(man5dir),644);
+
+do-install-man: man $(addprefix install-,$(wildcard $(OUTPUT)*.1)) \
+ $(addprefix install-,$(wildcard $(OUTPUT)*.5))
+
+install-man: man
+ $(Q)$(MAKE) -C . do-install-man
+
+install-%.txt: $(OUTPUT)%.html
+ $(Q)$(call do_install_docs,$<,$(htmldir),644);
+
+do-install-html: html $(addprefix install-,$(wildcard *.txt))
+
+install-html: html do-install-html
+
+uninstall: uninstall-man uninstall-html
+
+uninstall-man:
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(man1dir)/,$(DOC_MAN1))
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(man5dir)/,$(DOC_MAN5))
+
+uninstall-html:
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(htmldir)/,$(MAN_HTML))
+
+ifdef missing_tools
+ DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
+else
+ DO_INSTALL_MAN = do-install-man
+endif
+
+CLEAN_FILES = \
+ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \
+ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
+ $(DOC_MAN1) $(DOC_MAN5) *.1 *.5
+
+clean:
+ $(Q) $(RM) $(CLEAN_FILES)
+
+ifdef USE_ASCIIDOCTOR
+$(OUTPUT)%.1 : $(OUTPUT)%.1.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b manpage -d manpage \
+ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+$(OUTPUT)%.5 : $(OUTPUT)%.5.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b manpage -d manpage \
+ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \
+ mv $@+ $@
+endif
+
+$(OUTPUT)%.1 : $(OUTPUT)%.1.xml
+ $(QUIET_XMLTO)$(RM) $@ && \
+ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \
+
+$(OUTPUT)%.5 : $(OUTPUT)%.5.xml
+ $(QUIET_XMLTO)$(RM) $@ && \
+ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \
+
+$(OUTPUT)%.xml : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b docbook -d manpage \
+ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
+ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \
+ mv $@+ $@
diff --git a/Documentation/trace-cmd/trace-cmd-agent.1.txt b/Documentation/trace-cmd/trace-cmd-agent.1.txt
new file mode 100644
index 00000000..f247d41d
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-agent.1.txt
@@ -0,0 +1,62 @@
+TRACE-CMD-AGENT(1)
+==================
+
+NAME
+----
+trace-cmd-agent - Run as an agent on a machine (to be controlled by another machine)
+
+SYNOPSIS
+--------
+*trace-cmd agent* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) agent listens over a vsocket (for virtual machines) or a TCP port
+for connections to control the tracing of the machine. The agent will then start
+tracing on the local machine and pass the data to the controlling connection.
+
+OPTIONS
+-------
+*-N* 'client'::
+ Listen over TCP instead of a vsocket. Must pass in a client host name or IP address
+ to allow connection to. It will only connect to the specified client. Note, any process
+ on that client can control the agent.
+
+ *This is a very insecure setting. Only use on a trusted network*
+ *Only use if the client machine is totally trusted*
+
+*-p* 'port'::
+ This option will specify the port to listen to.
+
+*-D*::
+ This option causes trace-cmd agent to go into a daemon mode and run in
+ the background.
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd agent --verbose=warning
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-list(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/trace-cmd/trace-cmd-check-events.1.txt b/Documentation/trace-cmd/trace-cmd-check-events.1.txt
new file mode 100644
index 00000000..debab6c5
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-check-events.1.txt
@@ -0,0 +1,55 @@
+TRACE-CMD-CHECK-EVENTS(1)
+=========================
+
+NAME
+----
+trace-cmd-check-events - parse the event formats on local system
+
+SYNOPSIS
+--------
+*trace-cmd check-events* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) check-events parses format strings for all the events on the
+local system. It returns whether all the format strings can be parsed
+correctly. It will load plugins unless specified otherwise.
+
+This is useful to check for any trace event format strings which may contain
+some internal kernel function references which cannot be decoded outside of
+the kernel. This may mean that either the unparsed format strings of the trace
+events need to be changed or that a plugin needs to be created to parse them.
+
+OPTIONS
+-------
+*-N* - Don't load plugins
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd check-events --verbose=warning
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-stop(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-start(1)
+
+AUTHOR
+------
+Written by Vaibhav Nagarnaik, <vnagarnaik@google.com>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2011 Google, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-clear.1.txt b/Documentation/trace-cmd/trace-cmd-clear.1.txt
new file mode 100644
index 00000000..74236960
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-clear.1.txt
@@ -0,0 +1,51 @@
+TRACE-CMD-CLEAR(1)
+=================
+
+NAME
+----
+trace-cmd-clear - clear the Ftrace buffer.
+
+SYNOPSIS
+--------
+*trace-cmd clear* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The *trace-cmd(1) clear* clears the content of the Ftrace ring buffer.
+
+OPTIONS
+-------
+*-B* 'buffer-name'::
+ If the kernel supports multiple buffers, this will clear only the given
+ buffer. It does not affect any other buffers. This may be used multiple
+ times to specify different buffers. The top level buffer will not be
+ cleared if this option is given.
+
+*-a*::
+ Clear all existing buffers, including the top level one.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+[verse]
+--
+*Steven Rostedt* <rostedt@goodmis.org>, author of *trace-cmd*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+--
+REPORTING BUGS
+--------------
+Report bugs to <linux-trace-devel@vger.kernel.org>
+
+LICENSE
+-------
+trace-cmd is Free Software licensed under the terms of the
+GNU Public License (GPL).
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ \ No newline at end of file
diff --git a/Documentation/trace-cmd/trace-cmd-convert.1.txt b/Documentation/trace-cmd/trace-cmd-convert.1.txt
new file mode 100644
index 00000000..7c13cf3d
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-convert.1.txt
@@ -0,0 +1,65 @@
+TRACE-CMD-CONVERT(1)
+===================
+
+NAME
+----
+trace-cmd-convert - convert trace files
+
+SYNOPSIS
+--------
+*trace-cmd convert* ['OPTIONS'] ['output-file']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) convert command converts a trace file. It reads the input file and copies the data
+into an output file. The output file may be in different format, depending on the command line
+arguments.
+
+OPTIONS
+-------
+*-i* 'input-file'::
+ By default, trace-cmd convert will read the file 'trace.dat'. But the *-i*
+ option opens up the given 'input-file' instead.
+
+*-o* 'out-file'::
+ The name of the output file, this parameter is mandatory. Note, the output file may also be
+ specified as the last item on the command line.
+
+*--file-version*::
+ Desired version of the output file. Supported versions are 6 or 7.
+
+*--compression*::
+ Compression of the trace output file, one of these strings can be passed:
+
+ 'any' - auto select the best available compression algorithm
+
+ 'none' - do not compress the trace file
+
+ 'name' - the name of the desired compression algorithm. Available algorithms can be listed with
+ trace-cmd list -c
+
+*--help*::
+ Print usage information.
+
+EXAMPLES
+--------
+
+# trace-cmd convert --compression any trace_compress.dat
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd.dat(5)
+
+AUTHOR
+------
+*Steven Rostedt* <rostedt@goodmis.org>, author of *trace-cmd*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2021 VMware. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/trace-cmd/trace-cmd-dump.1.txt b/Documentation/trace-cmd/trace-cmd-dump.1.txt
new file mode 100644
index 00000000..9c95244b
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-dump.1.txt
@@ -0,0 +1,142 @@
+TRACE-CMD-DUMP(1)
+===================
+
+NAME
+----
+trace-cmd-dump - show meta data from a trace file, created by trace-cmd record
+
+SYNOPSIS
+--------
+*trace-cmd dump* ['OPTIONS'] ['input-file']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) dump command will display the meta data from a trace file
+created by trace-cmd record.
+
+OPTIONS
+-------
+*-i* 'input-file'::
+ By default, trace-cmd dump will read the file 'trace.dat'. But the *-i*
+ option opens up the given 'input-file' instead. Note, the input file may
+ also be specified as the last item on the command line.
+*-v*, *--validate*::
+ Check if the input file is a valid trace file, created by trace-cmd.
+*--summary*::
+ Print a meta data summary - initial format and a short description of each
+ file section. This is the default action, if no arguments are specified.
+*--head-page*::
+ Print the header page information, stored in the file.
+*--head-event*::
+ Print the event header information, stored in the file.
+*--ftrace-events*::
+ Print formats of ftrace specific events.
+*--systems*::
+ Print information of event systems, stored in the file - name and number of
+ events for each system.
+*--events*::
+ Print formats of all events, stored in the file.
+*--kallsyms*::
+ Print information of the mapping of function addresses to the function names.
+*--printk*::
+ Print trace_printk() format strings, stored in the file.
+*--cmd-lines*::
+ Print the mapping of PIDs to process names.
+*--options*::
+ Print all options, stored in the file.
+*--flyrecord*::
+ Print the offset and the size of tracing data per each CPU.
+*--clock*::
+ Print the trace clock, used for timestamp of the tracing events, stored in the file.
+*--all*::
+ Print all meta data from the file.
+*--help*::
+ Print usage information.
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd dump --verbose=warning
+
+EXAMPLES
+--------
+
+# trace-cmd dump --summary -i trace.dat
+
+ Tracing meta data in file trace.dat:
+ [Initial format]
+ 6 [Version]
+ 0 [Little endian]
+ 8 [Bytes in a long]
+ 4096 [Page size, bytes]
+ [Header info, 205 bytes]
+ [Header event, 205 bytes]
+ [Ftrace format, 15 events]
+ [Events format, 2 systems]
+ [Kallsyms, 7144493 bytes]
+ [Trace printk, 2131 bytes]
+ [Saved command lines, 117 bytes]
+ 8 [CPUs with tracing data]
+ [12 options]
+ [Flyrecord tracing data]
+------------------------------------------
+
+# trace-cmd dump --flyrecord -i trace.dat
+ [Flyrecord tracing data]
+ 7176192 0 [offset, size of cpu 0]
+ 7176192 0 [offset, size of cpu 1]
+ 7176192 0 [offset, size of cpu 2]
+ 7176192 4096 [offset, size of cpu 3]
+ 7180288 4096 [offset, size of cpu 4]
+ 7184384 0 [offset, size of cpu 5]
+ 7184384 0 [offset, size of cpu 6]
+ 7184384 0 [offset, size of cpu 7]
+------------------------------------------
+
+# trace-cmd dump --summary --systems -i trace.dat
+
+ Tracing meta data in file trace.dat:
+ [Initial format]
+ 6 [Version]
+ 0 [Little endian]
+ 8 [Bytes in a long]
+ 4096 [Page size, bytes]
+ [Header info, 205 bytes]
+ [Header event, 205 bytes]
+ [Ftrace format, 15 events]
+ [Events format, 3 systems]
+ sched 23 [system, events]
+ irq 5 [system, events]
+ kvm 70 [system, events]
+ [Kallsyms, 7144493 bytes]
+ [Trace printk, 2131 bytes]
+ [Saved command lines, 157 bytes]
+ 8 [CPUs with tracing data]
+ [11 options]
+ [Flyrecord tracing data]
+------------------------------------------
+
+# trace-cmd dump --validate -i trace.dat
+File trace.dat is a valid trace-cmd file
+------------------------------------------
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd.dat(5)
+
+AUTHOR
+------
+*Steven Rostedt* <rostedt@goodmis.org>, author of *trace-cmd*.
+*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/trace-cmd/trace-cmd-extract.1.txt b/Documentation/trace-cmd/trace-cmd-extract.1.txt
new file mode 100644
index 00000000..776da6e1
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-extract.1.txt
@@ -0,0 +1,98 @@
+TRACE-CMD-EXTRACT(1)
+====================
+
+NAME
+----
+trace-cmd-extract - extract out the data from the Ftrace Linux tracer.
+
+SYNOPSIS
+--------
+*trace-cmd extract ['OPTIONS']*
+
+DESCRIPTION
+-----------
+The trace-cmd(1) extract is usually used after 'trace-cmd-start(1)' and
+'trace-cmd-stop(1)'. It can be used after the Ftrace tracer has been started
+manually through the Ftrace pseudo file system.
+
+The extract command creates a trace.dat file that can be used by
+'trace-cmd-report(1)' to read from. It reads the kernel internal ring buffer
+to produce the trace.dat file.
+
+OPTIONS
+-------
+*-p* 'plugin'::
+ Although *extract* does not start any traces, some of the plugins require
+ just reading the output in ASCII format. These are the latency tracers,
+ since the latency tracers have a separate internal buffer. The plugin
+ option is therefore only necessary for the 'wakeup', 'wakeup-rt',
+ 'irqsoff', 'preemptoff' and 'preemptirqsoff' plugins.
+
+ Without this option, the extract command will extract from the internal
+ Ftrace buffers.
+
+*-O* 'option'::
+ If a latency tracer is being extracted, and the *-p* option is used, then
+ there are some Ftrace options that can change the format. This will update
+ those options before extracting. To see the list of options see
+ 'trace-cmd-list'. To enable an option, write its name, to disable the
+ option append the characters 'no' to it. For example: 'noprint-parent'
+ will disable the 'print-parent' option that prints the parent function in
+ printing a function event.
+
+*-o* 'outputfile'::
+ By default, the extract command will create a 'trace.dat' file. This
+ option will change where the file is written to.
+
+*-s*::
+ Extract from the snapshot buffer (if the kernel supports it).
+
+*--date*::
+ This is the same as the trace-cmd-record(1) --date option, but it
+ does cause the extract routine to disable all tracing. That is,
+ the end of the extract will perform something similar to trace-cmd-reset(1).
+
+*-B* 'buffer-name'::
+ If the kernel supports multiple buffers, this will extract the trace for
+ only the given buffer. It does not affect any other buffer. This may be
+ used multiple times to specify different buffers. When this option is
+ used, the top level instance will not be extracted unless *-t* is given.
+
+*-a*::
+ Extract all existing buffer instances. When this option is used, the
+ top level instance will not be extracted unless *-t* is given.
+
+*-t*::
+ Extracts the top level instance buffer. Without the *-B* or *-a* option
+ this is the same as the default. But if *-B* or *-a* is used, this is
+ required if the top level instance buffer should also be extracted.
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd extract --verbose=warning
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-hist.1.txt b/Documentation/trace-cmd/trace-cmd-hist.1.txt
new file mode 100644
index 00000000..169f8d7b
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-hist.1.txt
@@ -0,0 +1,50 @@
+TRACE-CMD-HIST(1)
+=================
+
+NAME
+----
+trace-cmd-hist - show histogram of events in trace.dat file
+
+SYNOPSIS
+--------
+*trace-cmd hist* ['OPTIONS'] ['input-file']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) hist displays a histogram from the trace.dat file.
+Instead of showing the events as they were ordered, it creates a histogram
+that can be displayed per task or for all tasks where the most common
+events appear first. It uses the function tracer and call stacks that it
+finds to try to put together a call graph of the events.
+
+OPTIONS
+-------
+*-i* 'input-file'::
+ By default, trace-cmd hist will read the file 'trace.dat'. But the *-i*
+ option opens up the given 'input-file' instead. Note, the input file may
+ also be specified as the last item on the command line.
+
+*-P*::
+ To compact all events and show the call graphs by ignoring tasks
+ and different PIDs, add the *-P* to do so. Instead of showing the
+ task name, it will group all chains together and show "<all pids>".
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-list.1.txt b/Documentation/trace-cmd/trace-cmd-list.1.txt
new file mode 100644
index 00000000..b77e3460
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-list.1.txt
@@ -0,0 +1,95 @@
+TRACE-CMD-LIST(1)
+=================
+
+NAME
+----
+trace-cmd-list - list available plugins, events or options for Ftrace.
+
+SYNOPSIS
+--------
+*trace-cmd list* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) list displays the available plugins, events or Ftrace options
+that are configured on the current machine. If no option is given, then it
+lists all plugins, event systems, events and Ftrace options to standard output.
+
+OPTIONS
+-------
+*-e* ['regex']::
+ This option will list the available events that are enabled on the
+ local system.
+
+ It takes an optional argument that uses 'regcomp(3)' expressions to search.
+
+ trace-cmd list -e '^sys.*'
+
+*-F*::
+ Used with *-e* 'regex' to show the fields of the event.
+
+*--full*::
+ Used with *-F* which will show the "print fmt" of the event along with the fields.
+
+*-l*::
+ Used with *-e* 'regex' to show those events filters.
+
+*-R*::
+ Used with *-e* 'regex' to show those events triggers.
+
+*-s*::
+ This option will list the available event systems.
+
+*-t*::
+ This option will list the available tracers that are enabled on the
+ local system.
+
+*-p*::
+ Same as *-t* and only for legacy purposes.
+
+*-o*::
+ This option will list the available Ftrace options that are configured on
+ the local system.
+
+*-f* ['regex']::
+ This option will list the available filter functions. These are the list of
+ functions on the system that you can trace, or filter on.
+ It takes an optional argument that uses 'regcomp(3)' expressions to search.
+
+ trace-cmd list -f '^sched.*'
+
+*-P*::
+ List the plugin files that get loaded on trace-cmd report.
+
+*-O*::
+ List plugin options that can be used by trace-cmd report *-O* option.
+
+*-B*::
+ List defined buffer instances (sub buffers).
+
+*-C*::
+ List defined clocks that can be used with trace-cmd record -C.
+ The one in brackets ([]) is the active clock.
+
+*-c*::
+ List the available trace file compression algorithms.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-listen.1.txt b/Documentation/trace-cmd/trace-cmd-listen.1.txt
new file mode 100644
index 00000000..7c6093ba
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-listen.1.txt
@@ -0,0 +1,71 @@
+TRACE-CMD-LISTEN(1)
+===================
+
+NAME
+----
+trace-cmd-listen - listen for incoming connection to record tracing.
+
+SYNOPSIS
+--------
+*trace-cmd listen* -p 'port' ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) listen command sets up a port to listen on, waiting for connections
+from other hosts that run 'trace-cmd-record(1)' with the *-N* option. When a
+connection is made, and the remote host sends data, it will create a file
+called 'trace.HOST:PORT.dat'. Where HOST is the name of the remote host, and
+PORT is the port that the remote host used to connect with.
+
+OPTIONS
+-------
+*-p* 'port'::
+ This option will specify the port to listen to.
+
+*-D*::
+ This option causes trace-cmd listen to go into a daemon mode and run in
+ the background.
+
+*-V*::
+ Listen on a vsocket instead. This is useful for tracing between host and
+ guest VMs.
+
+*-d* 'dir'::
+ This option specifies a directory to write the data files into.
+
+*-o* 'filename'::
+ This option overrides the default 'trace' in the 'trace.HOST:PORT.dat' that
+ is created when a remote host connects.
+
+*-l* 'filename'::
+ This option writes the output messages to a log file instead of standard output.
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to a specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd listen --verbose=warning
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-list(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-mem.1.txt b/Documentation/trace-cmd/trace-cmd-mem.1.txt
new file mode 100644
index 00000000..90e430b8
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-mem.1.txt
@@ -0,0 +1,74 @@
+TRACE-CMD-MEM(1)
+================
+
+NAME
+----
+trace-cmd-mem - show memory usage of certain kmem events
+
+SYNOPSIS
+--------
+*trace-cmd mem* ['OPTIONS']['input-file']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) mem requires a trace-cmd record that enabled the following
+events:
+
+ kmalloc
+ kmalloc_node
+ kfree
+ kmem_cache_alloc
+ kmem_cache_alloc_node
+ kmem_cache_free
+
+It then reads the amount requested and the amount freed as well as the
+functions that called the allocation. It then reports the final amount
+of bytes requested and allocated, along with the total amount allocated
+and requested, as well as the max allocation and requested during the run.
+It reports the amount of wasted bytes (allocated - requested) that was
+not freed, as well as the max wasted amount during the run. The list is
+sorted by descending order of wasted bytes after the run.
+
+ Function Waste Alloc req TotAlloc TotReq MaxAlloc MaxReq MaxWaste
+ -------- ----- ----- --- -------- ------ -------- ------ --------
+ rb_allocate_cpu_buffer 768 2304 1536 2304 1536 2304 1536 768
+ alloc_pipe_info 400 1152 752 1152 752 1152 752 400
+ instance_mkdir 252 544 292 544 292 544 292 252
+ __d_alloc 215 1086560 1086345 1087208 1086993 1086560 1086345 215
+ get_empty_filp 72 2304 2232 4864 4712 4864 4712 152
+ mm_alloc 40 960 920 960 920 960 920 40
+ prepare_creds 32 192 160 1728 1440 1728 1440 288
+ tracing_buffers_open 8 32 24 32 24 32 24 8
+ do_brk 0 0 0 368 368 368 368 0
+ journal_add_journal_head 0 6048 6048 6048 6048 6048 6048 0
+ journal_start 0 0 0 1224 1224 48 48 0
+ __rb_allocate_pages 0 3289856 3289856 3289856 3289856 3289856 3289856 0
+ anon_vma_alloc 0 0 0 936 936 864 864 0
+ [...]
+
+OPTIONS
+-------
+*-i* 'input-file'::
+ By default, trace-cmd mem will read the file 'trace.dat'. But the *-i*
+ option opens up the given 'input-file' instead. Note, the input file may
+ also be specified as the last item on the command line.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-hist(1),
+trace-cmd-split(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2013 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-options.1.txt b/Documentation/trace-cmd/trace-cmd-options.1.txt
new file mode 100644
index 00000000..bcdf0533
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-options.1.txt
@@ -0,0 +1,35 @@
+TRACE-CMD-OPTIONS(1)
+====================
+
+NAME
+----
+trace-cmd-options - list available options from trace-cmd plugins
+
+SYNOPSIS
+--------
+*trace-cmd options*
+
+DESCRIPTION
+-----------
+The trace-cmd(1) options command will examine all the trace-cmd plugins
+that are used by *trace-cmd report(1)* and list them.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-start(1), trace-cmd-stop(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2011 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-profile.1.txt b/Documentation/trace-cmd/trace-cmd-profile.1.txt
new file mode 100644
index 00000000..078ae9e0
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-profile.1.txt
@@ -0,0 +1,686 @@
+TRACE-CMD-PROFILE(1)
+====================
+
+NAME
+----
+trace-cmd-profile - profile tasks running live
+
+SYNOPSIS
+--------
+*trace-cmd profile ['OPTIONS']* ['command']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) profile will start tracing just like trace-cmd-record(1),
+with the *--profile* option, except that it does not write to a file,
+but instead, it will read the events as they happen and will update the accounting
+of the events. When the trace is finished, it will report the results just like
+trace-cmd-report(1) would do with its *--profile* option. In other words,
+the profile command does the work of trace-cmd record --profile, and trace-cmd
+report --profile without having to record the data to disk, in between.
+
+The advantage of using the profile command is that the profiling can be done
+over a long period of time where recording all events would take up too much
+disk space.
+
+This will enable several events as well as the function graph tracer
+with a depth of one (if the kernel supports it). This is to show where
+tasks enter and exit the kernel and how long they were in the kernel.
+
+To disable calling function graph, use the *-p* option to enable another
+tracer. To not enable any tracer, use *-p nop*.
+
+All timings are currently in nanoseconds.
+
+OPTIONS
+-------
+These are the same as trace-cmd-record(1) with the *--profile* option.
+
+*-p* 'tracer'::
+ Set a tracer plugin to run instead of function graph tracing set to
+ depth of 1. To not run any tracer, use *-p nop*.
+
+*-S*::
+ Only enable the tracer or events specified on the command line.
+ With this option, the function_graph tracer is not enabled, nor are
+ any events (like sched_switch), unless they are specifically specified
+ on the command line (i.e. -p function -e sched_switch -e sched_wakeup)
+
+*-G*::
+ Set interrupt (soft and hard) events as global (associated to CPU
+ instead of tasks).
+
+*-o* 'file'::
+ Write the output of the profile to 'file'. This supersedes *--stderr*
+
+*-H* 'event-hooks'::
+ Add custom event matching to connect any two events together. Format is:
+ [<start_system>:]<start_event>,<start_match>[,<start_pid>]/
+ [<end_system>:]<end_event>,<end_match>[,<flags>]
+
+ The start_system:start_event (start_system is optional), is the event that
+ starts the timing.
+
+ start_match is the field in the start event that is to match with the
+ end_match in the end event.
+
+ start_pid is optional, as matches are attached to the tasks that run
+ the events, if another field should be used to find that task, then
+ it is specified with start_pid.
+
+ end_system:end_event is the event that ends the timing (end_system is
+ optional).
+
+ end_match is the field in the end event that will match the start event field
+ start_match.
+
+ flags are optional and can be the following (case insensitive):
+
+ p : The two events are pinned to the same CPU (start and end happen
+ on the same CPU always).
+
+ s : The event should have a stack traced with it (enable stack tracing
+ for the start event).
+
+ g : The event is global (not associated to a task). start_pid is
+ not applicable with this flag.
+
+*--stderr*::
+ Redirect the output to stderr. The output of the command being executed
+ is not changed. This allows watching the command execute and saving the
+ output of the profile to another file.
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to a specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd profile --verbose=warning
+
+EXAMPLES
+--------
+
+ ---
+# trace-cmd profile -F sleep 1
+ [..]
+task: sleep-1121
+ Event: sched_switch:R (2) Total: 234559 Avg: 117279 Max: 129886 Min:104673
+ |
+ + ftrace_raw_event_sched_switch (0xffffffff8109f310)
+ 100% (2) time:234559 max:129886 min:104673 avg:117279
+ __schedule (0xffffffff816c1e81)
+ preempt_schedule (0xffffffff816c236e)
+ ___preempt_schedule (0xffffffff81351a59)
+ |
+ + unmap_single_vma (0xffffffff81198c05)
+ | 55% (1) time:129886 max:129886 min:0 avg:129886
+ | stop_one_cpu (0xffffffff8110909a)
+ | sched_exec (0xffffffff810a119b)
+ | do_execveat_common.isra.31 (0xffffffff811de528)
+ | do_execve (0xffffffff811dea8c)
+ | SyS_execve (0xffffffff811ded1e)
+ | return_to_handler (0xffffffff816c8458)
+ | stub_execve (0xffffffff816c6929)
+ |
+ + unmap_single_vma (0xffffffff81198c05)
+ 45% (1) time:104673 max:104673 min:0 avg:104673
+ unmap_vmas (0xffffffff81199174)
+ exit_mmap (0xffffffff811a1f5b)
+ mmput (0xffffffff8107699a)
+ flush_old_exec (0xffffffff811ddb75)
+ load_elf_binary (0xffffffff812287df)
+ search_binary_handler (0xffffffff811dd3e0)
+ do_execveat_common.isra.31 (0xffffffff811de8bd)
+ do_execve (0xffffffff811dea8c)
+ SyS_execve (0xffffffff811ded1e)
+ return_to_handler (0xffffffff816c8458)
+ stub_execve (0xffffffff816c6929)
+
+
+
+
+ Event: sched_switch:S (1) Total: 1000513242 Avg: 1000513242 Max: 1000513242 Min:1000513242
+ |
+ + ftrace_raw_event_sched_switch (0xffffffff8109f310)
+ 100% (1) time:1000513242 max:1000513242 min:0 avg:1000513242
+ __schedule (0xffffffff816c1e81)
+ schedule (0xffffffff816c23b9)
+ do_nanosleep (0xffffffff816c4f1c)
+ hrtimer_nanosleep (0xffffffff810dcd86)
+ SyS_nanosleep (0xffffffff810dcea6)
+ return_to_handler (0xffffffff816c8458)
+ tracesys_phase2 (0xffffffff816c65b0)
+
+
+
+ Event: sched_wakeup:1121 (1) Total: 43405 Avg: 43405 Max: 43405 Min:43405
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (1) time:43405 max:43405 min:0 avg:43405
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ wake_up_process (0xffffffff810a4057)
+ hrtimer_wakeup (0xffffffff810db772)
+ __run_hrtimer (0xffffffff810dbd91)
+ hrtimer_interrupt (0xffffffff810dc6b7)
+ local_apic_timer_interrupt (0xffffffff810363e7)
+ smp_trace_apic_timer_interrupt (0xffffffff816c8c6a)
+ trace_apic_timer_interrupt (0xffffffff816c725a)
+ finish_task_switch (0xffffffff8109c3a4)
+ __schedule (0xffffffff816c1e01)
+ schedule (0xffffffff816c23b9)
+ ring_buffer_wait (0xffffffff811323a3)
+ wait_on_pipe (0xffffffff81133d93)
+ tracing_buffers_splice_read (0xffffffff811350b0)
+ do_splice_to (0xffffffff8120476f)
+ SyS_splice (0xffffffff81206c1f)
+ tracesys_phase2 (0xffffffff816c65b0)
+
+
+ Event: func: sys_nanosleep() (1) Total: 1000598016 Avg: 1000598016 Max: 1000598016 Min:1000598016
+ Event: func: sys_munmap() (1) Total: 14300 Avg: 14300 Max: 14300 Min:14300
+ Event: func: sys_arch_prctl() (1) Total: 571 Avg: 571 Max: 571 Min:571
+ Event: func: sys_mprotect() (4) Total: 14382 Avg: 3595 Max: 7196 Min:2190
+ Event: func: SyS_read() (1) Total: 2640 Avg: 2640 Max: 2640 Min:2640
+ Event: func: sys_close() (5) Total: 4001 Avg: 800 Max: 1252 Min:414
+ Event: func: sys_newfstat() (3) Total: 11684 Avg: 3894 Max: 10206 Min:636
+ Event: func: SyS_open() (3) Total: 23615 Avg: 7871 Max: 10535 Min:4743
+ Event: func: sys_access() (1) Total: 5924 Avg: 5924 Max: 5924 Min:5924
+ Event: func: SyS_mmap() (8) Total: 39153 Avg: 4894 Max: 12354 Min:1518
+ Event: func: smp_trace_apic_timer_interrupt() (1) Total: 10298 Avg: 10298 Max: 10298 Min:10298
+ Event: func: SyS_brk() (4) Total: 2407 Avg: 601 Max: 1564 Min:206
+ Event: func: do_notify_resume() (2) Total: 4095 Avg: 2047 Max: 2521 Min:1574
+ Event: func: sys_execve() (5) Total: 1625251 Avg: 325050 Max: 1605698 Min:3570
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (1) time:1605698 max:1605698 min:0 avg:1605698
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ wake_up_process (0xffffffff810a4057)
+ cpu_stop_queue_work (0xffffffff81108df8)
+ stop_one_cpu (0xffffffff8110909a)
+ sched_exec (0xffffffff810a119b)
+ do_execveat_common.isra.31 (0xffffffff811de528)
+ do_execve (0xffffffff811dea8c)
+ SyS_execve (0xffffffff811ded1e)
+ return_to_handler (0xffffffff816c8458)
+ stub_execve (0xffffffff816c6929)
+ stub_execve (0xffffffff816c6929)
+
+
+ Event: func: syscall_trace_enter_phase2() (38) Total: 21544 Avg: 566 Max: 1066 Min:329
+ Event: func: syscall_trace_enter_phase1() (38) Total: 9202 Avg: 242 Max: 376 Min:150
+ Event: func: __do_page_fault() (53) Total: 257672 Avg: 4861 Max: 27745 Min:458
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (1) time:27745 max:27745 min:0 avg:27745
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ default_wake_function (0xffffffff810a4002)
+ autoremove_wake_function (0xffffffff810b50fd)
+ __wake_up_common (0xffffffff810b4958)
+ __wake_up (0xffffffff810b4cb8)
+ rb_wake_up_waiters (0xffffffff8112f126)
+ irq_work_run_list (0xffffffff81157d0f)
+ irq_work_run (0xffffffff81157d5e)
+ smp_trace_irq_work_interrupt (0xffffffff810082fc)
+ trace_irq_work_interrupt (0xffffffff816c7aaa)
+ return_to_handler (0xffffffff816c8458)
+ trace_do_page_fault (0xffffffff810478b2)
+ trace_page_fault (0xffffffff816c7dd2)
+
+
+ Event: func: syscall_trace_leave() (38) Total: 26145 Avg: 688 Max: 1264 Min:381
+ Event: func: __sb_end_write() (1) Total: 373 Avg: 373 Max: 373 Min:373
+ Event: func: fsnotify() (1) Total: 598 Avg: 598 Max: 598 Min:598
+ Event: func: __fsnotify_parent() (1) Total: 286 Avg: 286 Max: 286 Min:286
+ Event: func: mutex_unlock() (2) Total: 39636 Avg: 19818 Max: 39413 Min:223
+ Event: func: smp_trace_irq_work_interrupt() (6) Total: 236459 Avg: 39409 Max: 100671 Min:634
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (4) time:234348 max:100671 min:38745 avg:58587
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ default_wake_function (0xffffffff810a4002)
+ autoremove_wake_function (0xffffffff810b50fd)
+ __wake_up_common (0xffffffff810b4958)
+ __wake_up (0xffffffff810b4cb8)
+ rb_wake_up_waiters (0xffffffff8112f126)
+ irq_work_run_list (0xffffffff81157d0f)
+ irq_work_run (0xffffffff81157d5e)
+ smp_trace_irq_work_interrupt (0xffffffff810082fc)
+ return_to_handler (0xffffffff816c8458)
+ trace_irq_work_interrupt (0xffffffff816c7aaa)
+ |
+ + ftrace_return_to_handler (0xffffffff81140840)
+ | 84% (3) time:197396 max:100671 min:38745 avg:65798
+ | return_to_handler (0xffffffff816c846d)
+ | trace_page_fault (0xffffffff816c7dd2)
+ |
+ + ftrace_return_to_handler (0xffffffff81140840)
+ 16% (1) time:36952 max:36952 min:0 avg:36952
+ ftrace_graph_caller (0xffffffff816c8428)
+ mutex_unlock (0xffffffff816c3f75)
+ rb_simple_write (0xffffffff81133142)
+ vfs_write (0xffffffff811d7727)
+ SyS_write (0xffffffff811d7acf)
+ tracesys_phase2 (0xffffffff816c65b0)
+
+
+
+
+ Event: sys_enter:35 (1) Total: 1000599765 Avg: 1000599765 Max: 1000599765 Min:1000599765
+ Event: sys_enter:11 (1) Total: 55025 Avg: 55025 Max: 55025 Min:55025
+ Event: sys_enter:158 (1) Total: 1584 Avg: 1584 Max: 1584 Min:1584
+ Event: sys_enter:10 (4) Total: 18359 Avg: 4589 Max: 8764 Min:2933
+ Event: sys_enter:0 (1) Total: 4223 Avg: 4223 Max: 4223 Min:4223
+ Event: sys_enter:3 (5) Total: 9948 Avg: 1989 Max: 2606 Min:1203
+ Event: sys_enter:5 (3) Total: 15530 Avg: 5176 Max: 11840 Min:1405
+ Event: sys_enter:2 (3) Total: 28002 Avg: 9334 Max: 12035 Min:5656
+ Event: sys_enter:21 (1) Total: 7814 Avg: 7814 Max: 7814 Min:7814
+ Event: sys_enter:9 (8) Total: 49583 Avg: 6197 Max: 14137 Min:2362
+ Event: sys_enter:12 (4) Total: 108493 Avg: 27123 Max: 104079 Min:922
+ Event: sys_enter:59 (5) Total: 1631608 Avg: 326321 Max: 1607529 Min:4563
+ Event: page_fault_user:0x398d86b630 (1)
+ Event: page_fault_user:0x398d844de0 (1)
+ Event: page_fault_user:0x398d8d9020 (1)
+ Event: page_fault_user:0x1d37008 (1)
+ Event: page_fault_user:0x7f0b89e91074 (1)
+ Event: page_fault_user:0x7f0b89d98ed0 (1)
+ Event: page_fault_user:0x7f0b89ec8950 (1)
+ Event: page_fault_user:0x7f0b89d83644 (1)
+ Event: page_fault_user:0x7f0b89d622a8 (1)
+ Event: page_fault_user:0x7f0b89d5a560 (1)
+ Event: page_fault_user:0x7f0b89d34010 (1)
+ Event: page_fault_user:0x1d36008 (1)
+ Event: page_fault_user:0x398d900510 (1)
+ Event: page_fault_user:0x398dbb3ae8 (1)
+ Event: page_fault_user:0x398d87f490 (1)
+ Event: page_fault_user:0x398d8eb660 (1)
+ Event: page_fault_user:0x398d8bd730 (1)
+ Event: page_fault_user:0x398d9625d9 (1)
+ Event: page_fault_user:0x398d931810 (1)
+ Event: page_fault_user:0x398dbb7114 (1)
+ Event: page_fault_user:0x398d837610 (1)
+ Event: page_fault_user:0x398d89e860 (1)
+ Event: page_fault_user:0x398d8f23b0 (1)
+ Event: page_fault_user:0x398dbb4510 (1)
+ Event: page_fault_user:0x398dbad6f0 (1)
+ Event: page_fault_user:0x398dbb1018 (1)
+ Event: page_fault_user:0x398d977b37 (1)
+ Event: page_fault_user:0x398d92eb60 (1)
+ Event: page_fault_user:0x398d8abff0 (1)
+ Event: page_fault_user:0x398dbb0d30 (1)
+ Event: page_fault_user:0x398dbb6c24 (1)
+ Event: page_fault_user:0x398d821c50 (1)
+ Event: page_fault_user:0x398dbb6c20 (1)
+ Event: page_fault_user:0x398d886350 (1)
+ Event: page_fault_user:0x7f0b90125000 (1)
+ Event: page_fault_user:0x7f0b90124740 (1)
+ Event: page_fault_user:0x7f0b90126000 (1)
+ Event: page_fault_user:0x398d816230 (1)
+ Event: page_fault_user:0x398d8002b8 (1)
+ Event: page_fault_user:0x398dbb0b40 (1)
+ Event: page_fault_user:0x398dbb2880 (1)
+ Event: page_fault_user:0x7f0b90141cc6 (1)
+ Event: page_fault_user:0x7f0b9013b85c (1)
+ Event: page_fault_user:0x7f0b90127000 (1)
+ Event: page_fault_user:0x606e70 (1)
+ Event: page_fault_user:0x7f0b90144010 (1)
+ Event: page_fault_user:0x7fffcb31b038 (1)
+ Event: page_fault_user:0x606da8 (1)
+ Event: page_fault_user:0x400040 (1)
+ Event: page_fault_user:0x398d222218 (1)
+ Event: page_fault_user:0x398d015120 (1)
+ Event: page_fault_user:0x398d220ce8 (1)
+ Event: page_fault_user:0x398d220b80 (1)
+ Event: page_fault_user:0x7fffcb2fcff8 (1)
+ Event: page_fault_user:0x398d001590 (1)
+ Event: page_fault_user:0x398d838490 (1)
+ Event: softirq_raise:RCU (3) Total: 252931 Avg: 84310 Max: 243288 Min:4639
+ Event: softirq_raise:SCHED (2) Total: 241249 Avg: 120624 Max: 239076 Min:2173
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (1) time:239076 max:239076 min:0 avg:239076
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ default_wake_function (0xffffffff810a4002)
+ autoremove_wake_function (0xffffffff810b50fd)
+ __wake_up_common (0xffffffff810b4958)
+ __wake_up (0xffffffff810b4cb8)
+ rb_wake_up_waiters (0xffffffff8112f126)
+ irq_work_run_list (0xffffffff81157d0f)
+ irq_work_run (0xffffffff81157d5e)
+ smp_trace_irq_work_interrupt (0xffffffff810082fc)
+ trace_irq_work_interrupt (0xffffffff816c7aaa)
+ irq_exit (0xffffffff8107dd66)
+ smp_trace_apic_timer_interrupt (0xffffffff816c8c7a)
+ trace_apic_timer_interrupt (0xffffffff816c725a)
+ prepare_ftrace_return (0xffffffff8103d4fd)
+ ftrace_graph_caller (0xffffffff816c8428)
+ mem_cgroup_begin_page_stat (0xffffffff811cfd25)
+ page_remove_rmap (0xffffffff811a4fc5)
+ stub_execve (0xffffffff816c6929)
+ unmap_single_vma (0xffffffff81198b1c)
+ unmap_vmas (0xffffffff81199174)
+ exit_mmap (0xffffffff811a1f5b)
+ mmput (0xffffffff8107699a)
+ flush_old_exec (0xffffffff811ddb75)
+ load_elf_binary (0xffffffff812287df)
+ search_binary_handler (0xffffffff811dd3e0)
+ do_execveat_common.isra.31 (0xffffffff811de8bd)
+ do_execve (0xffffffff811dea8c)
+ SyS_execve (0xffffffff811ded1e)
+ return_to_handler (0xffffffff816c8458)
+
+
+ Event: softirq_raise:HI (3) Total: 72472 Avg: 24157 Max: 64186 Min:3430
+ Event: softirq_entry:RCU (2) Total: 3191 Avg: 1595 Max: 1788 Min:1403
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (1) time:1788 max:1788 min:0 avg:1788
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ default_wake_function (0xffffffff810a4002)
+ autoremove_wake_function (0xffffffff810b50fd)
+ __wake_up_common (0xffffffff810b4958)
+ __wake_up (0xffffffff810b4cb8)
+ rb_wake_up_waiters (0xffffffff8112f126)
+ irq_work_run_list (0xffffffff81157d0f)
+ irq_work_run (0xffffffff81157d5e)
+ smp_trace_irq_work_interrupt (0xffffffff810082fc)
+ trace_irq_work_interrupt (0xffffffff816c7aaa)
+ irq_work_queue (0xffffffff81157e95)
+ ring_buffer_unlock_commit (0xffffffff8113039f)
+ __buffer_unlock_commit (0xffffffff811367d5)
+ trace_buffer_unlock_commit (0xffffffff811376a2)
+ ftrace_event_buffer_commit (0xffffffff81146d5f)
+ ftrace_raw_event_sched_process_exec (0xffffffff8109c511)
+ do_execveat_common.isra.31 (0xffffffff811de9a3)
+ do_execve (0xffffffff811dea8c)
+ SyS_execve (0xffffffff811ded1e)
+ return_to_handler (0xffffffff816c8458)
+ stub_execve (0xffffffff816c6929)
+
+
+ Event: softirq_entry:SCHED (2) Total: 2289 Avg: 1144 Max: 1350 Min:939
+ Event: softirq_entry:HI (3) Total: 180146 Avg: 60048 Max: 178969 Min:499
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (1) time:178969 max:178969 min:0 avg:178969
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ wake_up_process (0xffffffff810a4057)
+ wake_up_worker (0xffffffff8108de74)
+ insert_work (0xffffffff8108fca6)
+ __queue_work (0xffffffff8108fe12)
+ delayed_work_timer_fn (0xffffffff81090088)
+ call_timer_fn (0xffffffff810d8f89)
+ run_timer_softirq (0xffffffff810da8a1)
+ __do_softirq (0xffffffff8107d8fa)
+ irq_exit (0xffffffff8107dd66)
+ smp_trace_apic_timer_interrupt (0xffffffff816c8c7a)
+ trace_apic_timer_interrupt (0xffffffff816c725a)
+ prepare_ftrace_return (0xffffffff8103d4fd)
+ ftrace_graph_caller (0xffffffff816c8428)
+ mem_cgroup_begin_page_stat (0xffffffff811cfd25)
+ page_remove_rmap (0xffffffff811a4fc5)
+ stub_execve (0xffffffff816c6929)
+ unmap_single_vma (0xffffffff81198b1c)
+ unmap_vmas (0xffffffff81199174)
+ exit_mmap (0xffffffff811a1f5b)
+ mmput (0xffffffff8107699a)
+ flush_old_exec (0xffffffff811ddb75)
+ load_elf_binary (0xffffffff812287df)
+ search_binary_handler (0xffffffff811dd3e0)
+ do_execveat_common.isra.31 (0xffffffff811de8bd)
+ do_execve (0xffffffff811dea8c)
+ SyS_execve (0xffffffff811ded1e)
+ return_to_handler (0xffffffff816c8458)
+ ---
+
+The above uses *-F* to follow the sleep task. It filters only on events
+that pertain to sleep. Note, in order to follow forks, you need to also
+include the *-c* flag.
+
+Other tasks will appear in the profile as well if events reference more
+than one task (like sched_switch and sched_wakeup do: the "prev_pid" and
+"next_pid" of sched_switch, and the "common_pid" and "pid" of sched_wakeup).
+
+Stack traces are attached to events that are related to them.
+
+Taking a look at the above output:
+
+ Event: sched_switch:R (2) Total: 234559 Avg: 117279 Max: 129886 Min:104673
+
+This shows that the task was preempted (it's in the running 'R' state).
+It was preempted twice '(2)' for a total of 234,559 nanoseconds, with an average
+preempt time of 117,279 ns, and maximum of 129,886 ns and minimum of 104,673 ns.
+
+The tree shows where it was preempted:
+
+
+ |
+ + ftrace_raw_event_sched_switch (0xffffffff8109f310)
+ 100% (2) time:234559 max:129886 min:104673 avg:117279
+ __schedule (0xffffffff816c1e81)
+ preempt_schedule (0xffffffff816c236e)
+ ___preempt_schedule (0xffffffff81351a59)
+ |
+ + unmap_single_vma (0xffffffff81198c05)
+ | 55% (1) time:129886 max:129886 min:0 avg:129886
+ | stop_one_cpu (0xffffffff8110909a)
+ | sched_exec (0xffffffff810a119b)
+ | do_execveat_common.isra.31 (0xffffffff811de528)
+ | do_execve (0xffffffff811dea8c)
+ | SyS_execve (0xffffffff811ded1e)
+ | return_to_handler (0xffffffff816c8458)
+ | stub_execve (0xffffffff816c6929)
+ |
+ + unmap_single_vma (0xffffffff81198c05)
+ 45% (1) time:104673 max:104673 min:0 avg:104673
+ unmap_vmas (0xffffffff81199174)
+ exit_mmap (0xffffffff811a1f5b)
+ mmput (0xffffffff8107699a)
+ flush_old_exec (0xffffffff811ddb75)
+ load_elf_binary (0xffffffff812287df)
+ search_binary_handler (0xffffffff811dd3e0)
+ do_execveat_common.isra.31 (0xffffffff811de8bd)
+ do_execve (0xffffffff811dea8c)
+ SyS_execve (0xffffffff811ded1e)
+ return_to_handler (0xffffffff816c8458)
+ stub_execve (0xffffffff816c6929)
+
+
+ Event: sched_switch:S (1) Total: 1000513242 Avg: 1000513242 Max: 1000513242 Min:1000513242
+
+This shows that the task was scheduled out in the INTERRUPTIBLE state once
+for a total of 1,000,513,242 ns (~1s), which makes sense as the task was a
+"sleep 1".
+
+After the schedule events, the function events are shown. By default the
+profiler will use the function graph tracer if the depth setting is supported
+by the kernel. It will set the depth to one which will only trace the first
+function that enters the kernel. It will also record the amount of time
+it was in the kernel.
+
+ Event: func: sys_nanosleep() (1) Total: 1000598016 Avg: 1000598016 Max: 1000598016 Min:1000598016
+ Event: func: sys_munmap() (1) Total: 14300 Avg: 14300 Max: 14300 Min:14300
+ Event: func: sys_arch_prctl() (1) Total: 571 Avg: 571 Max: 571 Min:571
+ Event: func: sys_mprotect() (4) Total: 14382 Avg: 3595 Max: 7196 Min:2190
+ Event: func: SyS_read() (1) Total: 2640 Avg: 2640 Max: 2640 Min:2640
+ Event: func: sys_close() (5) Total: 4001 Avg: 800 Max: 1252 Min:414
+ Event: func: sys_newfstat() (3) Total: 11684 Avg: 3894 Max: 10206 Min:636
+ Event: func: SyS_open() (3) Total: 23615 Avg: 7871 Max: 10535 Min:4743
+ Event: func: sys_access() (1) Total: 5924 Avg: 5924 Max: 5924 Min:5924
+ Event: func: SyS_mmap() (8) Total: 39153 Avg: 4894 Max: 12354 Min:1518
+ Event: func: smp_trace_apic_timer_interrupt() (1) Total: 10298 Avg: 10298 Max: 10298 Min:10298
+ Event: func: SyS_brk() (4) Total: 2407 Avg: 601 Max: 1564 Min:206
+ Event: func: do_notify_resume() (2) Total: 4095 Avg: 2047 Max: 2521 Min:1574
+ Event: func: sys_execve() (5) Total: 1625251 Avg: 325050 Max: 1605698 Min:3570
+
+
+Count of times the event was hit is always in parentheses '(5)'.
+
+The function graph trace may produce too much overhead as it is still
+triggering (just not tracing) on all functions. To limit functions just to
+system calls (not interrupts), add the following option:
+
+ -l 'sys_*' -l 'SyS_*'
+
+To disable function graph tracing totally, use:
+
+ -p nop
+
+To use function tracing instead (note, this will not record timings, but just
+the count of times a function is hit):
+
+ -p function
+
+
+Following the functions are the events that are recorded.
+
+
+ Event: sys_enter:35 (1) Total: 1000599765 Avg: 1000599765 Max: 1000599765 Min:1000599765
+ Event: sys_enter:11 (1) Total: 55025 Avg: 55025 Max: 55025 Min:55025
+ Event: sys_enter:158 (1) Total: 1584 Avg: 1584 Max: 1584 Min:1584
+ Event: sys_enter:10 (4) Total: 18359 Avg: 4589 Max: 8764 Min:2933
+ Event: sys_enter:0 (1) Total: 4223 Avg: 4223 Max: 4223 Min:4223
+ Event: sys_enter:3 (5) Total: 9948 Avg: 1989 Max: 2606 Min:1203
+ Event: sys_enter:5 (3) Total: 15530 Avg: 5176 Max: 11840 Min:1405
+ Event: sys_enter:2 (3) Total: 28002 Avg: 9334 Max: 12035 Min:5656
+ Event: sys_enter:21 (1) Total: 7814 Avg: 7814 Max: 7814 Min:7814
+ Event: sys_enter:9 (8) Total: 49583 Avg: 6197 Max: 14137 Min:2362
+ Event: sys_enter:12 (4) Total: 108493 Avg: 27123 Max: 104079 Min:922
+ Event: sys_enter:59 (5) Total: 1631608 Avg: 326321 Max: 1607529 Min:4563
+
+These are the raw system call events, with the raw system call ID after
+the "sys_enter:". For example, "59" is execve(2). Why did it execute 5 times?
+Looking at a strace of this run, we can see:
+
+ execve("/usr/lib64/ccache/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...>
+ <... execve resumed> ) = -1 ENOENT (No such file or directory)
+ execve("/usr/local/sbin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...>
+ <... execve resumed> ) = -1 ENOENT (No such file or directory)
+ execve("/usr/local/bin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...>
+ <... execve resumed> ) = -1 ENOENT (No such file or directory)
+ execve("/usr/sbin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...>
+ <... execve resumed> ) = -1 ENOENT (No such file or directory)
+ execve("/usr/bin/sleep", ["sleep", "1"], [/* 27 vars */] <unfinished ...>
+ <... execve resumed> ) = 0
+
+It attempted to execve the "sleep" command for each path in $PATH until it found
+one.
+
+The page_fault_user events show what userspace address took a page fault.
+
+ Event: softirq_raise:RCU (3) Total: 252931 Avg: 84310 Max: 243288 Min:4639
+ Event: softirq_raise:SCHED (2) Total: 241249 Avg: 120624 Max: 239076 Min:2173
+ |
+ + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960)
+ 100% (1) time:239076 max:239076 min:0 avg:239076
+ ttwu_do_wakeup (0xffffffff810a01a2)
+ ttwu_do_activate.constprop.122 (0xffffffff810a0236)
+ try_to_wake_up (0xffffffff810a3ec3)
+ default_wake_function (0xffffffff810a4002)
+ autoremove_wake_function (0xffffffff810b50fd)
+ __wake_up_common (0xffffffff810b4958)
+ __wake_up (0xffffffff810b4cb8)
+ rb_wake_up_waiters (0xffffffff8112f126)
+ irq_work_run_list (0xffffffff81157d0f)
+ irq_work_run (0xffffffff81157d5e)
+ smp_trace_irq_work_interrupt (0xffffffff810082fc)
+ trace_irq_work_interrupt (0xffffffff816c7aaa)
+ irq_exit (0xffffffff8107dd66)
+
+The timings for the softirq_raise events measure the time it took from the raised
+softirq to the time it executed.
+
+The timings for the softirq_entry events measure the time the softirq took to
+execute.
+
+The stack traces for the softirqs (and possibly other events) are used when
+an event has a stack attached to it. This can happen if the profile ran
+more stacks than just the sched events, or when events are dropped and
+stacks
+
+
+To have full control of what gets traced, use the *-S* option that will have
+trace-cmd not enable any events or the function_graph tracer. Only the events
+listed on the command line are shown.
+
+If only the time of kmalloc is needed to be seen, and where it was recorded,
+using the *-S* option and enabling function_graph and stack tracing for just
+the function needed will give the profile of only that function.
+
+ ---
+# trace-cmd profile -S -p function_graph -l '*kmalloc*' -l '*kmalloc*:stacktrace' sleep 1
+task: sshd-11786
+ Event: func: __kmalloc_reserve.isra.59() (2) Total: 149684 Avg: 74842 Max: 75598 Min:74086
+ |
+ + __alloc_skb (0xffffffff815a8917)
+ | 67% (2) time:149684 max:75598 min:74086 avg:74842
+ | __kmalloc_node_track_caller (0xffffffff811c6635)
+ | __kmalloc_reserve.isra.59 (0xffffffff815a84ac)
+ | return_to_handler (0xffffffff816c8458)
+ | sk_stream_alloc_skb (0xffffffff81604ea1)
+ | tcp_sendmsg (0xffffffff8160592c)
+ | inet_sendmsg (0xffffffff8162fed1)
+ | sock_aio_write (0xffffffff8159f9fc)
+ | do_sync_write (0xffffffff811d694a)
+ | vfs_write (0xffffffff811d7825)
+ | SyS_write (0xffffffff811d7adf)
+ | system_call_fastpath (0xffffffff816c63d2)
+ |
+ + __alloc_skb (0xffffffff815a8917)
+ 33% (1) time:74086 max:74086 min:74086 avg:74086
+ __alloc_skb (0xffffffff815a8917)
+ sk_stream_alloc_skb (0xffffffff81604ea1)
+ tcp_sendmsg (0xffffffff8160592c)
+ inet_sendmsg (0xffffffff8162fed1)
+ sock_aio_write (0xffffffff8159f9fc)
+ do_sync_write (0xffffffff811d694a)
+ vfs_write (0xffffffff811d7825)
+ SyS_write (0xffffffff811d7adf)
+ system_call_fastpath (0xffffffff816c63d2)
+ [..]
+---
+
+To watch the command run but save the output of the profile to a file
+use --stderr, and redirect stderr to a file
+
+# trace-cmd profile --stderr cyclictest -p 80 -n -t1 2> profile.out
+
+Or simply use *-o*
+
+# trace-cmd profile -o profile.out cyclictest -p 80 -n -t1
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-record.1.txt b/Documentation/trace-cmd/trace-cmd-record.1.txt
new file mode 100644
index 00000000..6b8e3b4a
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-record.1.txt
@@ -0,0 +1,519 @@
+TRACE-CMD-RECORD(1)
+===================
+
+NAME
+----
+trace-cmd-record - record a trace from the Ftrace Linux internal tracer
+
+SYNOPSIS
+--------
+*trace-cmd record* ['OPTIONS'] ['command']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) record command will set up the Ftrace Linux kernel tracer to
+record the specified plugins or events that happen while the 'command'
+executes. If no command is given, then it will record until the user hits
+Ctrl-C.
+
+The record command of trace-cmd will set up the Ftrace tracer to start tracing
+the various events or plugins that are given on the command line. It will then
+create a number of tracing processes (one per CPU) that will start recording
+from the kernel ring buffer straight into temporary files. When the command is
+complete (or Ctrl-C is hit) all the files will be combined into a trace.dat
+file that can later be read (see trace-cmd-report(1)).
+
+OPTIONS
+-------
+*-p* 'tracer'::
+ Specify a tracer. Tracers usually do more than just trace an event.
+ Common tracers are: *function*, *function_graph*, *preemptirqsoff*,
+ *irqsoff*, *preemptoff* and *wakeup*. A tracer must be supported by the
+ running kernel. To see a list of available tracers, see trace-cmd-list(1).
+
+*-e* 'event'::
+ Specify an event to trace. Various static trace points have been added to
+ the Linux kernel. They are grouped by subsystem where you can enable all
+ events of a given subsystem or specify specific events to be enabled. The
+ 'event' is of the format "subsystem:event-name". You can also just specify
+ the subsystem without the ':event-name' or the event-name without the
+ "subsystem:". Using "-e sched_switch" will enable the "sched_switch" event
+ whereas "-e sched" will enable all events under the "sched" subsystem.
+
+ The 'event' can also contain glob expressions. That is, "*stat*" will
+ select all events (or subsystems) that have the characters "stat" in their
+ names.
+
+ The keyword 'all' can be used to enable all events.
+
+*-a*::
+ Every event that is being recorded has its output format file saved
+ in the output file to be able to display it later. But if other
+ events are enabled in the trace without trace-cmd's knowledge, the
+ formats of those events will not be recorded and trace-cmd report will
+ not be able to display them. If this is the case, then specify the
+ *-a* option and the format for all events in the system will be saved.
+
+*-T*::
+ Enable a stacktrace on each event. For example:
+
+ <idle>-0 [003] 58549.289091: sched_switch: kworker/0:1:0 [120] R ==> trace-cmd:2603 [120]
+ <idle>-0 [003] 58549.289092: kernel_stack: <stack trace>
+=> schedule (ffffffff814b260e)
+=> cpu_idle (ffffffff8100a38c)
+=> start_secondary (ffffffff814ab828)
+
+*--func-stack*::
+ Enable a stack trace on all functions. Note this is only applicable
+ for the "function" plugin tracer, and will only take effect if the
+ -l option is used and succeeds in limiting functions. If the function
+ tracer is not filtered, and the stack trace is enabled, you can live
+ lock the machine.
+
+*-f* 'filter'::
+ Specify a filter for the previous event. This must come after a *-e*. This
+ will filter what events get recorded based on the content of the event.
+ Filtering is passed to the kernel directly so what filtering is allowed
+ may depend on what version of the kernel you have. Basically, it will
+ let you use C notation to check if an event should be processed or not.
+
+----------------------------------------
+ ==, >=, <=, >, <, &, |, && and ||
+----------------------------------------
+
+ The above are usually safe to use to compare fields.
+
+*--no-filter*::
+ Do not filter out the trace-cmd threads. By default, the threads are
+ filtered out to not be traced by events. This option will have the trace-cmd
+ threads also be traced.
+
+*-R* 'trigger'::
+ Specify a trigger for the previous event. This must come after a *-e*.
+ This will add a given trigger to the given event. To only enable the trigger
+ and not the event itself, then place the event after the *-v* option.
+
+ See Documentation/trace/events.txt in the Linux kernel source for more
+ information on triggers.
+
+*-v*::
+ This will cause all events specified after it on the command line to not
+ be traced. This is useful for selecting a subsystem to be traced but to
+ leave out various events. For Example: "-e sched -v -e "\*stat\*"" will
+ enable all events in the sched subsystem except those that have "stat" in
+ their names.
+
+ Note: the *-v* option was taken from the way grep(1) inverts the following
+ matches.
+
+*-F*::
+ This will filter only the executable that is given on the command line. If
+ no command is given, then it will filter itself (pretty pointless).
+ Using *-F* will let you trace only events that are caused by the given
+ command.
+
+*-P* 'pid'::
+ Similar to *-F* but lets you specify a process ID to trace.
+
+*-c*::
+ Used with either *-F* (or *-P* if kernel supports it) to trace the process'
+ children too.
+
+*--user*::
+ Execute the specified *command* as the given user.
+
+*-C* 'clock'::
+ Set the trace clock to "clock".
+
+ Use trace-cmd(1) list -C to see what clocks are available.
+
+*-o* 'output-file'::
+ By default, trace-cmd record will create a 'trace.dat' file. You can
+ specify a different file to write to with the *-o* option.
+
+*-l* 'function-name'::
+ This will limit the 'function' and 'function_graph' tracers to only trace
+ the given function name. More than one *-l* may be specified on the
+ command line to trace more than one function. This supports both full
+ regex(3) parsing, or basic glob parsing. If the filter has only alphanumeric,
+ '_', '*', '?' and '.' characters, then it will be parsed as a basic glob.
+ To force it to be a regex, prefix the filter with '^' or append it with '$'
+ and it will then be parsed as a regex.
+
+*-g* 'function-name'::
+ This option is for the function_graph plugin. It will graph the given
+ function. That is, it will only trace the function and all functions that
+ it calls. You can have more than one *-g* on the command line.
+
+*-n* 'function-name'::
+ This has the opposite effect of *-l*. The function given with the *-n*
+ option will not be traced. This takes precedence, that is, if you include
+ the same function for both *-n* and *-l*, it will not be traced.
+
+*-d*::
+ Some tracer plugins enable the function tracer by default. Like the
+ latency tracers. This option prevents the function tracer from being
+ enabled at start up.
+
+*-D*::
+ The option *-d* will try to use the function-trace option to disable the
+ function tracer (if available), otherwise it defaults to the proc file:
+ /proc/sys/kernel/ftrace_enabled, but will not touch it if the function-trace
+ option is available. The *-D* option will disable both the ftrace_enabled
+ proc file as well as the function-trace option if it exists.
+
+ Note, this disables function tracing for all users, which includes users
+ outside of ftrace tracers (stack_tracer, perf, etc).
+
+*-O* 'option'::
+ Ftrace has various options that can be enabled or disabled. This allows
+ you to set them. Appending the text 'no' to an option disables it.
+ For example: "-O nograph-time" will disable the "graph-time" Ftrace
+ option.
+
+*-s* 'interval'::
+ The processes that trace-cmd creates to record from the ring buffer need
+ to wake up to do the recording. Setting the 'interval' to zero will cause
+ the processes to wakeup every time new data is written into the buffer.
+ But since Ftrace is recording kernel activity, the act of these processes
+ going back to sleep may write new events into the ring buffer which will
+ wake the process back up. This will needlessly add extra data into the
+ ring buffer.
+
+ The 'interval' metric is microseconds. The default is set to 1000 (1 ms).
+ This is the time each recording process will sleep before waking up to
+ record any new data that was written to the ring buffer.
+
+*-r* 'priority'::
+ The priority to run the capture threads at. In a busy system the trace
+ capturing threads may be starved and events can be lost. This increases
+ the priority of those threads to the real time (FIFO) priority.
+ But use this option with care, it can also change the behaviour of
+ the system being traced.
+
+*-b* 'size'::
+ This sets the ring buffer size to 'size' kilobytes. Because the Ftrace
+ ring buffer is per CPU, this size is the size of each per CPU ring buffer
+ inside the kernel. Using "-b 10000" on a machine with 4 CPUs will make
+ Ftrace have a total buffer size of 40 Megs.
+
+*-B* 'buffer-name'::
+ If the kernel supports multiple buffers, this will add a buffer with
+ the given name. If the buffer name already exists, that buffer is just
+ reset and will not be deleted at the end of record execution. If the
+ buffer is created, it will be removed at the end of execution (unless
+ the *-k* is set, or 'start' command was used).
+
+ After a buffer name is stated, all events added after that will be
+ associated with that buffer. If no buffer is specified, or an event
+ is specified before a buffer name, it will be associated with the
+ main (toplevel) buffer.
+
+ trace-cmd record -e sched -B block -e block -B time -e timer sleep 1
+
+ The above will enable all sched events in the main buffer. It will
+ then create a 'block' buffer instance and enable all block events within
+ that buffer. A 'time' buffer instance is created and all timer events
+ will be enabled for that buffer.
+
+*-m* 'size'::
+ The max size in kilobytes that a per cpu buffer should be. Note, due
+ to rounding to page size, the number may not be totally correct.
+ Also, this is performed by switching between two buffers that are half
+ the given size thus the output may not be of the given size even if
+ much more was written.
+
+ Use this to prevent running out of disk space for long runs.
+
+*-M* 'cpumask'::
+ Set the cpumask to trace. It only affects the last buffer instance
+ given. If supplied before any buffer instance, then it affects the
+ main buffer. The value supplied must be a hex number.
+
+ trace-cmd record -p function -M c -B events13 -e all -M 5
+
+ If the -M is left out, then the mask stays the same. To enable all
+ CPUs, pass in a value of '-1'.
+
+*-k*::
+ By default, when trace-cmd is finished tracing, it will reset the buffers
+ and disable all the tracing that it enabled. This option keeps trace-cmd
+ from disabling the tracer and resetting the buffer. This option is useful for
+ debugging trace-cmd.
+
+ Note: usually trace-cmd will set the "tracing_on" file back to what it
+ was before it was called. This option will leave that file set to zero.
+
+*-i*::
+ By default, if an event is listed that trace-cmd does not find, it
+ will exit with an error. This option will just ignore events that are
+ listed on the command line but are not found on the system.
+
+*-N* 'host:port'::
+ If another machine is running "trace-cmd listen", this option is used to
+ have the data sent to that machine with UDP packets. Instead of writing
+ to an output file, the data is sent off to a remote box. This is ideal for
+ embedded machines with little storage, or having a single machine that
+ will keep all the data in a single repository.
+
+ Note: This option is not supported with latency tracer plugins:
+ wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff
+
+*-V* 'cid:port'::
+ If recording on a guest VM and the host is running *trace-cmd listen* with
+ the *-V* option as well, or if this is recording on the host, and a guest
+ is running *trace-cmd listen* with the *-V* option, then connect to the
+ listener (the same as connecting with the *-N* option via the network).
+ This has the same limitations as the *-N* option above with respect to
+ latency tracer plugins.
+
+*-t*::
+ This option is used with *-N*, when there's a need to send the live data
+ with TCP packets instead of UDP. Although TCP is not nearly as fast as
+ sending the UDP packets, it may be needed if the network is not that
+ reliable, the amount of data is not that intensive, and a guarantee is
+ needed that all traced information is transferred successfully.
+
+*-q* | *--quiet*::
+ For use with recording an application. Suppresses normal output
+ (except for errors) to allow only the application's output to be displayed.
+
+*--date*::
+ With the *--date* option, "trace-cmd" will write timestamps into the
+ trace buffer after it has finished recording. It will then map the
+ timestamp to gettimeofday which will allow wall time output from the
+ timestamps reading the created 'trace.dat' file.
+
+*--max-graph-depth* 'depth'::
+ Set the maximum depth the function_graph tracer will trace into a function.
+ A value of one will only show where userspace enters the kernel but not any
+ functions called in the kernel. The default is zero, which means no limit.
+
+*--cmdlines-size* 'size'::
+ Set the number of entries the kernel tracing file "saved_cmdlines" can
+ contain. This file is a circular buffer which stores the mapping between
+ cmdlines and PIDs. If full, it leads to unresolved cmdlines ("<...>") within
+ the trace. The kernel default value is 128.
+
+*--module* 'module'::
+ Filter a module's name in function tracing. It is equivalent to adding
+ ':mod:module' after all other functions being filtered. If no other function
+ filter is listed, then all of the module's functions will be filtered in the filter.
+
+ '--module snd' is equivalent to '-l :mod:snd'
+
+ '--module snd -l "*jack*"' is equivalent to '-l "*jack*:mod:snd"'
+
+ '--module snd -n "*"' is equivalent to '-n :mod:snd'
+
+*--proc-map*::
+ Save the traced process address map into the trace.dat file. The traced
+ processes can be specified using the option *-P*, or as a given 'command'.
+
+*--profile*::
+ With the *--profile* option, "trace-cmd" will enable tracing that can
+ be used with trace-cmd-report(1) --profile option. If a tracer *-p* is
+ not set, and function graph depth is supported by the kernel, then
+ the function_graph tracer will be enabled with a depth of one (only
+ show where userspace enters into the kernel). It will also enable
+ various tracepoints with stack tracing such that the report can show
+ where tasks have been blocked for the longest time.
+
+ See trace-cmd-profile(1) for more details and examples.
+
+*-G*::
+ Set interrupt (soft and hard) events as global (associated to CPU
+ instead of tasks). Only works for --profile.
+
+*-H* 'event-hooks'::
+ Add custom event matching to connect any two events together. When not
+ used with *--profile*, it will save the parameter and this will be
+ used by trace-cmd report --profile, too. That is:
+
+ trace-cmd record -H hrtimer_expire_entry,hrtimer/hrtimer_expire_exit,hrtimer,sp
+ trace-cmd report --profile
+
+ Will profile hrtimer_expire_entry and hrtimer_expire_exit times.
+
+ See trace-cmd-profile(1) for format.
+
+*-S*:: (for --profile only)
+ Only enable the tracer or events specified on the command line.
+ With this option, the function_graph tracer is not enabled, nor are
+ any events (like sched_switch), unless they are specifically specified
+ on the command line (i.e. -p function -e sched_switch -e sched_wakeup)
+
+*--ts-offset offset*::
+ Add an offset for the timestamp in the trace.dat file. This will add an
+ offset option into the trace.dat file such that a trace-cmd report will
+ offset all the timestamps of the events by the given offset. The offset
+ is in raw units. That is, if the event timestamps are in nanoseconds
+ the offset will also be in nanoseconds even if the displayed units are
+ in microseconds.
+
+*--tsync-interval*::
+ Set the loop interval, in ms, for timestamps synchronization with guests.
+ If a negative number is specified, timestamps synchronization is disabled.
+ If 0 is specified, no loop is performed - timestamps offset is calculated only twice,
+ at the beginning and at the end of the trace.
+ Timestamps synchronization with guests works only if there is support for VSOCK.
+
+*--tsc2nsec*::
+ Convert the current clock to nanoseconds, using tsc multiplier and shift from the Linux
+ kernel's perf interface. This option does not change the trace clock, just assumes that
+ the tsc multiplier and shift are applicable for the selected clock. You may use the
+ "-C tsc2nsec" clock, if not sure what clock to select.
+*--stderr*::
+ Have output go to stderr instead of stdout, but the output of the command
+ executed will not be changed. This is useful if you want to monitor the
+ output of the command being executed, but not see the output from trace-cmd.
+
+*--poll*::
+ Waiting for data to be available on the trace ring-buffers may trigger
+ IPIs. This might generate unacceptable trace noise when tracing low latency
+ or real time systems. The poll option forces trace-cmd to use O_NONBLOCK.
+ Traces are extracted by busy waiting, which will hog the CPUs, so only use
+ when really needed.
+
+*--name*::
+ Give a specific name for the current agent being processed. Used after *-A* to
+ give the guest being traced a name. Useful when using the vsocket ID instead of
+ a name of the guest.
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd record --verbose=warning
+
+*--file-version*::
+ Desired version of the output file. Supported versions are 6 or 7.
+
+*--compression*::
+ Compression of the trace output file, one of these strings can be passed:
+
+ 'any' - auto select the best available compression algorithm
+
+ 'none' - do not compress the trace file
+
+ 'name' - the name of the desired compression algorithm. Available algorithms can be listed with
+ trace-cmd list -c
+
+EXAMPLES
+--------
+
+The basic way to trace all events:
+
+------------------------------
+ # trace-cmd record -e all ls > /dev/null
+ # trace-cmd report
+ trace-cmd-13541 [003] 106260.693809: filemap_fault: address=0x128122 offset=0xce
+ trace-cmd-13543 [001] 106260.693809: kmalloc: call_site=81128dd4 ptr=0xffff88003dd83800 bytes_req=768 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_ZERO
+ ls-13545 [002] 106260.693809: kfree: call_site=810a7abb ptr=0x0
+ ls-13545 [002] 106260.693818: sys_exit_write: 0x1
+
+
+------------------------------
+
+
+
+To use the function tracer with sched switch tracing:
+
+------------------------------
+ # trace-cmd record -p function -e sched_switch ls > /dev/null
+ # trace-cmd report
+ ls-13587 [002] 106467.860310: function: hrtick_start_fair <-- pick_next_task_fair
+ ls-13587 [002] 106467.860313: sched_switch: prev_comm=trace-cmd prev_pid=13587 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=13583 next_prio=120
+ trace-cmd-13585 [001] 106467.860314: function: native_set_pte_at <-- __do_fault
+ trace-cmd-13586 [003] 106467.860314: function: up_read <-- do_page_fault
+ ls-13587 [002] 106467.860317: function: __phys_addr <-- schedule
+ trace-cmd-13585 [001] 106467.860318: function: _raw_spin_unlock <-- __do_fault
+ ls-13587 [002] 106467.860320: function: native_load_sp0 <-- __switch_to
+ trace-cmd-13586 [003] 106467.860322: function: down_read_trylock <-- do_page_fault
+
+
+------------------------------
+
+Here is a nice way to find what interrupts have the highest latency:
+------------------------------------------
+ # trace-cmd record -p function_graph -e irq_handler_entry -l do_IRQ sleep 10
+ # trace-cmd report
+ <idle>-0 [000] 157412.933969: funcgraph_entry: | do_IRQ() {
+ <idle>-0 [000] 157412.933974: irq_handler_entry: irq=48 name=eth0
+ <idle>-0 [000] 157412.934004: funcgraph_exit: + 36.358 us | }
+ <idle>-0 [000] 157413.895004: funcgraph_entry: | do_IRQ() {
+ <idle>-0 [000] 157413.895011: irq_handler_entry: irq=48 name=eth0
+ <idle>-0 [000] 157413.895026: funcgraph_exit: + 24.014 us | }
+ <idle>-0 [000] 157415.891762: funcgraph_entry: | do_IRQ() {
+ <idle>-0 [000] 157415.891769: irq_handler_entry: irq=48 name=eth0
+ <idle>-0 [000] 157415.891784: funcgraph_exit: + 22.928 us | }
+ <idle>-0 [000] 157415.934869: funcgraph_entry: | do_IRQ() {
+ <idle>-0 [000] 157415.934874: irq_handler_entry: irq=48 name=eth0
+ <idle>-0 [000] 157415.934906: funcgraph_exit: + 37.512 us | }
+ <idle>-0 [000] 157417.888373: funcgraph_entry: | do_IRQ() {
+ <idle>-0 [000] 157417.888381: irq_handler_entry: irq=48 name=eth0
+ <idle>-0 [000] 157417.888398: funcgraph_exit: + 25.943 us | }
+
+
+------------------------------------------
+
+An example of the profile:
+------------------------------------------
+ # trace-cmd record --profile sleep 1
+ # trace-cmd report --profile --comm sleep
+task: sleep-21611
+ Event: sched_switch:R (1) Total: 99442 Avg: 99442 Max: 99442 Min:99442
+ <stack> 1 total:99442 min:99442 max:99442 avg=99442
+ => ftrace_raw_event_sched_switch (0xffffffff8105f812)
+ => __schedule (0xffffffff8150810a)
+ => preempt_schedule (0xffffffff8150842e)
+ => ___preempt_schedule (0xffffffff81273354)
+ => cpu_stop_queue_work (0xffffffff810b03c5)
+ => stop_one_cpu (0xffffffff810b063b)
+ => sched_exec (0xffffffff8106136d)
+ => do_execve_common.isra.27 (0xffffffff81148c89)
+ => do_execve (0xffffffff811490b0)
+ => SyS_execve (0xffffffff811492c4)
+ => return_to_handler (0xffffffff8150e3c8)
+ => stub_execve (0xffffffff8150c699)
+ Event: sched_switch:S (1) Total: 1000506680 Avg: 1000506680 Max: 1000506680 Min:1000506680
+ <stack> 1 total:1000506680 min:1000506680 max:1000506680 avg=1000506680
+ => ftrace_raw_event_sched_switch (0xffffffff8105f812)
+ => __schedule (0xffffffff8150810a)
+ => schedule (0xffffffff815084b8)
+ => do_nanosleep (0xffffffff8150b22c)
+ => hrtimer_nanosleep (0xffffffff8108d647)
+ => SyS_nanosleep (0xffffffff8108d72c)
+ => return_to_handler (0xffffffff8150e3c8)
+ => tracesys_phase2 (0xffffffff8150c304)
+ Event: sched_wakeup:21611 (1) Total: 30326 Avg: 30326 Max: 30326 Min:30326
+ <stack> 1 total:30326 min:30326 max:30326 avg=30326
+ => ftrace_raw_event_sched_wakeup_template (0xffffffff8105f653)
+ => ttwu_do_wakeup (0xffffffff810606eb)
+ => ttwu_do_activate.constprop.124 (0xffffffff810607c8)
+ => try_to_wake_up (0xffffffff8106340a)
+------------------------------------------
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-report.1.txt b/Documentation/trace-cmd/trace-cmd-report.1.txt
new file mode 100644
index 00000000..aad8ab51
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-report.1.txt
@@ -0,0 +1,518 @@
+TRACE-CMD-REPORT(1)
+===================
+
+NAME
+----
+trace-cmd-report - show in ASCII a trace created by trace-cmd record
+
+SYNOPSIS
+--------
+*trace-cmd report* ['OPTIONS'] ['input-file']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) report command will output a human readable report of a trace
+created by trace-cmd record.
+
+OPTIONS
+-------
+*-i* 'input-file'::
+ By default, trace-cmd report will read the file 'trace.dat'. But the *-i*
+ option opens up the given 'input-file' instead. Note, the input file may
+ also be specified as the last item on the command line.
+
+*-e*::
+ This outputs the endianness of the file. trace-cmd report is smart enough
+ to be able to read big endian files on little endian machines, and vice
+ versa.
+
+*-f*::
+ This outputs the list of all functions that have been mapped in the trace.dat file.
+ Note, this list may contain functions that may not appear in the trace, as
+ it is the list of mappings to translate function addresses into function names.
+
+*-P*::
+ This outputs the list of "trace_printk()" data. The raw trace data points
+ to static pointers in the kernel. This must be stored in the trace.dat
+ file.
+
+*-E*::
+ This lists the possible events in the file (but this list is not
+ necessarily the list of events in the file).
+
+*--events*::
+ This will list the event formats that are stored in the trace.dat file.
+
+*--event* regex::
+ This will print events that match the given regex. If a colon is specified,
+ then the characters before the colon will be used to match the system and
+ the characters after the colon will match the event.
+
+ trace-cmd report --event sys:read
+
+ The above will only match events where the system name contains "sys"
+ and the event name contains "read".
+
+ trace-cmd report --event read
+
+ The above will match all events that contain "read" in their name. Also it
+ may list all events of a system that contains "read" as well.
+
+*--check-events*::
+ This will parse the event format strings that are stored in the trace.dat
+ file and return whether the formats can be parsed correctly. It will load
+ plugins unless *-N* is specified.
+
+*-t*::
+ Print the full timestamp. The timestamps in the data file are usually
+ recorded to the nanosecond. But the default display of the timestamp
+ is only to the microsecond. To see the full timestamp, add the *-t* option.
+
+*-F* 'filter'::
+ Add a filter to limit what events are displayed. The format of the filter
+ is:
+
+------------------------------------------
+ <events> ':' <filter>
+ <events> = SYSTEM'/'EVENT | SYSTEM | EVENT | <events> ',' <events>
+ <filter> = EVENT_FIELD <op> <value> | <filter> '&&' <filter> |
+ <filter> '||' <filter> | '(' <filter> ')' | '!' <filter>
+ <op> = '==' | '!=' | '>=' | '<=' | '>' | '<' | '&' | '|' | '^' |
+ '+' | '-' | '*' | '/' | '%'
+ <value> = NUM | STRING | EVENT_FIELD
+------------------------------------------
+
+ SYSTEM is the name of the system to filter on. If the EVENT is left out,
+ then it applies to all events under the SYSTEM. If only one string is used
+ without the '/' to separate SYSTEM and EVENT, then the filter
+ will be applied to all systems and events that match the given string.
+
+ Whitespace is ignored, such that "sched:next_pid==123" is equivalent to
+ "sched : next_pid == 123".
+
+ STRING is defined with single or double quotes (single quote must end with
+ single quote, and double with double). Whitespace within quotes is not
+ ignored.
+
+ The representation of a SYSTEM or EVENT may also be a regular expression
+ as defined by 'regcomp(3)'.
+
+ The EVENT_FIELD is the name of the field of an event that is being
+ filtered. If the event does not contain the EVENT_FIELD, that part of the
+ equation will be considered false.
+
+------------------------------------------
+ -F 'sched : bogus == 1 || common_pid == 2'
+------------------------------------------
+
+ The "bogus == 1" will always evaluate to FALSE because no event has a
+ field called "bogus", but the "common_pid == 2" will still be evaluated
+ since all events have the field "common_pid". Any "sched" event that was
+ traced by the process with the PID of 2 will be shown.
+
+ Note, the EVENT_FIELD is the field name as shown by an event's format
+ (as displayed with *--events*), and not what is found in the output.
+ If the output shows "ID:foo" but the field that "foo" belongs to was
+ called "name" in the event format, then "name" must be used in the filter.
+ The same is true about values. If the value that is displayed is converted
+ to a string symbol, the filter checks the original value and not the
+ value displayed. For example, to filter on all tasks that were in the
+ running state at a context switch:
+
+------------------------------------------
+ -F 'sched/sched_switch : prev_state==0'
+------------------------------------------
+
+ Although the output displays 'R', having 'prev_state=="R"' will not work.
+
+ Note: You can also specify 'COMM' as an EVENT_FIELD. This will use the
+ task name (or comm) of the record to compare. For example, to filter out
+ all of the "trace-cmd" tasks:
+
+------------------------------------------
+ -F '.*:COMM != "trace-cmd"'
+------------------------------------------
+
+*-I*::
+ Do not print events where the HARDIRQ latency flag is set.
+ This will filter out most events that are from interrupt context.
+ Note, it may not filter out function traced functions that are
+ in interrupt context but were called before the kernel "in interrupt"
+ flag was set.
+
+*-S*::
+ Do not print events where the SOFTIRQ latency flag is set.
+ This will filter out most events that are from soft interrupt context.
+
+*-v*::
+ This causes the following filters of *-F* to filter out the matching
+ events.
+
+------------------------------------------
+ -v -F 'sched/sched_switch : prev_state == 0'
+------------------------------------------
+
+ Will not display any sched_switch events that have a prev_state of 0.
+ Removing the *-v* will only print out those events.
+
+*-T*::
+ Test the filters of -F. After processing a filter string, the
+ resulting filter will be displayed for each event. This is useful
+ for using a filter for more than one event where a field may not
+ exist in all events. Also it can be used to make sure there are no
+ misspelled event field names, as they will simply be ignored.
+ *-T* is ignored if *-F* is not specified.
+
+*-V*::
+ Show verbose messages (see *--verbose* but only for the numbers)
+
+*-L*::
+ This will not load system wide plugins. It loads "local only". That is
+ what it finds in the ~/.trace-cmd/plugins directory.
+
+*-N*::
+ This will not load any plugins.
+
+*-n* 'event-re'::
+ This will cause all events that match the option to ignore any registered
+ handler (by the plugins) to print the event. The normal event will be printed
+ instead. The 'event-re' is a regular expression as defined by 'regcomp(3)'.
+
+*--profile*::
+ With the *--profile* option, "trace-cmd report" will process all the events
+ first, and then output a format showing where tasks have spent their time
+ in the kernel, as well as where they are blocked the most, and where wake up
+ latencies are.
+
+ See trace-cmd-profile(1) for more details and examples.
+
+*-G*::
+ Set interrupt (soft and hard) events as global (associated to CPU
+ instead of tasks). Only works for --profile.
+
+*-H* 'event-hooks'::
+ Add custom event matching to connect any two events together.
+
+ See trace-cmd-profile(1) for format.
+
+*-R*::
+ This will show the events in "raw" format. That is, it will ignore the event's
+ print formatting and just print the contents of each field.
+
+*-r* 'event-re'::
+ This will cause all events that match the option to print their raw fields.
+ The 'event-re' is a regular expression as defined by 'regcomp(3)'.
+
+*-l*::
+ This adds a "latency output" format. Information about interrupts being
+ disabled, soft irq being disabled, the "need_resched" flag being set,
+ preempt count, and big kernel lock are all being recorded with every
+ event. But the default display does not show this information. This option
+ will display this information with 6 characters. When one of the
+ fields is zero or N/A a \'.\' is shown.
+
+------------------------------------------
+ <idle>-0 0d.h1. 106467.859747: function: ktime_get <-- tick_check_idle
+------------------------------------------
+
+ The 0d.h1. denotes this information. The first character is never a '.'
+ and represents what CPU the trace was recorded on (CPU 0). The 'd' denotes
+ that interrupts were disabled. The 'h' means that this was called inside
+ an interrupt handler. The '1' means the preemption disable count (preempt_count)
+ was set to one. The two '.'s are the "need_resched" flag and kernel lock
+ counter. If the "need_resched" flag is set, then that character would be a
+ 'N'.
+
+*-w*::
+ If both the 'sched_switch' and 'sched_wakeup' events are enabled, then
+ this option will report the latency between the time the task was first
+ woken, and the time it was scheduled in.
+
+*-q*::
+ Quiet non critical warnings.
+
+*-O*::
+ Pass options to the trace-cmd plugins that are loaded.
+
+ -O plugin:var=value
+
+ The 'plugin:' and '=value' are optional. Value may be left off for options
+ that are boolean. If the 'plugin:' is left off, then any variable that matches
+ in all plugins will be set.
+
+ Example: -O fgraph:tailprint
+
+*--cpu* <cpu list>::
+ List of CPUs, separated by "," or ":", used for filtering the events.
+ A range of CPUs can be specified using "cpuX-cpuY" notation, where all CPUs
+ in the range between cpuX and cpuY will be included in the list. The order
+ of CPUs in the list must be from lower to greater.
+
+ Example: "--cpu 0,3" - show events from CPUs 0 and 3
+ "--cpu 2-4" - show events from CPUs 2, 3 and 4
+
+*--cpus*::
+ List the CPUs that have data in the trace file then exit.
+
+*--stat*::
+ If the trace.dat file recorded the final stats (output at the end of record)
+ the *--stat* option can be used to retrieve them.
+
+*--uname*::
+ If the trace.dat file recorded uname during the run, this will retrieve that
+ information.
+
+*--version*::
+ If the trace.dat file recorded the version of the executable used to create
+ it, report that version.
+
+*--ts-offset* offset::
+ Add (or subtract if negative) an offset for all timestamps of the previous
+ data file specified with *-i*. This is useful to merge sort multiple trace.dat
+ files where the difference in the timestamp is known. For example if a trace
+ is done on a virtual guest, and another trace is done on the host. If the
+ host timestamp is 1000 units ahead of the guest, the following can be done:
+
+ trace-cmd report -i host.dat --ts-offset -1000 -i guest.dat
+
+ This will subtract 1000 timestamp units from all the host events as it merges
+ with the guest.dat events. Note, the offset is in the raw units recorded in
+ the trace. If the units are nanoseconds, the addition (or subtraction) from
+ the offset will be nanoseconds even if the displayed units are microseconds.
+
+*--ts2secs* HZ::
+ Convert the current clock source into a second (nanosecond resolution)
+ output. When using clocks like x86-tsc, if the frequency is known,
+ by passing in the clock frequency, this will convert the time to seconds.
+
+ This option affects any trace.dat file given with *-i* preceding it.
+ If this option comes before any *-i* option, then that value becomes
+ the default conversion for all other trace.dat files. If another
+ --ts2secs option appears after a *-i* trace.dat file, then that option
+ will override the default value.
+
+ Example: On a 3.4 GHz machine
+
+ trace-cmd record -p function -C x86-tsc
+
+ trace-cmd report --ts2secs 3400000000
+
+ The report will convert the cycles timestamps into a readable second
+ display. The default display resolution is microseconds, unless *-t*
+ is used.
+
+ The value of --ts-offset must still be in the raw timestamp units, even
+ with this option. The offset will be converted as well.
+
+*--ts-diff*::
+ Show the time differences between events. The difference will appear in
+ parenthesis just after the timestamp.
+
+*--ts-check*::
+ Make sure no timestamp goes backwards, and if it does, print out a warning
+ message of the fact.
+
+*--nodate*::
+ Ignore converting the timestamps to the date set by *trace-cmd record*(1) --date option.
+
+*--raw-ts*::
+ Display raw timestamps, without any corrections.
+
+*--align-ts*::
+ Display timestamps aligned to the first event.
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "crit", "err", "warn",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd report --verbose=warning
+
+EXAMPLES
+--------
+
+Using a trace.dat file that was created with:
+
+------------------------------------------
+ # trace-cmd record -p function -e all sleep 5
+
+
+------------------------------------------
+
+The default report shows:
+
+------------------------------------------
+ # trace-cmd report
+ trace-cmd-16129 [002] 158126.498411: function: __mutex_unlock_slowpath <-- mutex_unlock
+ trace-cmd-16131 [000] 158126.498411: kmem_cache_alloc: call_site=811223c5 ptr=0xffff88003ecf2b40 bytes_req=272 bytes_alloc=320 gfp_flags=GFP_KERNEL|GFP_ZERO
+ trace-cmd-16130 [003] 158126.498411: function: do_splice_to <-- sys_splice
+ sleep-16133 [001] 158126.498412: function: inotify_inode_queue_event <-- vfs_write
+ trace-cmd-16129 [002] 158126.498420: lock_release: 0xffff88003f1fa4f8 &sb->s_type->i_mutex_key
+ trace-cmd-16131 [000] 158126.498421: function: security_file_alloc <-- get_empty_filp
+ sleep-16133 [001] 158126.498422: function: __fsnotify_parent <-- vfs_write
+ trace-cmd-16130 [003] 158126.498422: function: rw_verify_area <-- do_splice_to
+ trace-cmd-16131 [000] 158126.498424: function: cap_file_alloc_security <-- security_file_alloc
+ trace-cmd-16129 [002] 158126.498425: function: syscall_trace_leave <-- int_check_syscall_exit_work
+ sleep-16133 [001] 158126.498426: function: inotify_dentry_parent_queue_event <-- vfs_write
+ trace-cmd-16130 [003] 158126.498426: function: security_file_permission <-- rw_verify_area
+ trace-cmd-16129 [002] 158126.498428: function: audit_syscall_exit <-- syscall_trace_leave
+[...]
+
+
+------------------------------------------
+
+To see everything but the function traces:
+
+------------------------------------------
+ # trace-cmd report -v -F 'function'
+ trace-cmd-16131 [000] 158126.498411: kmem_cache_alloc: call_site=811223c5 ptr=0xffff88003ecf2b40 bytes_req=272 bytes_alloc=320 gfp_flags=GFP_KERNEL|GFP_ZERO
+ trace-cmd-16129 [002] 158126.498420: lock_release: 0xffff88003f1fa4f8 &sb->s_type->i_mutex_key
+ trace-cmd-16130 [003] 158126.498436: lock_acquire: 0xffffffff8166bf78 read all_cpu_access_lock
+ trace-cmd-16131 [000] 158126.498438: lock_acquire: 0xffff88003df5b520 read &fs->lock
+ trace-cmd-16129 [002] 158126.498446: kfree: call_site=810a7abb ptr=0x0
+ trace-cmd-16130 [003] 158126.498448: lock_acquire: 0xffff880002250a80 &per_cpu(cpu_access_lock, cpu)
+ trace-cmd-16129 [002] 158126.498450: sys_exit_splice: 0xfffffff5
+ trace-cmd-16131 [000] 158126.498454: lock_release: 0xffff88003df5b520 &fs->lock
+ sleep-16133 [001] 158126.498456: kfree: call_site=810a7abb ptr=0x0
+ sleep-16133 [001] 158126.498460: sys_exit_write: 0x1
+ trace-cmd-16130 [003] 158126.498462: kmalloc: call_site=810bf95b ptr=0xffff88003dedc040 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO
+
+
+------------------------------------------
+
+To see only the kmalloc calls that were greater than 1000 bytes:
+
+------------------------------------------
+ # trace-cmd report -F 'kmalloc: bytes_req > 1000'
+ <idle>-0 [000] 158128.126641: kmalloc: call_site=81330635 ptr=0xffff88003c2fd000 bytes_req=2096 bytes_alloc=4096 gfp_flags=GFP_ATOMIC
+
+
+------------------------------------------
+
+To see wakeups and sched switches that left the previous task in the running
+state:
+------------------------------------------
+ # trace-cmd report -F 'sched: prev_state == 0 || (success == 1)'
+ trace-cmd-16132 [002] 158126.499951: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=002
+ trace-cmd-16132 [002] 158126.500401: sched_switch: prev_comm=trace-cmd prev_pid=16132 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16129 next_prio=120
+ <idle>-0 [003] 158126.500585: sched_wakeup: comm=trace-cmd pid=16130 prio=120 success=1 target_cpu=003
+ <idle>-0 [003] 158126.501241: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16130 next_prio=120
+ trace-cmd-16132 [000] 158126.502475: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=000
+ trace-cmd-16131 [002] 158126.506516: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=002
+ <idle>-0 [003] 158126.550110: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16130 next_prio=120
+ trace-cmd-16131 [003] 158126.570243: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=003
+ trace-cmd-16130 [002] 158126.618202: sched_switch: prev_comm=trace-cmd prev_pid=16130 prev_prio=120 prev_state=R ==> next_comm=yum-updatesd next_pid=3088 next_prio=120
+ trace-cmd-16129 [003] 158126.622379: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=003
+ trace-cmd-16129 [000] 158126.649287: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=000
+
+
+------------------------------------------
+
+The above needs a little explanation. The filter specifies the "sched"
+subsystem, which includes both sched_switch and sched_wakeup events. Any event
+that does not have the format field "prev_state" or "success", will evaluate
+those expressions as FALSE, and will not produce a match. Using "||" will have
+the "prev_state" test happen for the "sched_switch" event and the "success"
+test happen for the "sched_wakeup" event.
+
+
+------------------------------------------
+ # trace-cmd report -w -F 'sched_switch, sched_wakeup.*'
+[...]
+ trace-cmd-16130 [003] 158131.580616: sched_wakeup: comm=trace-cmd pid=16131 prio=120 success=1 target_cpu=003
+ trace-cmd-16129 [000] 158131.581502: sched_switch: prev_comm=trace-cmd prev_pid=16129 prev_prio=120 prev_state=S ==> next_comm=trace-cmd next_pid=16131 next_prio=120 Latency: 885.901 usecs
+ trace-cmd-16131 [000] 158131.582414: sched_wakeup: comm=trace-cmd pid=16129 prio=120 success=1 target_cpu=000
+ trace-cmd-16132 [001] 158131.583219: sched_switch: prev_comm=trace-cmd prev_pid=16132 prev_prio=120 prev_state=S ==> next_comm=trace-cmd next_pid=16129 next_prio=120 Latency: 804.809 usecs
+ sleep-16133 [002] 158131.584121: sched_wakeup: comm=trace-cmd pid=16120 prio=120 success=1 target_cpu=002
+ trace-cmd-16129 [001] 158131.584128: sched_wakeup: comm=trace-cmd pid=16132 prio=120 success=1 target_cpu=001
+ sleep-16133 [002] 158131.584275: sched_switch: prev_comm=sleep prev_pid=16133 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=16120 next_prio=120 Latency: 153.915 usecs
+ trace-cmd-16130 [003] 158131.585284: sched_switch: prev_comm=trace-cmd prev_pid=16130 prev_prio=120 prev_state=S ==> next_comm=trace-cmd next_pid=16132 next_prio=120 Latency: 1155.677 usecs
+
+Average wakeup latency: 26626.656 usecs
+
+
+------------------------------------------
+
+The above trace produces the wakeup latencies of the tasks. The "sched_switch"
+event reports each individual latency after writing the event information.
+At the end of the report, the average wakeup latency is reported.
+
+------------------------------------------
+ # trace-cmd report -w -F 'sched_switch, sched_wakeup.*: prio < 100 || next_prio < 100'
+ <idle>-0 [003] 158131.516753: sched_wakeup: comm=ksoftirqd/3 pid=13 prio=49 success=1 target_cpu=003
+ <idle>-0 [003] 158131.516855: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/3 next_pid=13 next_prio=49 Latency: 101.244 usecs
+ <idle>-0 [003] 158131.533781: sched_wakeup: comm=ksoftirqd/3 pid=13 prio=49 success=1 target_cpu=003
+ <idle>-0 [003] 158131.533897: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/3 next_pid=13 next_prio=49 Latency: 115.608 usecs
+ <idle>-0 [003] 158131.569730: sched_wakeup: comm=ksoftirqd/3 pid=13 prio=49 success=1 target_cpu=003
+ <idle>-0 [003] 158131.569851: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/3 next_pid=13 next_prio=49 Latency: 121.024 usecs
+
+Average wakeup latency: 110.021 usecs
+
+
+------------------------------------------
+
+The above version will only show the wakeups and context switches of Real Time
+tasks. The 'prio' used inside the kernel starts at 0 for highest priority.
+That is 'prio' 0 is equivalent to user space real time priority 99, and
+priority 98 is equivalent to user space real time priority 1.
+Prios less than 100 represent Real Time tasks.
+
+An example of the profile:
+------------------------------------------
+ # trace-cmd record --profile sleep 1
+ # trace-cmd report --profile --comm sleep
+task: sleep-21611
+ Event: sched_switch:R (1) Total: 99442 Avg: 99442 Max: 99442 Min:99442
+ <stack> 1 total:99442 min:99442 max:99442 avg=99442
+ => ftrace_raw_event_sched_switch (0xffffffff8105f812)
+ => __schedule (0xffffffff8150810a)
+ => preempt_schedule (0xffffffff8150842e)
+ => ___preempt_schedule (0xffffffff81273354)
+ => cpu_stop_queue_work (0xffffffff810b03c5)
+ => stop_one_cpu (0xffffffff810b063b)
+ => sched_exec (0xffffffff8106136d)
+ => do_execve_common.isra.27 (0xffffffff81148c89)
+ => do_execve (0xffffffff811490b0)
+ => SyS_execve (0xffffffff811492c4)
+ => return_to_handler (0xffffffff8150e3c8)
+ => stub_execve (0xffffffff8150c699)
+ Event: sched_switch:S (1) Total: 1000506680 Avg: 1000506680 Max: 1000506680 Min:1000506680
+ <stack> 1 total:1000506680 min:1000506680 max:1000506680 avg=1000506680
+ => ftrace_raw_event_sched_switch (0xffffffff8105f812)
+ => __schedule (0xffffffff8150810a)
+ => schedule (0xffffffff815084b8)
+ => do_nanosleep (0xffffffff8150b22c)
+ => hrtimer_nanosleep (0xffffffff8108d647)
+ => SyS_nanosleep (0xffffffff8108d72c)
+ => return_to_handler (0xffffffff8150e3c8)
+ => tracesys_phase2 (0xffffffff8150c304)
+ Event: sched_wakeup:21611 (1) Total: 30326 Avg: 30326 Max: 30326 Min:30326
+ <stack> 1 total:30326 min:30326 max:30326 avg=30326
+ => ftrace_raw_event_sched_wakeup_template (0xffffffff8105f653)
+ => ttwu_do_wakeup (0xffffffff810606eb)
+ => ttwu_do_activate.constprop.124 (0xffffffff810607c8)
+ => try_to_wake_up (0xffffffff8106340a)
+------------------------------------------
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-start(1), trace-cmd-stop(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-reset.1.txt b/Documentation/trace-cmd/trace-cmd-reset.1.txt
new file mode 100644
index 00000000..eee86751
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-reset.1.txt
@@ -0,0 +1,116 @@
+TRACE-CMD-RESET(1)
+==================
+
+NAME
+----
+trace-cmd-reset - turn off all Ftrace tracing to bring back full performance
+
+SYNOPSIS
+--------
+*trace-cmd reset* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) reset command turns off all tracing of Ftrace. This will
+bring back the performance of the system before tracing was enabled. This is
+necessary since 'trace-cmd-record(1)', 'trace-cmd-stop(1)' and
+'trace-cmd-extract(1)' do not disable the tracer, even after the data has
+been pulled from the buffers. The rationale is that the user may want to
+manually enable the tracer with the Ftrace pseudo file system, or examine other
+parts of Ftrace to see what trace-cmd did. After the reset command happens,
+the data in the ring buffer, and the options that were used are all lost.
+
+OPTIONS
+-------
+Please note that the order that options are specified on the command line is
+significant. See EXAMPLES.
+
+*-b* 'buffer_size'::
+ When the kernel boots, the Ftrace ring buffer is of a minimal size (3
+ pages per CPU). The first time the tracer is used, the ring buffer size
+ expands to what it was set for (default 1.4 Megs per CPU).
+
+ If no more tracing is to be done, this option allows you to shrink the
+ ring buffer down to free up available memory.
+
+ trace-cmd reset -b 1
+
+ The buffer instance affected is the one (or ones) specified by the most
+ recently preceding *-B*, *-t*, or *-a* option:
+
+ When used after *-B*, resizes the buffer instance that precedes it on
+ the command line.
+
+ When used after *-a*, resizes all buffer instances except the top one.
+
+ When used after *-t* or before any *-B* or *-a*, resizes the top
+ instance.
+
+*-B* 'buffer-name'::
+ If the kernel supports multiple buffers, this will reset the trace for
+ only the given buffer. It does not affect any other buffer. This may be
+ used multiple times to specify different buffers. The top level buffer
+ will not be reset if this option is given (unless the *-t* option is
+ also supplied).
+
+*-a*::
+ Reset the trace for all existing buffer instances. When this option
+ is used, the top level instance will not be reset unless *-t* is given.
+
+*-d*::
+ This option deletes the instance buffer(s) specified by the most recently
+ preceding *-B* or *-a* option. Because the top-level instance buffer
+ cannot be deleted, it is invalid to use this immediately following *-t* or
+ prior to any *-B* or *-a* option on the command line.
+
+*-t*::
+ Resets the top level instance buffer. Without the *-B* or *-a* option
+ this is the same as the default. But if *-B* or *-a* is used, this is
+ required if the top level instance buffer should also be reset.
+
+EXAMPLES
+--------
+
+Reset tracing for instance-one and set its per-cpu buffer size to 4096kb.
+Also deletes instance-two. The top level instance and any other instances
+remain unaffected:
+
+ trace-cmd reset -B instance-one -b 4096 -B instance-two -d
+
+Delete all instance buffers. Top level instance remains unaffected:
+
+ trace-cmd reset -a -d
+
+Delete all instance buffers and also reset the top instance:
+
+ trace-cmd reset -t -a -d
+
+Invalid. This command implies an attempt to delete the top instance:
+
+ trace-cmd reset -a -t -d
+
+Reset the top instance and set its per-cpu buffer size to 1024kb. If any
+instance buffers exist, they will be unaffected:
+
+ trace-cmd reset -b 1024
+
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-restore.1.txt b/Documentation/trace-cmd/trace-cmd-restore.1.txt
new file mode 100644
index 00000000..ebcbb1b6
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-restore.1.txt
@@ -0,0 +1,105 @@
+TRACE-CMD-RESTORE(1)
+====================
+
+NAME
+----
+trace-cmd-restore - restore a failed trace record
+
+SYNOPSIS
+--------
+*trace-cmd restore* ['OPTIONS'] ['command'] cpu-file [cpu-file ...]
+
+DESCRIPTION
+-----------
+The trace-cmd(1) restore command will restore a crashed trace-cmd-record(1)
+file. If for some reason a trace-cmd record fails, it will leave the
+per-cpu data files and not create the final trace.dat file. The trace-cmd
+restore will append the files to create a working trace.dat file that can
+be read with trace-cmd-report(1).
+
+When trace-cmd record runs, it spawns off a process per CPU and writes
+to a per cpu file usually called 'trace.dat.cpuX', where X represents the
+CPU number that it is tracing. If the -o option was used in the trace-cmd
+record, then the CPU data files will have that name instead of the
+'trace.dat' name. If an unexpected crash occurs before the tracing
+is finished, then the per CPU files will still exist but there will
+not be any trace.dat file to read from. trace-cmd restore will allow you
+to create a trace.dat file with the existing data files.
+
+OPTIONS
+-------
+*-c*::
+ Create a partial trace.dat file from the machine, to be used with
+ a full trace-cmd restore at another time. This option is useful for
+ embedded devices. If a server contains the cpu files of a crashed
+ trace-cmd record (or trace-cmd listen), trace-cmd restore can be
+ executed on the embedded device with the -c option to get all the
+ stored information of that embedded device. Then the file created
+ could be copied to the server to run the trace-cmd restore there
+ with the cpu files.
+
+ If *-o* is not specified, then the file created will be called
+ 'trace-partial.dat'. This is because the file is not a full version
+ of something that trace-cmd-report(1) could use.
+
+*-t* tracing_dir::
+ Used with *-c*, it overrides the location to read the events from.
+ By default, tracing information is read from the debugfs/tracing
+ directory. *-t* will use that location instead. This can be useful
+ if the trace.dat file to create is from another machine.
+ Just tar -cvf events.tar debugfs/tracing and copy and untar that
+ file locally, and use that directory instead.
+
+*-k* kallsyms::
+ Used with *-c*, it overrides where to read the kallsyms file from.
+ By default, /proc/kallsyms is used. *-k* will override the file to
+ read the kallsyms from. This can be useful if the trace.dat file
+ to create is from another machine. Just copy the /proc/kallsyms
+ file locally, and use *-k* to point to that file.
+
+*-o* 'output'::
+ By default, trace-cmd restore will create a 'trace.dat' file
+ (or 'trace-partial.dat' if *-c* is specified). You can
+ specify a different file to write to with the *-o* option.
+
+*-i* input::
+ By default, trace-cmd restore will read the information of the
+ current system to create the initial data stored in the 'trace.dat'
+ file. If the crash was on another machine, then that machine should
+ have the trace-cmd restore run with the *-c* option to create the
+ trace.dat partial file. Then that file can be copied to the current
+ machine where trace-cmd restore will use *-i* to load that file
+ instead of reading from the current system.
+
+EXAMPLES
+--------
+
+If a crash happened on another box, you could run:
+
+ $ trace-cmd restore -c -o box-partial.dat
+
+Then on the server that has the cpu files:
+
+ $ trace-cmd restore -i box-partial.dat trace.dat.cpu0 trace.dat.cpu1
+
+This would create a trace.dat file for the embedded box.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-set.1.txt b/Documentation/trace-cmd/trace-cmd-set.1.txt
new file mode 100644
index 00000000..a182d191
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-set.1.txt
@@ -0,0 +1,273 @@
+TRACE-CMD-SET(1)
+================
+
+NAME
+----
+trace-cmd-set - set a configuration parameter of the Ftrace Linux internal tracer
+
+SYNOPSIS
+--------
+*trace-cmd set* ['OPTIONS'] ['command']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) set command will set a configuration parameter of the Ftrace
+Linux kernel tracer. The specified *command* will be run after the ftrace state
+is set. The configured ftrace state can be restored to default
+using the trace-cmd-reset(1) command.
+
+OPTIONS
+-------
+*-p* 'tracer'::
+ Specify a tracer. Tracers usually do more than just trace an event.
+ Common tracers are: *function*, *function_graph*, *preemptirqsoff*,
+ *irqsoff*, *preemptoff* and *wakeup*. A tracer must be supported by the
+ running kernel. To see a list of available tracers, see trace-cmd-list(1).
+
+*-e* 'event'::
+ Specify an event to trace. Various static trace points have been added to
+ the Linux kernel. They are grouped by subsystem where you can enable all
+ events of a given subsystem or specify specific events to be enabled. The
+ 'event' is of the format "subsystem:event-name". You can also just specify
+ the subsystem without the ':event-name' or the event-name without the
+ "subsystem:". Using "-e sched_switch" will enable the "sched_switch" event
+ whereas "-e sched" will enable all events under the "sched" subsystem.
+
+ The 'event' can also contain glob expressions. That is, "*stat*" will
+ select all events (or subsystems) that have the characters "stat" in their
+ names.
+
+ The keyword 'all' can be used to enable all events.
+
+*-T*::
+ Enable a stacktrace on each event. For example:
+
+ <idle>-0 [003] 58549.289091: sched_switch: kworker/0:1:0 [120] R ==> trace-cmd:2603 [120]
+ <idle>-0 [003] 58549.289092: kernel_stack: <stack trace>
+=> schedule (ffffffff814b260e)
+=> cpu_idle (ffffffff8100a38c)
+=> start_secondary (ffffffff814ab828)
+
+*--func-stack*::
+ Enable a stack trace on all functions. Note this is only applicable
+ for the "function" plugin tracer, and will only take effect if the
+ -l option is used and succeeds in limiting functions. If the function
+ tracer is not filtered, and the stack trace is enabled, you can live
+ lock the machine.
+
+*-f* 'filter'::
+ Specify a filter for the previous event. This must come after a *-e*. This
+ will filter what events get recorded based on the content of the event.
+ Filtering is passed to the kernel directly so what filtering is allowed
+ may depend on what version of the kernel you have. Basically, it will
+ let you use C notation to check if an event should be processed or not.
+
+----------------------------------------
+ ==, >=, <=, >, <, &, |, && and ||
+----------------------------------------
+
+ The above are usually safe to use to compare fields.
+
+*-R* 'trigger'::
+ Specify a trigger for the previous event. This must come after a *-e*.
+ This will add a given trigger to the given event. To only enable the trigger
+ and not the event itself, then place the event after the *-v* option.
+
+ See Documentation/trace/events.txt in the Linux kernel source for more
+ information on triggers.
+
+*-v*::
+ This will negate options specified after it on the command line. It affects:
+[verse]
+--
+ *-e*: Causes all specified events to not be traced. This is useful for
+ selecting a subsystem to be traced but to leave out various events.
+ For example: "-e sched -v -e "\*stat\*"" will enable all events in
+ the sched subsystem except those that have "stat" in their names.
+ *-B*: Deletes the specified ftrace instance. There must be no
+ configuration options related to this instance in the command line.
+ For example: "-v -B bar -B foo" will delete instance bar and create
+ a new instance foo.
+ Note: the *-v* option was taken from the way grep(1) inverts the following
+ matches.
+--
+*-P* 'pid'::
+ This will filter only the specified process IDs. Using *-P* will let you
+ trace only events that are caused by the process.
+
+*-c*::
+ Used with *-P* to trace the process' children too (if kernel supports it).
+
+*--user*::
+ Execute the specified *command* as given user.
+
+*-C* 'clock'::
+ Set the trace clock to "clock".
+
+ Use trace-cmd(1) list -C to see what clocks are available.
+
+*-l* 'function-name'::
+ This will limit the 'function' and 'function_graph' tracers to only trace
+ the given function name. More than one *-l* may be specified on the
+ command line to trace more than one function. The limited use of glob
+ expressions is also allowed. These are 'match\*' to only filter functions
+ that start with 'match'. '\*match' to only filter functions that end with
+ 'match'. '\*match\*' to only filter on functions that contain 'match'.
+
+*-g* 'function-name'::
+ This option is for the function_graph plugin. It will graph the given
+ function. That is, it will only trace the function and all functions that
+ it calls. You can have more than one *-g* on the command line.
+
+*-n* 'function-name'::
+ This has the opposite effect of *-l*. The function given with the *-n*
+ option will not be traced. This takes precedence, that is, if you include
+ the same function for both *-n* and *-l*, it will not be traced.
+
+*-d*::
+ Some tracer plugins enable the function tracer by default. Like the
+ latency tracers. This option prevents the function tracer from being
+ enabled at start up.
+
+*-D*::
+ The option *-d* will try to use the function-trace option to disable the
+ function tracer (if available), otherwise it defaults to the proc file:
+ /proc/sys/kernel/ftrace_enabled, but will not touch it if the function-trace
+ option is available. The *-D* option will disable both the ftrace_enabled
+ proc file as well as the function-trace option if it exists.
+
+ Note, this disables function tracing for all users, which includes users
+ outside of ftrace tracers (stack_tracer, perf, etc).
+
+*-O* 'option'::
+ Ftrace has various options that can be enabled or disabled. This allows
+ you to set them. Appending the text 'no' to an option disables it.
+ For example: "-O nograph-time" will disable the "graph-time" Ftrace
+ option.
+
+*-b* 'size'::
+ This sets the ring buffer size to 'size' kilobytes. Because the Ftrace
+ ring buffer is per CPU, this size is the size of each per CPU ring buffer
+ inside the kernel. Using "-b 10000" on a machine with 4 CPUs will make
+ Ftrace have a total buffer size of 40 Megs.
+
+*-B* 'buffer-name'::
+ If the kernel supports multiple buffers, this will add a buffer with
+ the given name. If the buffer name already exists, that buffer is just
+ reset.
+
+ After a buffer name is stated, all events added after that will be
+ associated with that buffer. If no buffer is specified, or an event
+ is specified before a buffer name, it will be associated with the
+ main (toplevel) buffer.
+
+ trace-cmd set -e sched -B block -e block -B time -e timer sleep 1
+
+ The above will enable all sched events in the main buffer. It will
+ then create a 'block' buffer instance and enable all block events within
+ that buffer. A 'time' buffer instance is created and all timer events
+ will be enabled for that buffer.
+
+*-m* 'size'::
+ The max size in kilobytes that a per cpu buffer should be. Note, due
+ to rounding to page size, the number may not be totally correct.
+ Also, this is performed by switching between two buffers that are half
+ the given size thus the output may not be of the given size even if
+ much more was written.
+
+ Use this to prevent running out of diskspace for long runs.
+
+*-M* 'cpumask'::
+ Set the cpumask to trace. It only affects the last buffer instance
+ given. If supplied before any buffer instance, then it affects the
+ main buffer. The value supplied must be a hex number.
+
+ trace-cmd set -p function -M c -B events13 -e all -M 5
+
+ If the -M is left out, then the mask stays the same. To enable all
+ CPUs, pass in a value of '-1'.
+
+*-i*::
+ By default, if an event is listed that trace-cmd does not find, it
+ will exit with an error. This option will just ignore events that are
+ listed on the command line but are not found on the system.
+
+*-q* | *--quiet*::
+ Suppresses normal output, except for errors.
+
+*--max-graph-depth* 'depth'::
+ Set the maximum depth the function_graph tracer will trace into a function.
+ A value of one will only show where userspace enters the kernel but not any
+ functions called in the kernel. The default is zero, which means no limit.
+
+*--cmdlines-size* 'size'::
+ Set the number of entries the kernel tracing file "saved_cmdlines" can
+ contain. This file is a circular buffer which stores the mapping between
+ cmdlines and PIDs. If full, it leads to unresolved cmdlines ("<...>") within
+ the trace. The kernel default value is 128.
+
+*--module* 'module'::
+ Filter a module's name in function tracing. It is equivalent to adding
+ ':mod:module' after all other functions being filtered. If no other function
+ filter is listed, then all of the module's functions will be filtered in the filter.
+
+ '--module snd' is equivalent to '-l :mod:snd'
+
+ '--module snd -l "*jack*"' is equivalent to '-l "*jack*:mod:snd"'
+
+ '--module snd -n "*"' is equivalent to '-n :mod:snd'
+
+*--stderr*::
+ Have output go to stderr instead of stdout, but the output of the command
+ executed will not be changed. This is useful if you want to monitor the
+ output of the command being executed, but not see the output from trace-cmd.
+
+*--fork*::
+ If a command is listed, then trace-cmd will wait for that command to finish,
+ unless the *--fork* option is specified. Then it will fork the command and
+ return immediately.
+
+*--verbose*[='level']::
+ Set the log level. Supported log levels are "none", "critical", "error", "warning",
+ "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log
+ level to a specific value enables all logs from that and all previous levels.
+ The level will default to "info" if one is not specified.
+
+ Example: enable all critical, error and warning logs
+
+ trace-cmd set --verbose=warning
+
+EXAMPLES
+--------
+
+Enable all events for tracing:
+
+------------------------------
+ # trace-cmd set -e all
+------------------------------
+
+Set the function tracer:
+
+------------------------------
+ # trace-cmd set -p function
+------------------------------
+
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1)
+
+AUTHOR
+------
+Written by Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/trace-cmd/trace-cmd-show.1.txt b/Documentation/trace-cmd/trace-cmd-show.1.txt
new file mode 100644
index 00000000..ea2fda28
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-show.1.txt
@@ -0,0 +1,100 @@
+TRACE-CMD-SHOW(1)
+=================
+
+NAME
+----
+trace-cmd-show - show the contents of the Ftrace Linux kernel tracing buffer.
+
+SYNOPSIS
+--------
+*trace-cmd show* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) show displays the contents of one of the Ftrace Linux
+kernel tracing files: trace, snapshot, or trace_pipe. It is basically
+the equivalent of doing:
+
+ cat /sys/kernel/debug/tracing/trace
+
+OPTIONS
+-------
+*-p*::
+ Instead of displaying the contents of the "trace" file, use the
+ "trace_pipe" file. The difference between the two is that the "trace"
+ file is static. That is, if tracing is stopped, the "trace" file
+ will show the same contents each time.
+
+ The "trace_pipe" file is a consuming read, where a read of the file
+ will consume the output of what was read and it will not read the
+ same thing a second time even if tracing is stopped. This file
+ also will block. If no data is available, trace-cmd show will stop
+ and wait for data to appear.
+
+*-s*::
+ Instead of reading the "trace" file, read the snapshot file. The snapshot
+ is made by an application writing into it and the kernel will perform
+ a swap between the currently active buffer and the current snapshot
+ buffer. If no more swaps are made, the snapshot will remain static.
+ This is not a consuming read.
+
+*-c* 'cpu'::
+ Read only the trace file for a specified CPU.
+
+*-f*::
+ Display the full path name of the file that is being displayed.
+
+*-B* 'buf'::
+ If a buffer instance was created, then the *-B* option will access the
+ files associated with the given buffer.
+
+*--tracing_on*::
+ Show if tracing is on for the given instance.
+
+*--current_tracer*::
+ Show what the current tracer is.
+
+*--buffer_size*::
+ Show the current buffer size (per-cpu)
+
+*--buffer_total_size*::
+ Show the total size of all buffers.
+
+*--ftrace_filter*::
+ Show what function filters are set.
+
+*--ftrace_notrace*::
+ Show what function disabled filters are set.
+
+*--ftrace_pid*::
+ Show the PIDs the function tracer is limited to (if any).
+
+*--graph_function*::
+ Show the functions that will be graphed.
+
+*--graph_notrace*::
+ Show the functions that will not be graphed.
+
+*--cpumask*::
+ Show the mask of CPUs that tracing will trace.
+
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-snapshot.1.txt b/Documentation/trace-cmd/trace-cmd-snapshot.1.txt
new file mode 100644
index 00000000..0a34bcd9
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-snapshot.1.txt
@@ -0,0 +1,65 @@
+TRACE-CMD-SNAPSHOT(1)
+=====================
+
+NAME
+----
+trace-cmd-snapshot - take, reset, free, or show a Ftrace kernel snapshot
+
+SYNOPSIS
+--------
+*trace-cmd snapshot* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) snapshot controls or displays the Ftrace Linux kernel
+snapshot feature (if the kernel supports it). This is useful to "freeze"
+an instance of a live trace but without stopping the trace.
+
+ trace-cmd start -p function
+ trace-cmd snapshot -s
+ trace-cmd snapshot
+ [ dumps the content of buffer at 'trace-cmd snapshot -s' ]
+ trace-cmd snapshot -s
+ trace-cmd snapshot
+ [ dumps the new content of the buffer at the last -s operation ]
+
+OPTIONS
+-------
+*-s*::
+ Take a snapshot of the currently running buffer.
+
+*-r*::
+ Clear out the buffer.
+
+*-f*::
+ Free the snapshot buffer. The buffer takes up memory inside the
+ kernel. It is best to free it when not in use. The first -s
+ operation will allocate it if it is not already allocated.
+
+*-c* 'cpu'::
+ Operate on a per cpu snapshot (may not be fully supported by all kernels)
+
+*-B* 'buf'::
+ If a buffer instance was created, then the *-B* option will operate on
+ the snapshot within the buffer.
+
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-split.1.txt b/Documentation/trace-cmd/trace-cmd-split.1.txt
new file mode 100644
index 00000000..25385796
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-split.1.txt
@@ -0,0 +1,107 @@
+TRACE-CMD-SPLIT(1)
+==================
+
+NAME
+----
+trace-cmd-split - split a trace.dat file into smaller files
+
+SYNOPSIS
+--------
+*trace-cmd split* ['OPTIONS'] ['start-time' ['end-time']]
+
+DESCRIPTION
+-----------
+The trace-cmd(1) split is used to break up a trace.dat into small files.
+The 'start-time' specifies where the new file will start at. Using
+'trace-cmd-report(1)' and copying the time stamp given at a particular event,
+can be used as input for either 'start-time' or 'end-time'. The split will
+stop creating files when it reaches an event after 'end-time'. If only the
+end-time is needed, use 0.0 as the start-time.
+
+If start-time is left out, then the split will start at the beginning of the
+file. If end-time is left out, then split will continue to the end unless it
+meets one of the requirements specified by the options.
+
+OPTIONS
+-------
+*-i* 'file'::
+ If this option is not specified, then the split command will look for the
+ file named 'trace.dat'. This option will allow the reading of another
+ file other than 'trace.dat'.
+
+*-o* 'file'::
+ By default, the split command will use the input file name as a basis of
+ where to write the split files. The output file will be the input file
+ with an attached \'.#\' to the end: trace.dat.1, trace.dat.2, etc.
+
+ This option will change the name of the base file used.
+
+ -o file will create file.1, file.2, etc.
+
+*-s* 'seconds'::
+ This specifies how many seconds should be recorded before the new file
+ should stop.
+
+*-m* 'milliseconds'::
+ This specifies how many milliseconds should be recorded before the new
+ file should stop.
+
+*-u* 'microseconds'::
+ This specifies how many microseconds should be recorded before the new
+ file should stop.
+
+*-e* 'events'::
+ This specifies how many events should be recorded before the new file
+ should stop.
+
+*-p* 'pages'::
+ This specifies the number of pages that should be recorded before the new
+ file should stop.
+
+ Note: only one of *-p*, *-e*, *-u*, *-m*, *-s* may be specified at a time.
+
+ If *-p* is specified, then *-c* is automatically set.
+
+*-r*::
+ This option causes the break up to repeat until end-time is reached (or
+ end of the input if end-time is not specified).
+
+ trace-cmd split -r -e 10000
+
+ This will break up trace.dat into several smaller files, each with at most
+ 10,000 events in it.
+
+*-c*::
+ This option causes the above break up to be per CPU.
+
+ trace-cmd split -c -p 10
+
+ This will create a file that has 10 pages per each CPU from the input.
+
+*-C* 'cpu'::
+ This option will split for a single CPU. Only the cpu named will be extracted
+ from the file.
+
+ trace-cmd split -C 1
+
+ This will split out all the events for cpu 1 in the file.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-stack.1.txt b/Documentation/trace-cmd/trace-cmd-stack.1.txt
new file mode 100644
index 00000000..20752407
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-stack.1.txt
@@ -0,0 +1,50 @@
+TRACE-CMD-STACK(1)
+==================
+
+NAME
+----
+trace-cmd-stack - read, enable or disable Ftrace Linux kernel stack tracing.
+
+SYNOPSIS
+--------
+*trace-cmd stack*
+
+DESCRIPTION
+-----------
+The trace-cmd(1) stack enables the Ftrace stack tracer within the kernel.
+The stack tracer enables the function tracer and at each function call
+within the kernel, the stack is checked. When a new maximum usage stack
+is discovered, it is recorded.
+
+When no option is used, the current stack is displayed.
+
+To enable the stack tracer, use the option *--start*, and to disable
+the stack tracer, use the option *--stop*. The output will be the maximum
+stack found since the start was enabled.
+
+Use *--reset* to reset the stack counter to zero.
+
+Use *--verbose*[='level'] to set the log level. Supported log levels are "none", "critical", "error",
+"warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting
+the log level to a specific value enables all logs from that and all previous levels. The level will
+default to "info" if one is not specified.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-start.1.txt b/Documentation/trace-cmd/trace-cmd-start.1.txt
new file mode 100644
index 00000000..03c5d127
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-start.1.txt
@@ -0,0 +1,51 @@
+TRACE-CMD-START(1)
+==================
+
+NAME
+----
+trace-cmd-start - start the Ftrace Linux kernel tracer without recording
+
+SYNOPSIS
+--------
+*trace-cmd start* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) start enables all the Ftrace tracing the same way
+trace-cmd-record(1) does. The difference is that it does not run threads to
+create a trace.dat file. This is useful just to enable Ftrace and you are only
+interested in the trace after some event has occurred and the trace is
+stopped. Then the trace can be read straight from the Ftrace pseudo file
+system or can be extracted with trace-cmd-extract(1).
+
+OPTIONS
+-------
+The options are the same as 'trace-cmd-record(1)', except that it does not
+take options specific to recording (*-s*, *-o*, *-N*, and *-t*).
+
+*--fork* ::
+ This option is only available for trace-cmd start. It tells trace-cmd
+ to not wait for the process to finish before returning.
+ With this option, trace-cmd start will return right after it forks
+ the process on the command line. This option only has an effect if
+ trace-cmd start also executes a command.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-stop(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-stat.1.txt b/Documentation/trace-cmd/trace-cmd-stat.1.txt
new file mode 100644
index 00000000..fb800f91
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-stat.1.txt
@@ -0,0 +1,80 @@
+TRACE-CMD-STAT(1)
+=================
+
+NAME
+----
+trace-cmd-stat - show the status of the tracing (ftrace) system
+
+SYNOPSIS
+--------
+*trace-cmd stat* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) stat displays the various status of the tracing (ftrace)
+system. The status that it shows is:
+
+*Instances:* List all configured ftrace instances.
+
+*Tracer:* if one of the tracers (like function_graph) is active. Otherwise
+ nothing is displayed.
+
+*Events:* Lists the events that are enabled.
+
+*Event filters:* Shows any filters that are set for any events
+
+*Function filters:* Shows any filters for the function tracers
+
+*Graph functions:* Shows any functions that the function graph tracer should graph
+
+*Buffers:* Shows the trace buffer size if they have been expanded.
+ By default, tracing buffers are in a compressed format until they are used.
+ If they are compressed, the buffer display will not be shown.
+
+*Trace clock:* If the tracing clock is anything other than the default "local"
+ it will be displayed.
+
+*Trace CPU mask:* If not all available CPUs are in the tracing CPU mask, then
+ the tracing CPU mask will be displayed.
+
+*Trace max latency:* Shows the value of the trace max latency if it is other than zero.
+
+*Kprobes:* Shows any kprobes that are defined for tracing.
+
+*Uprobes:* Shows any uprobes that are defined for tracing.
+
+*Error log:* Dump the content of ftrace error_log file.
+
+OPTIONS
+-------
+*-B* 'buffer-name'::
+ Display the status of a given buffer instance. May be specified more than once
+ to display the status of multiple instances.
+
+*-t*::
+ If *-B* is also specified, show the status of the top level tracing directory
+ as well as the instance(s).
+
+*-o*::
+ Display all the options along with their values. If they start with "no", then
+ the option is disabled.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-stop.1.txt b/Documentation/trace-cmd/trace-cmd-stop.1.txt
new file mode 100644
index 00000000..313192c3
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-stop.1.txt
@@ -0,0 +1,63 @@
+TRACE-CMD-STOP(1)
+=================
+
+NAME
+----
+trace-cmd-stop - stop the Ftrace Linux kernel tracer from writing to the ring
+buffer.
+
+SYNOPSIS
+--------
+*trace-cmd stop* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) stop is a complement to 'trace-cmd-start(1)'. This will
+disable Ftrace from writing to the ring buffer. This does not stop the
+overhead that the tracing may incur. Only the updating of the ring buffer is
+disabled, the Ftrace tracing may still be inducing overhead.
+
+After stopping the trace, the 'trace-cmd-extract(1)' may strip out the data
+from the ring buffer and create a trace.dat file. The Ftrace pseudo file
+system may also be examined.
+
+To disable the tracing completely to remove the overhead it causes, use
+'trace-cmd-reset(1)'. But after a reset is performed, the data that has been
+recorded is lost.
+
+OPTIONS
+-------
+*-B* 'buffer-name'::
+ If the kernel supports multiple buffers, this will stop the trace for
+ only the given buffer. It does not affect any other buffer. This may be
+ used multiple times to specify different buffers. When this option is
+ used, the top level instance will not be stopped unless *-t* is given.
+
+*-a*::
+ Stop the trace for all existing buffer instances. When this option
+ is used, the top level instance will not be stopped unless *-t* is given.
+
+*-t*::
+ Stops the top level instance buffer. Without the *-B* or *-a* option this
+ is the same as the default. But if *-B* or *-a* is used, this is
+ required if the top level instance buffer should also be stopped.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd-stream.1.txt b/Documentation/trace-cmd/trace-cmd-stream.1.txt
new file mode 100644
index 00000000..f83652b8
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd-stream.1.txt
@@ -0,0 +1,50 @@
+TRACE-CMD-STREAM(1)
+===================
+
+NAME
+----
+trace-cmd-stream - stream a trace to stdout as it is happening
+
+SYNOPSIS
+--------
+*trace-cmd stream* ['OPTIONS'] ['command']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) stream will start tracing just like trace-cmd-record(1), except
+it will not record to a file and instead it will read the binary buffer
+as it is happening, convert it to a human readable format and write it to
+stdout.
+
+This is basically the same as trace-cmd-start(1) and then doing a trace-cmd-show(1)
+with the *-p* option. trace-cmd-stream is not as efficient as reading from the
+pipe file as most of the stream work is done in userspace. This is useful if
+it is needed to do the work mostly in userspace instead of the kernel, and stream
+also helps to debug trace-cmd-profile(1) which uses the stream code to perform
+the live data analysis for the profile.
+
+
+OPTIONS
+-------
+ These are the same as trace-cmd-record(1), except that it does not take
+ the *-o* option.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1),
+trace-cmd-list(1), trace-cmd-listen(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd.1.txt b/Documentation/trace-cmd/trace-cmd.1.txt
new file mode 100644
index 00000000..7e161273
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd.1.txt
@@ -0,0 +1,109 @@
+TRACE-CMD(1)
+============
+
+NAME
+----
+trace-cmd - interacts with Ftrace Linux kernel internal tracer
+
+SYNOPSIS
+--------
+*trace-cmd* 'COMMAND' ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) command interacts with the Ftrace tracer that is built inside
+the Linux kernel. It interfaces with the Ftrace specific files found in the
+debugfs file system under the tracing directory. A 'COMMAND' must be
+specified to tell trace-cmd what to do.
+
+
+COMMANDS
+--------
+
+ record - record a live trace and write a trace.dat file to the
+ local disk or to the network.
+
+ set - set a ftrace configuration parameter.
+
+ report - reads a trace.dat file and converts the binary data to an
+ ASCII text readable format.
+
+ stream - Start tracing and read the output directly
+
+ profile - Start profiling and read the output directly
+
+ hist - show a histogram of the events.
+
+ stat - show tracing (ftrace) status of the running system
+
+ options - list the plugin options that are available to *report*
+
+ start - start the tracing without recording to a trace.dat file.
+
+ stop - stop tracing (only disables recording, overhead of tracer
+ is still in effect)
+
+ restart - restart tracing from a previous stop (only affects recording)
+
+ extract - extract the data from the kernel buffer and create a trace.dat
+ file.
+
+ show - display the contents of one of the Ftrace Linux kernel tracing files
+
+ reset - disables all tracing and gives back the system performance.
+ (clears all data from the kernel buffers)
+
+ clear - clear the content of the Ftrace ring buffers.
+
+ split - splits a trace.dat file into smaller files.
+
+ list - list the available plugins or events that can be recorded.
+
+ listen - open up a port to listen for remote tracing connections.
+
+ agent - listen on a vsocket for trace clients
+
+ setup-guest - create FIFOs for tracing guest VMs
+
+ restore - restore the data files of a crashed run of trace-cmd record
+
+ snapshot- take snapshot of running trace
+
+ stack - run and display the stack tracer
+
+ check-events - parse format strings for all trace events and return
+ whether all formats are parseable
+
+ convert - convert trace files
+
+ dump - read out the meta data from a trace file
+
+OPTIONS
+-------
+
+*-h*, --help::
+ Display the help text.
+
+For other options, see the man page for the corresponding command.
+
+SEE ALSO
+--------
+trace-cmd-record(1), trace-cmd-report(1), trace-cmd-hist(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-restore(1), trace-cmd-stack(1), trace-cmd-convert(1),
+trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1),
+trace-cmd.dat(5), trace-cmd-check-events(1), trace-cmd-stat(1)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
+
diff --git a/Documentation/trace-cmd/trace-cmd.dat.v6.5.txt b/Documentation/trace-cmd/trace-cmd.dat.v6.5.txt
new file mode 100644
index 00000000..8437b363
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd.dat.v6.5.txt
@@ -0,0 +1,266 @@
+TRACE-CMD.DAT.v6(5)
+===================
+
+NAME
+----
+trace-cmd.dat.v6 - trace-cmd version 6 file format
+
+SYNOPSIS
+--------
+*trace-cmd.dat* ignore
+
+DESCRIPTION
+-----------
+The trace-cmd(1) utility produces a "trace.dat" file. The file may also
+be named anything depending if the user specifies a different output name,
+but it must have a certain binary format. The file is used
+by trace-cmd to save kernel traces into it and be able to extract
+the trace from it at a later point (see *trace-cmd-report(1)*).
+
+
+INITIAL FORMAT
+--------------
+
+ The first three bytes contain the magic value:
+
+ 0x17 0x08 0x44
+
+ The next 7 bytes contain the characters:
+
+ "tracing"
+
+ The next set of characters contain a null '\0' terminated string
+ that contains the version of the file:
+
+ "6\0"
+
+ The next 1 byte contains the flags for the file endianness:
+
+ 0 = little endian
+ 1 = big endian
+
+ The next byte contains the number of bytes per "long" value:
+
+ 4 - 32-bit long values
+ 8 - 64-bit long values
+
+ Note: This is the long size of the target's userspace. Not the
+ kernel space size.
+
+ [ Now all numbers are written in file defined endianness. ]
+
+ The next 4 bytes are a 32-bit word that defines what the traced
+ host machine page size was.
+
+HEADER INFO FORMAT
+------------------
+
+ Directly after the initial format comes information about the
+ trace headers recorded from the target box.
+
+ The next 12 bytes contain the string:
+
+ "header_page\0"
+
+ The next 8 bytes are a 64-bit word containing the size of the
+ page header information stored next.
+
+ The next set of data is of the size read from the previous 8 bytes,
+ and contains the data retrieved from debugfs/tracing/events/header_page.
+
+ Note: The size of the second field \fBcommit\fR contains the target
+ kernel long size. For example:
+
+ field: local_t commit; offset:8; \fBsize:8;\fR signed:1;
+
+ shows the kernel has a 64-bit long.
+
+ The next 13 bytes contain the string:
+
+ "header_event\0"
+
+ The next 8 bytes are a 64-bit word containing the size of the
+ event header information stored next.
+
+ The next set of data is of the size read from the previous 8 bytes
+ and contains the data retrieved from debugfs/tracing/events/header_event.
+
+ This data allows the trace-cmd tool to know if the ring buffer format
+ of the kernel made any changes.
+
+FTRACE EVENT FORMATS
+--------------------
+
+ Directly after the header information comes the information about
+ the Ftrace specific events. These are the events used by the Ftrace plugins
+ and are not enabled by the event tracing.
+
+ The next 4 bytes contain a 32-bit word of the number of Ftrace event
+ format files that are stored in the file.
+
+ For the number of times defined by the previous 4 bytes is the
+ following:
+
+ 8 bytes for the size of the Ftrace event format file.
+
+ The Ftrace event format file copied from the target machine:
+ debugfs/tracing/events/ftrace/<event>/format
+
+EVENT FORMATS
+-------------
+
+ Directly after the Ftrace formats comes the information about
+ the event layout.
+
+ The next 4 bytes are a 32-bit word containing the number of
+ event systems that are stored in the file. These are the
+ directories in debugfs/tracing/events excluding the \fBftrace\fR
+ directory.
+
+ For the number of times defined by the previous 4 bytes is the
+ following:
+
+ A null-terminated string containing the system name.
+
+ 4 bytes containing a 32-bit word containing the number
+ of events within the system.
+
+ For the number of times defined in the previous 4 bytes is the
+ following:
+
+ 8 bytes for the size of the event format file.
+
+ The event format file copied from the target machine:
+ debugfs/tracing/events/<system>/<event>/format
+
+KALLSYMS INFORMATION
+--------------------
+
+ Directly after the event formats comes the information of the mapping
+ of function addresses to the function names.
+
+ The next 4 bytes are a 32-bit word containing the size of the
+ data holding the function mappings.
+
+ The next set of data is of the size defined by the previous 4 bytes
+ and contains the information from the target machine's file:
+ /proc/kallsyms
+
+
+TRACE_PRINTK INFORMATION
+------------------------
+
+ If a developer used trace_printk() within the kernel, it may
+ store the format string outside the ring buffer.
+ This information can be found in:
+ debugfs/tracing/printk_formats
+
+ The next 4 bytes are a 32-bit word containing the size of the
+ data holding the printk formats.
+
+ The next set of data is of the size defined by the previous 4 bytes
+ and contains the information from debugfs/tracing/printk_formats.
+
+
+PROCESS INFORMATION
+-------------------
+
+ Directly after the trace_printk formats comes the information mapping
+ a PID to a process name.
+
+ The next 8 bytes contain a 64-bit word that holds the size of the
+ data mapping the PID to a process name.
+
+ The next set of data is of the size defined by the previous 8 bytes
+ and contains the information from debugfs/tracing/saved_cmdlines.
+
+
+REST OF TRACE-CMD HEADER
+------------------------
+
+ Directly after the process information comes the last bit of the
+ trace.dat file header.
+
+ The next 4 bytes are a 32-bit word defining the number of CPUs that
+ were discovered on the target machine (and has matching trace data
+ for it).
+
+ The next 10 bytes are one of the following:
+
+ "options \0"
+
+ "latency \0"
+
+ "flyrecord\0"
+
+ If it is "options \0" then:
+
+ The next 2 bytes are a 16-bit word defining the current option.
+ If the value is zero then there are no more options.
+
+ Otherwise, the next 4 bytes contain a 32-bit word containing the
+ option size. If the reader does not know how to handle the option
+ it can simply skip it. Currently there are no options defined,
+ but this is here to extend the data.
+
+ The next option will be directly after the previous option, and
+ the options end with a zero in the option type field.
+
+ The next 10 bytes after the options are one of the following:
+
+ "latency \0"
+
+ "flyrecord\0"
+
+ which would follow the same as if options were not present.
+
+ If the value is "latency \0", then the rest of the file is
+ simply ASCII text that was taken from the target's:
+ debugfs/tracing/trace
+
+ If the value is "flyrecord\0", the following is present:
+
+ For the number of CPUs that were read earlier, the
+ following is present:
+
+ 8 bytes that are a 64-bit word containing the offset into the file
+ that holds the data for the CPU.
+
+ 8 bytes that are a 64-bit word containing the size of the CPU
+ data at that offset.
+
+CPU DATA
+--------
+
+ The CPU data is located in the part of the file that is specified
+ in the end of the header. Padding is placed between the header and
+ the CPU data, placing the CPU data at a page aligned (target page) position
+ in the file.
+
+ This data is copied directly from the Ftrace ring buffer and is of the
+ same format as the ring buffer specified by the event header files
+ loaded in the header format file.
+
+ The trace-cmd tool will try to \fBmmap(2)\fR the data page by page with the
+ target's page size if possible. If it fails to mmap, it will just read the
+ data instead.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1),
+trace-cmd.dat(5)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/Documentation/trace-cmd/trace-cmd.dat.v7.5.txt b/Documentation/trace-cmd/trace-cmd.dat.v7.5.txt
new file mode 100644
index 00000000..e5bcac76
--- /dev/null
+++ b/Documentation/trace-cmd/trace-cmd.dat.v7.5.txt
@@ -0,0 +1,451 @@
+TRACE-CMD.DAT.v7(5)
+===================
+
+NAME
+----
+trace-cmd.dat.v7 - trace-cmd version 7 file format
+
+SYNOPSIS
+--------
+*trace-cmd.dat* ignore
+
+DESCRIPTION
+-----------
+The trace-cmd(1) utility produces a "trace.dat" file. The file may also
+be named anything depending if the user specifies a different output name,
+but it must have a certain binary format. The file is used
+by trace-cmd to save kernel traces into it and be able to extract
+the trace from it at a later point (see *trace-cmd-report(1)*).
+
+
+INITIAL FORMAT
+--------------
+
+ The first three bytes contain the magic value:
+
+ 0x17 0x08 0x44
+
+ The next 7 bytes contain the characters:
+
+ "tracing"
+
+ The next set of characters contain a null '\0' terminated string
+ that contains the version of the file:
+
+ "7\0"
+
+ The next 1 byte contains the flags for the file endianness:
+
+ 0 = little endian
+ 1 = big endian
+
+ The next byte contains the number of bytes per "long" value:
+
+ 4 - 32-bit long values
+ 8 - 64-bit long values
+
+ Note: This is the long size of the target's user space. Not the
+ kernel space size.
+
+ [ Now all numbers are written in file defined endianness. ]
+
+ The next 4 bytes are a 32-bit word that defines what the traced
+ host machine page size was.
+
+ The compression algorithm header is written next:
+ "name\0version\0"
+ where "name" and "version" are strings, name and version of the
+ compression algorithm used to compress the trace file. If the name
+ is "none", the data in the file is not compressed.
+
+ The next 8 bytes are a 64-bit integer, the offset within the file where
+ the first OPTIONS section is located.
+
+ The rest of the file consists of different sections. The only mandatory one
+ is the first OPTIONS section; all others are optional. The location and
+ the order of the sections is not strict. Each section starts with a header:
+
+FORMAT OF THE SECTION HEADER
+----------------------------
+ <2 bytes> unsigned short integer, ID of the section.
+ <2 bytes> unsigned short integer, section flags:
+ 1 = the section is compressed.
+ <4 bytes> ID of a string, description of the section.
+ <4 bytes> unsigned integer, size of the section in the file.
+
+ If the section is compressed, the above is the compressed size.
+ The section must be uncompressed on reading. The described format of
+ the sections refers to the uncompressed data.
+
+COMPRESSION FORMAT OF THE FILE SECTIONS
+---------------------------------------
+
+ Some of the sections in the file may be compressed with the compression algorithm,
+ specified in the compression algorithm header. Compressed sections have a compression
+ header, written after the section header and right before the compressed data:
+ <4 bytes> unsigned int, size of compressed data in this section.
+ <4 bytes> unsigned int, size of uncompressed data.
+ <data> binary compressed data, with the specified size.
+
+COMPRESSION FORMAT OF THE TRACE DATA
+------------------------------------
+
+ There are two special sections, BUFFER FLYRECORD and BUFFER LATENCY, containing
+ trace data. These sections may be compressed with the compression algorithm, specified
+ in the compression header. Usually the size of these sections is huge, that's why their
+ compression format is different from the other sections. The trace data is compressed
+ in chunks. The size of one chunk is specified at file creation time. The format
+ of compressed trace data is:
+ <4 bytes> unsigned int, count of chunks.
+ Follows the compressed chunks of given count. For each chunk:
+ <4 bytes> unsigned int, size of compressed data in this chunk.
+ <4 bytes> unsigned int, size of uncompressed data, aligned with the trace page size.
+ <data> binary compressed data, with the specified size.
+ These chunks must be uncompressed on reading. The described format of
+ trace data refers to the uncompressed data.
+
+OPTIONS SECTION
+---------------
+
+ Section ID: 0
+
+ This is the only mandatory section in the file. There can be multiple
+ options sections, the first one is located at the offset specified right
+ after the compression algorithm header. The section consists of multiple
+ trace options, each option has the following format:
+ <2 bytes> unsigned short integer, ID of the option.
+ <4 bytes> unsigned integer, size of the option's data.
+ <binary data> bytes of the size specified above, data of the option.
+
+
+ Options, supported by the trace file version 7:
+
+ DONE: id 0, size 8
+ This option indicates the end of the options section, it is written
+ always as last option. The DONE option data is:
+ <8 bytes> long long unsigned integer, offset in the trace file where
+ the next options section is located. If this offset is 0, then there
+ are no more options sections.
+
+ DATE: id 1, size vary
+ The DATE option data is a null terminated ASCII string, which represents
+ the time difference between trace events timestamps and the Generic Time
+ of Day of the system.
+
+ CPUSTAT: id 2, size vary
+ The CPUSTAT option data is a null terminated ASCII string, the content of the
+ "per_cpu/cpu<id>/stats" file from the trace directory. There is a CPUSTAT option
+ for each CPU.
+
+ BUFFER: id 3, size vary
+ The BUFFER option describes the flyrecord trace data saved in the file, collected
+ from one trace instance. There is a BUFFER option for each trace instance. The format
+ of the BUFFER data is:
+ <8 bytes> long long unsigned integer, offset in the trace file where the
+ BUFFER FLYRECORD section is located, containing flyrecord trace data.
+ <string> a null terminated ASCII string, name of the trace instance. Empty string ""
+ is saved as name of the top instance.
+ <string> a null terminated ASCII string, trace clock used for events timestamps in
+ this trace instance.
+ <4 bytes> unsigned integer, size of the trace buffer page.
+ <4 bytes> unsigned integer, count of the CPUs with trace data.
+ For each CPU of the above count:
+ <4 bytes> unsigned integer, ID of the CPU.
+ <8 bytes> long long unsigned integer, offset in the trace file where the trace data
+ for this CPU is located.
+ <8 bytes> long long unsigned integer, size of the trace data for this CPU.
+
+ TRACECLOCK: id 4, size vary
+ The TRACECLOCK option data is a null terminated ASCII string, the content of the
+ "trace_clock" file from the trace directory.
+
+ UNAME: id 5, size vary
+ The UNAME option data is a null terminated ASCII string, identifying the system where
+ the trace data is collected. The string is retrieved by the uname() system call.
+
+ HOOK: id 6, size vary
+ The HOOK option data is a null terminated ASCII string, describing event hooks: custom
+ event matching to connect any two events together.
+
+ OFFSET: id 7, size vary
+ The OFFSET option data is a null terminated ASCII string, representing a fixed time that
+ is added to each event timestamp on reading.
+
+ CPUCOUNT: id 8, size 4
+ The CPUCOUNT option data is:
+ <4 bytes> unsigned integer, number of CPUs in the system.
+
+ VERSION: id 9, size vary
+ The VERSION option data is a null terminated ASCII string, representing the version of
+ the trace-cmd application, used to collect these trace logs.
+
+ PROCMAPS: id 10, size vary
+ The PROCMAPS option data is a null terminated ASCII string, representing the memory map
+ of each traced filtered process. The format of the string is, for each filtered process:
+ <process ID> <libraries count> <process command> \n
+ <memory start address> <memory end address> <full path of the mapped library file> \n
+ ...
+ separate line for each library, used by this process
+ ...
+ ...
+
+ TRACEID: id 11, size 8
+ The TRACEID option data is a unique identifier of this tracing session:
+ <8 bytes> long long unsigned integer, trace session identifier.
+
+ TIME_SHIFT: id 12, size vary
+ The TIME_SHIFT option stores time synchronization information, collected during host and guest
+ tracing session. Usually it is saved in the guest trace file. This information is used to
+ synchronize guest with host events timestamps, when displaying all files from this tracing
+ session. The format of the TIME_SHIFT option data is:
+ <8 bytes> long long unsigned integer, trace identifier of the peer (usually the host).
+ <4 bytes> unsigned integer, flags specific to the time synchronization protocol, used in this
+ trace session.
+ <4 bytes> unsigned integer, number of traced CPUs. For each CPU, timestamps corrections
+ are recorded:
+ <4 bytes> unsigned integer, count of the recorded timestamps corrections for this CPU.
+ <array of unsigned long long integers of the above count>, times when the corrections are calculated
+ <array of unsigned long long integers of the above count>, corrections offsets
+ <array of unsigned long long integers of the above count>, corrections scaling ratio
+
+ GUEST: id 13, size vary
+ The GUEST option stores information about traced guests in this tracing session. Usually it is
+ saved in the host trace file. There is a separate GUEST option for each traced guest.
+ The information is used when displaying all files from this tracing session. The format of
+ the GUEST option data is:
+ <string> a null terminated ASCII string, name of the guest.
+ <8 bytes> long long unsigned integer, trace identifier of the guest for this session.
+ <4 bytes> unsigned integer, number of guest's CPUs. For each CPU:
+ <4 bytes> unsigned integer, ID of the CPU.
+ <4 bytes> unsigned integer, PID of the host task, emulating this guest CPU.
+
+ TSC2NSEC: id 14, size 16
+ The TSC2NSEC option stores information, used to convert TSC events timestamps to nanoseconds.
+ The format of the TSC2NSEC option data is:
+ <4 bytes> unsigned integer, time multiplier.
+ <4 bytes> unsigned integer, time shift.
+ <8 bytes> unsigned long long integer, time offset.
+
+ HEADER_INFO: id 16, size 8
+ The HEADER_INFO option data is:
+ <8 bytes> long long unsigned integer, offset into the trace file where the HEADER INFO
+ section is located
+
+ FTRACE_EVENTS: id 17, size 8
+ The FTRACE_EVENTS option data is:
+ <8 bytes> long long unsigned integer, offset into the trace file where the
+ FTRACE EVENT FORMATS section is located.
+
+ EVENT_FORMATS: id 18, size 8
+ The EVENT_FORMATS option data is:
+ <8 bytes> long long unsigned integer, offset into the trace file where the EVENT FORMATS
+ section is located.
+
+ KALLSYMS: id 19, size 8
+ The KALLSYMS option data is:
+ <8 bytes> long long unsigned integer, offset into the trace file where the KALLSYMS
+ section is located.
+
+ PRINTK: id 20, size 8
+ The PRINTK option data is:
+ <8 bytes> long long unsigned integer, offset into the trace file where the TRACE_PRINTK
+ section is located.
+
+ CMDLINES: id 21, size 8
+ The CMDLINES option data is:
+ <8 bytes> long long unsigned integer, offset into the trace file where the
+ SAVED COMMAND LINES section is located.
+
+ BUFFER_TEXT: id 22, size vary
+ The BUFFER_TEXT option describes the latency trace data saved in the file. The format
+ of the BUFFER_TEXT data is:
+ <8 bytes> long long unsigned integer, offset in the trace file where the
+ BUFFER LATENCY section is located, containing latency trace data.
+ <string> a null terminated ASCII string, name of the trace instance. Empty string ""
+ is saved as name of the top instance.
+ <string> a null terminated ASCII string, trace clock used for events timestamps in
+ this trace instance.
+
+
+HEADER INFO SECTION
+-------------------
+
+ Section ID: 16
+
+ The first 12 bytes of the section, after the section header, contain the string:
+
+ "header_page\0"
+
+ The next 8 bytes are a 64-bit word containing the size of the
+ page header information stored next.
+
+ The next set of data is of the size read from the previous 8 bytes,
+ and contains the data retrieved from debugfs/tracing/events/header_page.
+
+ Note: The size of the second field \fBcommit\fR contains the target
+ kernel long size. For example:
+
+ field: local_t commit; offset:8; \fBsize:8;\fR signed:1;
+
+ shows the kernel has a 64-bit long.
+
+ The next 13 bytes contain the string:
+
+ "header_event\0"
+
+ The next 8 bytes are a 64-bit word containing the size of the
+ event header information stored next.
+
+ The next set of data is of the size read from the previous 8 bytes
+ and contains the data retrieved from debugfs/tracing/events/header_event.
+
+ This data allows the trace-cmd tool to know if the ring buffer format
+ of the kernel made any changes.
+
+FTRACE EVENT FORMATS SECTION
+----------------------------
+
+ Section ID: 17
+
+ Directly after the section header comes the information about
+ the Ftrace specific events. These are the events used by the Ftrace plugins
+ and are not enabled by the event tracing.
+
+ The next 4 bytes contain a 32-bit word of the number of Ftrace event
+ format files that are stored in the file.
+
+ For the number of times defined by the previous 4 bytes is the
+ following:
+
+ 8 bytes for the size of the Ftrace event format file.
+
+ The Ftrace event format file copied from the target machine:
+ debugfs/tracing/events/ftrace/<event>/format
+
+EVENT FORMATS SECTION
+---------------------
+
+ Section ID: 18
+
+ Directly after the section header comes the information about
+ the event layout.
+
+ The next 4 bytes are a 32-bit word containing the number of
+ event systems that are stored in the file. These are the
+ directories in debugfs/tracing/events excluding the \fBftrace\fR
+ directory.
+
+ For the number of times defined by the previous 4 bytes is the
+ following:
+
+ A null-terminated string containing the system name.
+
+ 4 bytes containing a 32-bit word containing the number
+ of events within the system.
+
+ For the number of times defined in the previous 4 bytes is the
+ following:
+
+ 8 bytes for the size of the event format file.
+
+ The event format file copied from the target machine:
+ debugfs/tracing/events/<system>/<event>/format
+
+KALLSYMS SECTION
+----------------
+
+ Section ID: 19
+
+ Directly after the section header comes the information of the mapping
+ of function addresses to the function names.
+
+ The next 4 bytes are a 32-bit word containing the size of the
+ data holding the function mappings.
+
+ The next set of data is of the size defined by the previous 4 bytes
+ and contains the information from the target machine's file:
+ /proc/kallsyms
+
+
+TRACE_PRINTK SECTION
+--------------------
+
+ Section ID: 20
+
+ If a developer used trace_printk() within the kernel, it may
+ store the format string outside the ring buffer.
+ This information can be found in:
+ debugfs/tracing/printk_formats
+
+ The next 4 bytes are a 32-bit word containing the size of the
+ data holding the printk formats.
+
+ The next set of data is of the size defined by the previous 4 bytes
+ and contains the information from debugfs/tracing/printk_formats.
+
+
+SAVED COMMAND LINES SECTION
+---------------------------
+
+ Section ID: 21
+
+ Directly after the section header comes the information mapping
+ a PID to a process name.
+
+ The next 8 bytes contain a 64-bit word that holds the size of the
+ data mapping the PID to a process name.
+
+ The next set of data is of the size defined by the previous 8 bytes
+ and contains the information from debugfs/tracing/saved_cmdlines.
+
+
+BUFFER FLYRECORD SECTION
+------------------------
+
+ This section contains flyrecord tracing data, collected in one trace instance.
+ The data is saved per CPU. Each BUFFER FLYRECORD section has a corresponding BUFFER
+ option, containing information about saved CPU's trace data. Padding is placed between
+ the section header and the CPU data, placing the CPU data at a page aligned (target page)
+ position in the file.
+
+ This data is copied directly from the Ftrace ring buffer and is of the
+ same format as the ring buffer specified by the event header files
+ loaded in the header format file.
+
+ The trace-cmd tool will try to \fBmmap(2)\fR the data page by page with the
+ target's page size if possible. If it fails to mmap, it will just read the
+ data instead.
+
+BUFFER TEXT SECTION
+-------------------
+
+ This section contains latency tracing data, ASCII text taken from the
+ target's debugfs/tracing/trace file.
+
+STRINGS SECTION
+---------------
+
+ All strings from trace file metadata are stored in a string section in the file. The section
+ contains a list of NULL terminated ASCII strings. An ID of the string is used in the file
+ meta data, which is the offset of the actual string into the string section. Strings can be stored
+ into multiple string sections in the file.
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1),
+trace-cmd.dat(5)
+
+AUTHOR
+------
+Written by Steven Rostedt, <rostedt@goodmis.org>
+
+RESOURCES
+---------
+https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+
+COPYING
+-------
+Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under
+the terms of the GNU Public License (GPL).
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..915311d1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,865 @@
+Valid-License-Identifier: GPL-2.0
+Valid-License-Identifier: GPL-2.0-only
+Valid-License-Identifier: GPL-2.0+
+Valid-License-Identifier: GPL-2.0-or-later
+SPDX-URL: https://spdx.org/licenses/GPL-2.0.html
+Usage-Guide:
+ To use this license in source code, put one of the following SPDX
+ tag/value pairs into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ For 'GNU General Public License (GPL) version 2 only' use:
+ SPDX-License-Identifier: GPL-2.0
+ or
+ SPDX-License-Identifier: GPL-2.0-only
+ For 'GNU General Public License (GPL) version 2 or any later version' use:
+ SPDX-License-Identifier: GPL-2.0+
+ or
+ SPDX-License-Identifier: GPL-2.0-or-later
+License-Text:
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+-------------------------------------------------------------------------
+
+Valid-License-Identifier: LGPL-2.1
+Valid-License-Identifier: LGPL-2.1+
+SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html
+Usage-Guide:
+ To use this license in source code, put one of the following SPDX
+ tag/value pairs into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ For 'GNU Lesser General Public License (LGPL) version 2.1 only' use:
+ SPDX-License-Identifier: LGPL-2.1
+ For 'GNU Lesser General Public License (LGPL) version 2.1 or any later
+ version' use:
+ SPDX-License-Identifier: LGPL-2.1+
+License-Text:
+
+GNU LESSER GENERAL PUBLIC LICENSE
+Version 2.1, February 1999
+
+Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts as
+the successor of the GNU Library Public License, version 2, hence the
+version number 2.1.]
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to
+share and change it. By contrast, the GNU General Public Licenses are
+intended to guarantee your freedom to share and change free software--to
+make sure the software is free for all its users.
+
+This license, the Lesser General Public License, applies to some specially
+designated software packages--typically libraries--of the Free Software
+Foundation and other authors who decide to use it. You can use it too, but
+we suggest you first think carefully about whether this license or the
+ordinary General Public License is the better strategy to use in any
+particular case, based on the explanations below.
+
+When we speak of free software, we are referring to freedom of use, not
+price. Our General Public Licenses are designed to make sure that you have
+the freedom to distribute copies of free software (and charge for this
+service if you wish); that you receive source code or can get it if you
+want it; that you can change the software and use pieces of it in new free
+programs; and that you are informed that you can do these things.
+
+To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for you if
+you distribute copies of the library or if you modify it.
+
+For example, if you distribute copies of the library, whether gratis or for
+a fee, you must give the recipients all the rights that we gave you. You
+must make sure that they, too, receive or can get the source code. If you
+link other code with the library, you must provide complete object files to
+the recipients, so that they can relink them with the library after making
+changes to the library and recompiling it. And you must show them these
+terms so they know their rights.
+
+We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+To protect each distributor, we want to make it very clear that there is no
+warranty for the free library. Also, if the library is modified by someone
+else and passed on, the recipients should know that what they have is not
+the original version, so that the original author's reputation will not be
+affected by problems that might be introduced by others.
+
+Finally, software patents pose a constant threat to the existence of any
+free program. We wish to make sure that a company cannot effectively
+restrict the users of a free program by obtaining a restrictive license
+from a patent holder. Therefore, we insist that any patent license obtained
+for a version of the library must be consistent with the full freedom of
+use specified in this license.
+
+Most GNU software, including some libraries, is covered by the ordinary GNU
+General Public License. This license, the GNU Lesser General Public
+License, applies to certain designated libraries, and is quite different
+from the ordinary General Public License. We use this license for certain
+libraries in order to permit linking those libraries into non-free
+programs.
+
+When a program is linked with a library, whether statically or using a
+shared library, the combination of the two is legally speaking a combined
+work, a derivative of the original library. The ordinary General Public
+License therefore permits such linking only if the entire combination fits
+its criteria of freedom. The Lesser General Public License permits more lax
+criteria for linking other code with the library.
+
+We call this license the "Lesser" General Public License because it does
+Less to protect the user's freedom than the ordinary General Public
+License. It also provides other free software developers Less of an
+advantage over competing non-free programs. These disadvantages are the
+reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+For example, on rare occasions, there may be a special need to encourage
+the widest possible use of a certain library, so that it becomes a de-facto
+standard. To achieve this, non-free programs must be allowed to use the
+library. A more frequent case is that a free library does the same job as
+widely used non-free libraries. In this case, there is little to gain by
+limiting the free library to free software only, so we use the Lesser
+General Public License.
+
+In other cases, permission to use a particular library in non-free programs
+enables a greater number of people to use a large body of free
+software. For example, permission to use the GNU C Library in non-free
+programs enables many more people to use the whole GNU operating system, as
+well as its variant, the GNU/Linux operating system.
+
+Although the Lesser General Public License is Less protective of the users'
+freedom, it does ensure that the user of a program that is linked with the
+Library has the freedom and the wherewithal to run that program using a
+modified version of the Library.
+
+The precise terms and conditions for copying, distribution and modification
+follow. Pay close attention to the difference between a "work based on the
+library" and a "work that uses the library". The former contains code
+derived from the library, whereas the latter must be combined with the
+library in order to run.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License Agreement applies to any software library or other program
+ which contains a notice placed by the copyright holder or other
+ authorized party saying it may be distributed under the terms of this
+ Lesser General Public License (also called "this License"). Each
+ licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+ prepared so as to be conveniently linked with application programs
+ (which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work which
+ has been distributed under these terms. A "work based on the Library"
+ means either the Library or any derivative work under copyright law:
+ that is to say, a work containing the Library or a portion of it, either
+ verbatim or with modifications and/or translated straightforwardly into
+ another language. (Hereinafter, translation is included without
+ limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for making
+ modifications to it. For a library, complete source code means all the
+ source code for all modules it contains, plus any associated interface
+ definition files, plus the scripts used to control compilation and
+ installation of the library.
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of running
+ a program using the Library is not restricted, and output from such a
+ program is covered only if its contents constitute a work based on the
+ Library (independent of the use of the Library in a tool for writing
+ it). Whether that is true depends on what the Library does and what the
+ program that uses the Library does.
+
+1. You may copy and distribute verbatim copies of the Library's complete
+ source code as you receive it, in any medium, provided that you
+ conspicuously and appropriately publish on each copy an appropriate
+ copyright notice and disclaimer of warranty; keep intact all the notices
+ that refer to this License and to the absence of any warranty; and
+ distribute a copy of this License along with the Library.
+
+ You may charge a fee for the physical act of transferring a copy, and
+ you may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Library or any portion of it,
+ thus forming a work based on the Library, and copy and distribute such
+ modifications or work under the terms of Section 1 above, provided that
+ you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices stating
+ that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no charge to
+ all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a table
+ of data to be supplied by an application program that uses the
+ facility, other than as an argument passed when the facility is
+ invoked, then you must make a good faith effort to ensure that, in
+ the event an application does not supply such function or table, the
+ facility still operates, and performs whatever part of its purpose
+ remains meaningful.
+
+ (For example, a function in a library to compute square roots has a
+ purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must be
+ optional: if the application does not supply it, the square root
+ function must still compute square roots.)
+
+ These requirements apply to the modified work as a whole. If
+ identifiable sections of that work are not derived from the Library, and
+ can be reasonably considered independent and separate works in
+ themselves, then this License, and its terms, do not apply to those
+ sections when you distribute them as separate works. But when you
+ distribute the same sections as part of a whole which is a work based on
+ the Library, the distribution of the whole must be on the terms of this
+ License, whose permissions for other licensees extend to the entire
+ whole, and thus to each and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Library.
+
+ In addition, mere aggregation of another work not based on the Library
+ with the Library (or with a work based on the Library) on a volume of a
+ storage or distribution medium does not bring the other work under the
+ scope of this License.
+
+3. You may opt to apply the terms of the ordinary GNU General Public
+ License instead of this License to a given copy of the Library. To do
+ this, you must alter all the notices that refer to this License, so that
+ they refer to the ordinary GNU General Public License, version 2,
+ instead of to this License. (If a newer version than version 2 of the
+ ordinary GNU General Public License has appeared, then you can specify
+ that version instead if you wish.) Do not make any other change in these
+ notices.
+
+ Once this change is made in a given copy, it is irreversible for that
+ copy, so the ordinary GNU General Public License applies to all
+ subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of the
+ Library into a program that is not a library.
+
+4. You may copy and distribute the Library (or a portion or derivative of
+ it, under Section 2) in object code or executable form under the terms
+ of Sections 1 and 2 above provided that you accompany it with the
+ complete corresponding machine-readable source code, which must be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy from a
+ designated place, then offering equivalent access to copy the source
+ code from the same place satisfies the requirement to distribute the
+ source code, even though third parties are not compelled to copy the
+ source along with the object code.
+
+5. A program that contains no derivative of any portion of the Library, but
+ is designed to work with the Library by being compiled or linked with
+ it, is called a "work that uses the Library". Such a work, in isolation,
+ is not a derivative work of the Library, and therefore falls outside the
+ scope of this License.
+
+ However, linking a "work that uses the Library" with the Library creates
+ an executable that is a derivative of the Library (because it contains
+ portions of the Library), rather than a "work that uses the
+ library". The executable is therefore covered by this License. Section 6
+ states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+ that is part of the Library, the object code for the work may be a
+ derivative work of the Library even though the source code is
+ not. Whether this is true is especially significant if the work can be
+ linked without the Library, or if the work is itself a library. The
+ threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data structure
+ layouts and accessors, and small macros and small inline functions (ten
+ lines or less in length), then the use of the object file is
+ unrestricted, regardless of whether it is legally a derivative
+ work. (Executables containing this object code plus portions of the
+ Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+ distribute the object code for the work under the terms of Section
+ 6. Any executables containing that work also fall under Section 6,
+ whether or not they are linked directly with the Library itself.
+
+6. As an exception to the Sections above, you may also combine or link a
+ "work that uses the Library" with the Library to produce a work
+ containing portions of the Library, and distribute that work under terms
+ of your choice, provided that the terms permit modification of the work
+ for the customer's own use and reverse engineering for debugging such
+ modifications.
+
+ You must give prominent notice with each copy of the work that the
+ Library is used in it and that the Library and its use are covered by
+ this License. You must supply a copy of this License. If the work during
+ execution displays copyright notices, you must include the copyright
+ notice for the Library among them, as well as a reference directing the
+ user to the copy of this License. Also, you must do one of these things:
+
+ a) Accompany the work with the complete corresponding machine-readable
+ source code for the Library including whatever changes were used in
+ the work (which must be distributed under Sections 1 and 2 above);
+ and, if the work is an executable linked with the Library, with the
+ complete machine-readable "work that uses the Library", as object
+ code and/or source code, so that the user can modify the Library and
+ then relink to produce a modified executable containing the modified
+ Library. (It is understood that the user who changes the contents of
+ definitions files in the Library will not necessarily be able to
+ recompile the application to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a copy
+ of the library already present on the user's computer system, rather
+ than copying library functions into the executable, and (2) will
+ operate properly with a modified version of the library, if the user
+ installs one, as long as the modified version is interface-compatible
+ with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least three
+ years, to give the same user the materials specified in Subsection
+ 6a, above, for a charge no more than the cost of performing this
+ distribution.
+
+ d) If distribution of the work is made by offering access to copy from a
+ designated place, offer equivalent access to copy the above specified
+ materials from the same place.
+
+ e) Verify that the user has already received a copy of these materials
+ or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the Library"
+ must include any data and utility programs needed for reproducing the
+ executable from it. However, as a special exception, the materials to be
+ distributed need not include anything that is normally distributed (in
+ either source or binary form) with the major components (compiler,
+ kernel, and so on) of the operating system on which the executable runs,
+ unless that component itself accompanies the executable.
+
+ It may happen that this requirement contradicts the license restrictions
+ of other proprietary libraries that do not normally accompany the
+ operating system. Such a contradiction means you cannot use both them
+ and the Library together in an executable that you distribute.
+
+7. You may place library facilities that are a work based on the Library
+ side-by-side in a single library together with other library facilities
+ not covered by this License, and distribute such a combined library,
+ provided that the separate distribution of the work based on the Library
+ and of the other library facilities is otherwise permitted, and provided
+ that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work based on
+ the Library, uncombined with any other library facilities. This must
+ be distributed under the terms of the Sections above.
+
+ b) Give prominent notice with the combined library of the fact that part
+ of it is a work based on the Library, and explaining where to find
+ the accompanying uncombined form of the same work.
+
+8. You may not copy, modify, sublicense, link with, or distribute the
+ Library except as expressly provided under this License. Any attempt
+ otherwise to copy, modify, sublicense, link with, or distribute the
+ Library is void, and will automatically terminate your rights under this
+ License. However, parties who have received copies, or rights, from you
+ under this License will not have their licenses terminated so long as
+ such parties remain in full compliance.
+
+9. You are not required to accept this License, since you have not signed
+ it. However, nothing else grants you permission to modify or distribute
+ the Library or its derivative works. These actions are prohibited by law
+ if you do not accept this License. Therefore, by modifying or
+ distributing the Library (or any work based on the Library), you
+ indicate your acceptance of this License to do so, and all its terms and
+ conditions for copying, distributing or modifying the Library or works
+ based on it.
+
+10. Each time you redistribute the Library (or any work based on the
+ Library), the recipient automatically receives a license from the
+ original licensor to copy, distribute, link with or modify the Library
+ subject to these terms and conditions. You may not impose any further
+ restrictions on the recipients' exercise of the rights granted
+ herein. You are not responsible for enforcing compliance by third
+ parties with this License.
+
+11. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot
+ distribute so as to satisfy simultaneously your obligations under this
+ License and any other pertinent obligations, then as a consequence you
+ may not distribute the Library at all. For example, if a patent license
+ would not permit royalty-free redistribution of the Library by all
+ those who receive copies directly or indirectly through you, then the
+ only way you could satisfy both it and this License would be to refrain
+ entirely from distribution of the Library.
+
+ If any portion of this section is held invalid or unenforceable under
+ any particular circumstance, the balance of the section is intended to
+ apply, and the section as a whole is intended to apply in other
+ circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system which is implemented
+ by public license practices. Many people have made generous
+ contributions to the wide range of software distributed through that
+ system in reliance on consistent application of that system; it is up
+ to the author/donor to decide if he or she is willing to distribute
+ software through any other system and a licensee cannot impose that
+ choice.
+
+ This section is intended to make thoroughly clear what is believed to
+ be a consequence of the rest of this License.
+
+12. If the distribution and/or use of the Library is restricted in certain
+ countries either by patents or by copyrighted interfaces, the original
+ copyright holder who places the Library under this License may add an
+ explicit geographical distribution limitation excluding those
+ countries, so that distribution is permitted only in or among countries
+ not thus excluded. In such case, this License incorporates the
+ limitation as if written in the body of this License.
+
+13. The Free Software Foundation may publish revised and/or new versions of
+ the Lesser General Public License from time to time. Such new versions
+ will be similar in spirit to the present version, but may differ in
+ detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Library
+ specifies a version number of this License which applies to it and "any
+ later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Library does not specify a license
+ version number, you may choose any version ever published by the Free
+ Software Foundation.
+
+14. If you wish to incorporate parts of the Library into other free
+ programs whose distribution conditions are incompatible with these,
+ write to the author to ask for permission. For software which is
+ copyrighted by the Free Software Foundation, write to the Free Software
+ Foundation; we sometimes make exceptions for this. Our decision will be
+ guided by the two goals of preserving the free status of all
+ derivatives of our free software and of promoting the sharing and reuse
+ of software generally.
+
+NO WARRANTY
+
+15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+ FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+ PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH
+ YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+ NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+ REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY
+ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+ THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR
+ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Libraries
+
+If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+<one line to give the library's name and an idea of what it does.>
+Copyright (C) <year> <name of author>
+
+This library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at
+your option) any later version.
+
+This library is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in
+the library `Frob' (a library for tweaking knobs) written
+by James Random Hacker.
+
+<signature of Ty Coon>, 1 April 1990
+Ty Coon, President of Vice
+That's all there is to it!
diff --git a/LICENSES/GPL-2.0 b/LICENSES/GPL-2.0
new file mode 100644
index 00000000..ff0812fd
--- /dev/null
+++ b/LICENSES/GPL-2.0
@@ -0,0 +1,359 @@
+Valid-License-Identifier: GPL-2.0
+Valid-License-Identifier: GPL-2.0-only
+Valid-License-Identifier: GPL-2.0+
+Valid-License-Identifier: GPL-2.0-or-later
+SPDX-URL: https://spdx.org/licenses/GPL-2.0.html
+Usage-Guide:
+ To use this license in source code, put one of the following SPDX
+ tag/value pairs into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ For 'GNU General Public License (GPL) version 2 only' use:
+ SPDX-License-Identifier: GPL-2.0
+ or
+ SPDX-License-Identifier: GPL-2.0-only
+ For 'GNU General Public License (GPL) version 2 or any later version' use:
+ SPDX-License-Identifier: GPL-2.0+
+ or
+ SPDX-License-Identifier: GPL-2.0-or-later
+License-Text:
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/LICENSES/LGPL-2.1 b/LICENSES/LGPL-2.1
new file mode 100644
index 00000000..27bb4342
--- /dev/null
+++ b/LICENSES/LGPL-2.1
@@ -0,0 +1,503 @@
+Valid-License-Identifier: LGPL-2.1
+Valid-License-Identifier: LGPL-2.1+
+SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html
+Usage-Guide:
+ To use this license in source code, put one of the following SPDX
+ tag/value pairs into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ For 'GNU Lesser General Public License (LGPL) version 2.1 only' use:
+ SPDX-License-Identifier: LGPL-2.1
+ For 'GNU Lesser General Public License (LGPL) version 2.1 or any later
+ version' use:
+ SPDX-License-Identifier: LGPL-2.1+
+License-Text:
+
+GNU LESSER GENERAL PUBLIC LICENSE
+Version 2.1, February 1999
+
+Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts as
+the successor of the GNU Library Public License, version 2, hence the
+version number 2.1.]
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to
+share and change it. By contrast, the GNU General Public Licenses are
+intended to guarantee your freedom to share and change free software--to
+make sure the software is free for all its users.
+
+This license, the Lesser General Public License, applies to some specially
+designated software packages--typically libraries--of the Free Software
+Foundation and other authors who decide to use it. You can use it too, but
+we suggest you first think carefully about whether this license or the
+ordinary General Public License is the better strategy to use in any
+particular case, based on the explanations below.
+
+When we speak of free software, we are referring to freedom of use, not
+price. Our General Public Licenses are designed to make sure that you have
+the freedom to distribute copies of free software (and charge for this
+service if you wish); that you receive source code or can get it if you
+want it; that you can change the software and use pieces of it in new free
+programs; and that you are informed that you can do these things.
+
+To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for you if
+you distribute copies of the library or if you modify it.
+
+For example, if you distribute copies of the library, whether gratis or for
+a fee, you must give the recipients all the rights that we gave you. You
+must make sure that they, too, receive or can get the source code. If you
+link other code with the library, you must provide complete object files to
+the recipients, so that they can relink them with the library after making
+changes to the library and recompiling it. And you must show them these
+terms so they know their rights.
+
+We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+To protect each distributor, we want to make it very clear that there is no
+warranty for the free library. Also, if the library is modified by someone
+else and passed on, the recipients should know that what they have is not
+the original version, so that the original author's reputation will not be
+affected by problems that might be introduced by others.
+
+Finally, software patents pose a constant threat to the existence of any
+free program. We wish to make sure that a company cannot effectively
+restrict the users of a free program by obtaining a restrictive license
+from a patent holder. Therefore, we insist that any patent license obtained
+for a version of the library must be consistent with the full freedom of
+use specified in this license.
+
+Most GNU software, including some libraries, is covered by the ordinary GNU
+General Public License. This license, the GNU Lesser General Public
+License, applies to certain designated libraries, and is quite different
+from the ordinary General Public License. We use this license for certain
+libraries in order to permit linking those libraries into non-free
+programs.
+
+When a program is linked with a library, whether statically or using a
+shared library, the combination of the two is legally speaking a combined
+work, a derivative of the original library. The ordinary General Public
+License therefore permits such linking only if the entire combination fits
+its criteria of freedom. The Lesser General Public License permits more lax
+criteria for linking other code with the library.
+
+We call this license the "Lesser" General Public License because it does
+Less to protect the user's freedom than the ordinary General Public
+License. It also provides other free software developers Less of an
+advantage over competing non-free programs. These disadvantages are the
+reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+For example, on rare occasions, there may be a special need to encourage
+the widest possible use of a certain library, so that it becomes a de-facto
+standard. To achieve this, non-free programs must be allowed to use the
+library. A more frequent case is that a free library does the same job as
+widely used non-free libraries. In this case, there is little to gain by
+limiting the free library to free software only, so we use the Lesser
+General Public License.
+
+In other cases, permission to use a particular library in non-free programs
+enables a greater number of people to use a large body of free
+software. For example, permission to use the GNU C Library in non-free
+programs enables many more people to use the whole GNU operating system, as
+well as its variant, the GNU/Linux operating system.
+
+Although the Lesser General Public License is Less protective of the users'
+freedom, it does ensure that the user of a program that is linked with the
+Library has the freedom and the wherewithal to run that program using a
+modified version of the Library.
+
+The precise terms and conditions for copying, distribution and modification
+follow. Pay close attention to the difference between a "work based on the
+library" and a "work that uses the library". The former contains code
+derived from the library, whereas the latter must be combined with the
+library in order to run.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License Agreement applies to any software library or other program
+ which contains a notice placed by the copyright holder or other
+ authorized party saying it may be distributed under the terms of this
+ Lesser General Public License (also called "this License"). Each
+ licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+ prepared so as to be conveniently linked with application programs
+ (which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work which
+ has been distributed under these terms. A "work based on the Library"
+ means either the Library or any derivative work under copyright law:
+ that is to say, a work containing the Library or a portion of it, either
+ verbatim or with modifications and/or translated straightforwardly into
+ another language. (Hereinafter, translation is included without
+ limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for making
+ modifications to it. For a library, complete source code means all the
+ source code for all modules it contains, plus any associated interface
+ definition files, plus the scripts used to control compilation and
+ installation of the library.
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of running
+ a program using the Library is not restricted, and output from such a
+ program is covered only if its contents constitute a work based on the
+ Library (independent of the use of the Library in a tool for writing
+ it). Whether that is true depends on what the Library does and what the
+ program that uses the Library does.
+
+1. You may copy and distribute verbatim copies of the Library's complete
+ source code as you receive it, in any medium, provided that you
+ conspicuously and appropriately publish on each copy an appropriate
+ copyright notice and disclaimer of warranty; keep intact all the notices
+ that refer to this License and to the absence of any warranty; and
+ distribute a copy of this License along with the Library.
+
+ You may charge a fee for the physical act of transferring a copy, and
+ you may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Library or any portion of it,
+ thus forming a work based on the Library, and copy and distribute such
+ modifications or work under the terms of Section 1 above, provided that
+ you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices stating
+ that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no charge to
+ all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a table
+ of data to be supplied by an application program that uses the
+ facility, other than as an argument passed when the facility is
+ invoked, then you must make a good faith effort to ensure that, in
+ the event an application does not supply such function or table, the
+ facility still operates, and performs whatever part of its purpose
+ remains meaningful.
+
+ (For example, a function in a library to compute square roots has a
+ purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must be
+ optional: if the application does not supply it, the square root
+ function must still compute square roots.)
+
+ These requirements apply to the modified work as a whole. If
+ identifiable sections of that work are not derived from the Library, and
+ can be reasonably considered independent and separate works in
+ themselves, then this License, and its terms, do not apply to those
+ sections when you distribute them as separate works. But when you
+ distribute the same sections as part of a whole which is a work based on
+ the Library, the distribution of the whole must be on the terms of this
+ License, whose permissions for other licensees extend to the entire
+ whole, and thus to each and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Library.
+
+ In addition, mere aggregation of another work not based on the Library
+ with the Library (or with a work based on the Library) on a volume of a
+ storage or distribution medium does not bring the other work under the
+ scope of this License.
+
+3. You may opt to apply the terms of the ordinary GNU General Public
+ License instead of this License to a given copy of the Library. To do
+ this, you must alter all the notices that refer to this License, so that
+ they refer to the ordinary GNU General Public License, version 2,
+ instead of to this License. (If a newer version than version 2 of the
+ ordinary GNU General Public License has appeared, then you can specify
+ that version instead if you wish.) Do not make any other change in these
+ notices.
+
+ Once this change is made in a given copy, it is irreversible for that
+ copy, so the ordinary GNU General Public License applies to all
+ subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of the
+ Library into a program that is not a library.
+
+4. You may copy and distribute the Library (or a portion or derivative of
+ it, under Section 2) in object code or executable form under the terms
+ of Sections 1 and 2 above provided that you accompany it with the
+ complete corresponding machine-readable source code, which must be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy from a
+ designated place, then offering equivalent access to copy the source
+ code from the same place satisfies the requirement to distribute the
+ source code, even though third parties are not compelled to copy the
+ source along with the object code.
+
+5. A program that contains no derivative of any portion of the Library, but
+ is designed to work with the Library by being compiled or linked with
+ it, is called a "work that uses the Library". Such a work, in isolation,
+ is not a derivative work of the Library, and therefore falls outside the
+ scope of this License.
+
+ However, linking a "work that uses the Library" with the Library creates
+ an executable that is a derivative of the Library (because it contains
+ portions of the Library), rather than a "work that uses the
+ library". The executable is therefore covered by this License. Section 6
+ states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+ that is part of the Library, the object code for the work may be a
+ derivative work of the Library even though the source code is
+ not. Whether this is true is especially significant if the work can be
+ linked without the Library, or if the work is itself a library. The
+ threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data structure
+ layouts and accessors, and small macros and small inline functions (ten
+ lines or less in length), then the use of the object file is
+ unrestricted, regardless of whether it is legally a derivative
+ work. (Executables containing this object code plus portions of the
+ Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+ distribute the object code for the work under the terms of Section
+ 6. Any executables containing that work also fall under Section 6,
+ whether or not they are linked directly with the Library itself.
+
+6. As an exception to the Sections above, you may also combine or link a
+ "work that uses the Library" with the Library to produce a work
+ containing portions of the Library, and distribute that work under terms
+ of your choice, provided that the terms permit modification of the work
+ for the customer's own use and reverse engineering for debugging such
+ modifications.
+
+ You must give prominent notice with each copy of the work that the
+ Library is used in it and that the Library and its use are covered by
+ this License. You must supply a copy of this License. If the work during
+ execution displays copyright notices, you must include the copyright
+ notice for the Library among them, as well as a reference directing the
+ user to the copy of this License. Also, you must do one of these things:
+
+ a) Accompany the work with the complete corresponding machine-readable
+ source code for the Library including whatever changes were used in
+ the work (which must be distributed under Sections 1 and 2 above);
+ and, if the work is an executable linked with the Library, with the
+ complete machine-readable "work that uses the Library", as object
+ code and/or source code, so that the user can modify the Library and
+ then relink to produce a modified executable containing the modified
+ Library. (It is understood that the user who changes the contents of
+ definitions files in the Library will not necessarily be able to
+ recompile the application to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a copy
+ of the library already present on the user's computer system, rather
+ than copying library functions into the executable, and (2) will
+ operate properly with a modified version of the library, if the user
+ installs one, as long as the modified version is interface-compatible
+ with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least three
+ years, to give the same user the materials specified in Subsection
+ 6a, above, for a charge no more than the cost of performing this
+ distribution.
+
+ d) If distribution of the work is made by offering access to copy from a
+ designated place, offer equivalent access to copy the above specified
+ materials from the same place.
+
+ e) Verify that the user has already received a copy of these materials
+ or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the Library"
+ must include any data and utility programs needed for reproducing the
+ executable from it. However, as a special exception, the materials to be
+ distributed need not include anything that is normally distributed (in
+ either source or binary form) with the major components (compiler,
+ kernel, and so on) of the operating system on which the executable runs,
+ unless that component itself accompanies the executable.
+
+ It may happen that this requirement contradicts the license restrictions
+ of other proprietary libraries that do not normally accompany the
+ operating system. Such a contradiction means you cannot use both them
+ and the Library together in an executable that you distribute.
+
+7. You may place library facilities that are a work based on the Library
+ side-by-side in a single library together with other library facilities
+ not covered by this License, and distribute such a combined library,
+ provided that the separate distribution of the work based on the Library
+ and of the other library facilities is otherwise permitted, and provided
+ that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work based on
+ the Library, uncombined with any other library facilities. This must
+ be distributed under the terms of the Sections above.
+
+ b) Give prominent notice with the combined library of the fact that part
+ of it is a work based on the Library, and explaining where to find
+ the accompanying uncombined form of the same work.
+
+8. You may not copy, modify, sublicense, link with, or distribute the
+ Library except as expressly provided under this License. Any attempt
+ otherwise to copy, modify, sublicense, link with, or distribute the
+ Library is void, and will automatically terminate your rights under this
+ License. However, parties who have received copies, or rights, from you
+ under this License will not have their licenses terminated so long as
+ such parties remain in full compliance.
+
+9. You are not required to accept this License, since you have not signed
+ it. However, nothing else grants you permission to modify or distribute
+ the Library or its derivative works. These actions are prohibited by law
+ if you do not accept this License. Therefore, by modifying or
+ distributing the Library (or any work based on the Library), you
+ indicate your acceptance of this License to do so, and all its terms and
+ conditions for copying, distributing or modifying the Library or works
+ based on it.
+
+10. Each time you redistribute the Library (or any work based on the
+ Library), the recipient automatically receives a license from the
+ original licensor to copy, distribute, link with or modify the Library
+ subject to these terms and conditions. You may not impose any further
+ restrictions on the recipients' exercise of the rights granted
+ herein. You are not responsible for enforcing compliance by third
+ parties with this License.
+
+11. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot
+ distribute so as to satisfy simultaneously your obligations under this
+ License and any other pertinent obligations, then as a consequence you
+ may not distribute the Library at all. For example, if a patent license
+ would not permit royalty-free redistribution of the Library by all
+ those who receive copies directly or indirectly through you, then the
+ only way you could satisfy both it and this License would be to refrain
+ entirely from distribution of the Library.
+
+ If any portion of this section is held invalid or unenforceable under
+ any particular circumstance, the balance of the section is intended to
+ apply, and the section as a whole is intended to apply in other
+ circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system which is implemented
+ by public license practices. Many people have made generous
+ contributions to the wide range of software distributed through that
+ system in reliance on consistent application of that system; it is up
+ to the author/donor to decide if he or she is willing to distribute
+ software through any other system and a licensee cannot impose that
+ choice.
+
+ This section is intended to make thoroughly clear what is believed to
+ be a consequence of the rest of this License.
+
+12. If the distribution and/or use of the Library is restricted in certain
+ countries either by patents or by copyrighted interfaces, the original
+ copyright holder who places the Library under this License may add an
+ explicit geographical distribution limitation excluding those
+ countries, so that distribution is permitted only in or among countries
+ not thus excluded. In such case, this License incorporates the
+ limitation as if written in the body of this License.
+
+13. The Free Software Foundation may publish revised and/or new versions of
+ the Lesser General Public License from time to time. Such new versions
+ will be similar in spirit to the present version, but may differ in
+ detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Library
+ specifies a version number of this License which applies to it and "any
+ later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Library does not specify a license
+ version number, you may choose any version ever published by the Free
+ Software Foundation.
+
+14. If you wish to incorporate parts of the Library into other free
+ programs whose distribution conditions are incompatible with these,
+ write to the author to ask for permission. For software which is
+ copyrighted by the Free Software Foundation, write to the Free Software
+ Foundation; we sometimes make exceptions for this. Our decision will be
+ guided by the two goals of preserving the free status of all
+ derivatives of our free software and of promoting the sharing and reuse
+ of software generally.
+
+NO WARRANTY
+
+15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+ FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+ PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH
+ YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+ NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+ REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY
+ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+ THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR
+ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Libraries
+
+If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+one line to give the library's name and an idea of what it does.
+Copyright (C) year name of author
+
+This library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at
+your option) any later version.
+
+This library is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add
+information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in
+the library `Frob' (a library for tweaking knobs) written
+by James Random Hacker.
+
+signature of Ty Coon, 1 April 1990
+Ty Coon, President of Vice
+That's all there is to it!
diff --git a/METADATA b/METADATA
new file mode 100644
index 00000000..eca2378b
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,13 @@
+name: "trace-cmd"
+description:
+ "trace-cmd is a front-end to the ftrace Linux kernel tracer."
+
+third_party {
+ url {
+ type: GIT
+ value: "https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git"
+ }
+ version: "trace-cmd-v3.0.2"
+ last_upgrade_date { year: 2022 month: 5 day: 02 }
+ license_type: RESTRICTED
+}
diff --git a/MODULE_LICENSE_GPL b/MODULE_LICENSE_GPL
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/MODULE_LICENSE_GPL
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..982514ba
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,591 @@
+# SPDX-License-Identifier: GPL-2.0
+# trace-cmd version
+TC_VERSION = 3
+TC_PATCHLEVEL = 0
+TC_EXTRAVERSION = 3
+TRACECMD_VERSION = $(TC_VERSION).$(TC_PATCHLEVEL).$(TC_EXTRAVERSION)
+
+export TC_VERSION
+export TC_PATCHLEVEL
+export TC_EXTRAVERSION
+export TRACECMD_VERSION
+
+LIBTC_VERSION = 1
+LIBTC_PATCHLEVEL = 1
+LIBTC_EXTRAVERSION = 3
+LIBTRACECMD_VERSION = $(LIBTC_VERSION).$(LIBTC_PATCHLEVEL).$(LIBTC_EXTRAVERSION)
+
+export LIBTC_VERSION
+export LIBTC_PATCHLEVEL
+export LIBTC_EXTRAVERSION
+export LIBTRACECMD_VERSION
+
+VERSION_FILE = ltc_version.h
+
+LIBTRACEEVENT_MIN_VERSION = 1.5
+LIBTRACEFS_MIN_VERSION = 1.3
+
+MAKEFLAGS += --no-print-directory
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,PKG_CONFIG,pkg-config)
+$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/)
+$(call allow-override,LDCONFIG,ldconfig)
+
+export LD_SO_CONF_PATH LDCONFIG
+
+EXT = -std=gnu99
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+ libdir_relative_temp = lib64
+else
+ libdir_relative_temp = lib
+endif
+
+libdir_relative ?= $(libdir_relative_temp)
+prefix ?= /usr/local
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+man_dir = $(prefix)/share/man
+man_dir_SQ = '$(subst ','\'',$(man_dir))'
+html_install_SQ = '$(subst ','\'',$(html_install))'
+img_install_SQ = '$(subst ','\'',$(img_install))'
+libdir = $(prefix)/$(libdir_relative)
+libdir_SQ = '$(subst ','\'',$(libdir))'
+includedir = $(prefix)/include
+includedir_SQ = '$(subst ','\'',$(includedir))'
+pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \
+ --variable pc_path pkg-config | tr ":" " "))
+
+etcdir ?= /etc
+etcdir_SQ = '$(subst ','\'',$(etcdir))'
+
+export man_dir man_dir_SQ html_install html_install_SQ INSTALL
+export img_install img_install_SQ libdir libdir_SQ includedir_SQ
+export DESTDIR DESTDIR_SQ
+
+ifeq ($(prefix),$(HOME))
+plugin_tracecmd_dir = $(libdir)/trace-cmd/plugins
+python_dir ?= $(libdir)/trace-cmd/python
+var_dir = $(HOME)/.trace-cmd/
+else
+python_dir ?= $(libdir)/trace-cmd/python
+PLUGIN_DIR_TRACECMD = -DPLUGIN_TRACECMD_DIR="$(plugin_tracecmd_dir)"
+PYTHON_DIR = -DPYTHON_DIR="$(python_dir)"
+PLUGIN_DIR_TRACECMD_SQ = '$(subst ','\'',$(PLUGIN_DIR_TRACECMD))'
+PYTHON_DIR_SQ = '$(subst ','\'',$(PYTHON_DIR))'
+var_dir = /var
+endif
+
+# Shell quotes
+bindir_SQ = $(subst ','\'',$(bindir))
+bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+plugin_tracecmd_dir_SQ = $(subst ','\'',$(plugin_tracecmd_dir))
+python_dir_SQ = $(subst ','\'',$(python_dir))
+
+pound := \#
+
+VAR_DIR = -DVAR_DIR="$(var_dir)"
+VAR_DIR_SQ = '$(subst ','\'',$(VAR_DIR))'
+var_dir_SQ = '$(subst ','\'',$(var_dir))'
+
+HELP_DIR = -DHELP_DIR=$(html_install)
+HELP_DIR_SQ = '$(subst ','\'',$(HELP_DIR))'
+#' emacs highlighting gets confused by the above escaped quote.
+
+BASH_COMPLETE_DIR ?= $(etcdir)/bash_completion.d
+
+export PLUGIN_DIR_TRACECMD
+export PYTHON_DIR
+export PYTHON_DIR_SQ
+export plugin_tracecmd_dir_SQ
+export python_dir_SQ
+export var_dir
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+SILENT := $(if $(findstring s,$(filter-out --%,$(MAKEFLAGS))),1)
+
+SWIG_DEFINED := $(shell if command -v swig; then echo 1; else echo 0; fi)
+ifeq ($(SWIG_DEFINED), 0)
+BUILD_PYTHON := report_noswig
+NO_PYTHON = 1
+endif
+
+ifndef NO_PYTHON
+PYTHON := ctracecmd.so
+
+PYTHON_VERS ?= python
+PYTHON_PKGCONFIG_VERS ?= $(PYTHON_VERS)
+
+# Can build python?
+ifeq ($(shell sh -c "$(PKG_CONFIG) --cflags $(PYTHON_PKGCONFIG_VERS) > /dev/null 2>&1 && echo y"), y)
+ BUILD_PYTHON := $(PYTHON)
+ BUILD_PYTHON_WORKS := 1
+else
+ BUILD_PYTHON := report_nopythondev
+ NO_PYTHON = 1
+endif
+endif # NO_PYTHON
+
+export BUILD_PYTHON_WORKS
+export NO_PYTHON
+
+# $(call test-build, snippet, ret) -> ret if snippet compiles
+# -> empty otherwise
+test-build = $(if $(shell sh -c 'echo "$(1)" | \
+ $(CC) -o /dev/null -c -x c - > /dev/null 2>&1 && echo y'), $2)
+
+UDIS86_AVAILABLE := $(call test-build,\#include <udis86.h>, y)
+ifneq ($(strip $(UDIS86_AVAILABLE)), y)
+NO_UDIS86 := 1
+endif
+
+ifndef NO_UDIS86
+# have udis86 disassembler library?
+udis86-flags := -DHAVE_UDIS86 -ludis86
+udis86-ldflags := -ludis86
+endif # NO_UDIS86
+
+define BLK_TC_FLUSH_SOURCE
+#include <linux/blktrace_api.h>
+int main(void) { return BLK_TC_FLUSH; }
+endef
+
+# have flush/fua block layer instead of barriers?
+blk-flags := $(call test-build,$(BLK_TC_FLUSH_SOURCE),-DHAVE_BLK_TC_FLUSH)
+
+ifeq ("$(origin O)", "command line")
+
+ saved-output := $(O)
+ BUILD_OUTPUT := $(shell cd $(O) && /bin/pwd)
+ $(if $(BUILD_OUTPUT),, \
+ $(error output directory "$(saved-output)" does not exist))
+
+else
+ BUILD_OUTPUT = $(CURDIR)
+endif
+
+srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
+objtree := $(BUILD_OUTPUT)
+src := $(srctree)
+obj := $(objtree)
+
+PKG_CONFIG_SOURCE_FILE = libtracecmd.pc
+PKG_CONFIG_FILE := $(addprefix $(BUILD_OUTPUT)/,$(PKG_CONFIG_SOURCE_FILE))
+
+export pkgconfig_dir PKG_CONFIG_FILE
+
+export prefix bindir src obj
+
+LIBS = -ldl
+
+LIBTRACECMD_DIR = $(obj)/lib/trace-cmd
+LIBTRACECMD_STATIC = $(LIBTRACECMD_DIR)/libtracecmd.a
+LIBTRACECMD_SHARED = $(LIBTRACECMD_DIR)/libtracecmd.so.$(LIBTRACECMD_VERSION)
+LIBTRACECMD_SHARED_VERSION = $(shell echo $(LIBTRACECMD_SHARED) | sed -e 's/\(\.so\.[0-9]*\).*/\1/')
+LIBTRACECMD_SHARED_SO = $(shell echo $(LIBTRACECMD_SHARED) | sed -e 's/\(\.so\).*/\1/')
+
+export LIBTRACECMD_STATIC LIBTRACECMD_SHARED
+export LIBTRACECMD_SHARED_VERSION LIBTRACECMD_SHARED_SO
+
+LIBTRACEEVENT=libtraceevent
+LIBTRACEFS=libtracefs
+
+TEST_LIBTRACEEVENT = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) $(LIBTRACEEVENT) > /dev/null 2>&1 && echo y")
+TEST_LIBTRACEFS = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) $(LIBTRACEFS) > /dev/null 2>&1 && echo y")
+
+ifeq ("$(TEST_LIBTRACEEVENT)", "y")
+LIBTRACEEVENT_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags $(LIBTRACEEVENT)")
+LIBTRACEEVENT_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs $(LIBTRACEEVENT)")
+else
+.PHONY: warning
+warning:
+ @echo "********************************************"
+ @echo "** NOTICE: libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher not found on system"
+ @echo "**"
+ @echo "** Consider installing the latest libtraceevent from your"
+ @echo "** distribution, or from source:"
+ @echo "**"
+ @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ "
+ @echo "**"
+ @echo "********************************************"
+endif
+
+export LIBTRACEEVENT_CFLAGS LIBTRACEEVENT_LDLAGS
+
+ifeq ("$(TEST_LIBTRACEFS)", "y")
+LIBTRACEFS_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags $(LIBTRACEFS)")
+LIBTRACEFS_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs $(LIBTRACEFS)")
+else
+.PHONY: warning
+warning:
+ @echo "********************************************"
+ @echo "** NOTICE: libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher not found on system"
+ @echo "**"
+ @echo "** Consider installing the latest libtracefs from your"
+ @echo "** distribution, or from source:"
+ @echo "**"
+ @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ "
+ @echo "**"
+ @echo "********************************************"
+endif
+
+export LIBTRACEFS_CFLAGS LIBTRACEFS_LDLAGS
+
+TRACE_LIBS = -L$(LIBTRACECMD_DIR) -ltracecmd \
+ $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS)
+
+export LIBS TRACE_LIBS
+export LIBTRACECMD_DIR
+export Q SILENT VERBOSE EXT
+
+# Include the utils
+include scripts/utils.mk
+
+INCLUDES = -I$(src)/include -I$(src)/../../include
+INCLUDES += -I$(src)/include/trace-cmd
+INCLUDES += -I$(src)/lib/trace-cmd/include
+INCLUDES += -I$(src)/lib/trace-cmd/include/private
+INCLUDES += -I$(src)/tracecmd/include
+INCLUDES += $(LIBTRACEEVENT_CFLAGS)
+INCLUDES += $(LIBTRACEFS_CFLAGS)
+
+include $(src)/features.mk
+
+# Set compile option CFLAGS if not set elsewhere
+CFLAGS ?= -g -Wall
+CPPFLAGS ?=
+LDFLAGS ?=
+
+ifndef NO_VSOCK
+VSOCK_DEFINED := $(shell if (echo "$(pound)include <linux/vm_sockets.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+else
+VSOCK_DEFINED := 0
+endif
+
+export VSOCK_DEFINED
+ifeq ($(VSOCK_DEFINED), 1)
+CFLAGS += -DVSOCK
+endif
+
+PERF_DEFINED := $(shell if (echo "$(pound)include <linux/perf_event.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+export PERF_DEFINED
+ifeq ($(PERF_DEFINED), 1)
+CFLAGS += -DPERF
+endif
+
+ZLIB_INSTALLED := $(shell if (printf "$(pound)include <zlib.h>\n void main(){deflateInit(NULL, Z_BEST_COMPRESSION);}" | $(CC) -o /dev/null -x c - -lz >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+ifeq ($(ZLIB_INSTALLED), 1)
+export ZLIB_INSTALLED
+CFLAGS += -DHAVE_ZLIB
+$(info Have zlib compression support)
+endif
+
+TEST_LIBZSTD = $(shell sh -c "$(PKG_CONFIG) --atleast-version 1.4.0 libzstd > /dev/null 2>&1 && echo y")
+
+ifeq ("$(TEST_LIBZSTD)", "y")
+LIBZSTD_CFLAGS = $(shell sh -c "$(PKG_CONFIG) --cflags libzstd")
+LIBZSTD_LDLAGS = $(shell sh -c "$(PKG_CONFIG) --libs libzstd")
+CFLAGS += -DHAVE_ZSTD
+ZSTD_INSTALLED=1
+$(info Have ZSTD compression support)
+else
+$(info *************************************************************)
+$(info ZSTD package not found, best compression algorithm not in use)
+$(info *************************************************************)
+endif
+
+export LIBZSTD_CFLAGS LIBZSTD_LDLAGS ZSTD_INSTALLED
+
+CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -o /dev/null -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
+export CUNIT_INSTALLED
+
+export CFLAGS
+export INCLUDES
+
+# Required CFLAGS
+override CFLAGS += -D_GNU_SOURCE
+
+ifndef NO_PTRACE
+ifneq ($(call try-cc,$(SOURCE_PTRACE),),y)
+ NO_PTRACE = 1
+ override CFLAGS += -DWARN_NO_PTRACE
+endif
+endif
+
+ifdef NO_PTRACE
+override CFLAGS += -DNO_PTRACE
+endif
+
+ifndef NO_AUDIT
+ifneq ($(call try-cc,$(SOURCE_AUDIT),-laudit),y)
+ NO_AUDIT = 1
+ override CFLAGS += -DWARN_NO_AUDIT
+endif
+endif
+
+ifdef NO_AUDIT
+override CFLAGS += -DNO_AUDIT
+else
+LIBS += -laudit
+endif
+
+# Append required CFLAGS
+override CFLAGS += $(INCLUDES) $(VAR_DIR)
+override CFLAGS += $(PLUGIN_DIR_TRACECMD_SQ)
+override CFLAGS += $(udis86-flags) $(blk-flags)
+override LDFLAGS += $(udis86-ldflags)
+
+CMD_TARGETS = trace-cmd $(BUILD_PYTHON)
+
+###
+# Default we just build trace-cmd
+#
+# If you want all libraries, then do: make libs
+###
+
+all: all_cmd plugins show_other_make
+
+all_cmd: $(CMD_TARGETS)
+
+BUILD_PREFIX := $(BUILD_OUTPUT)/build_prefix
+
+$(BUILD_PREFIX): force
+ $(Q)$(call build_prefix,$(prefix))
+
+$(PKG_CONFIG_FILE) : ${PKG_CONFIG_SOURCE_FILE}.template $(BUILD_PREFIX) $(VERSION_FILE)
+ $(Q) $(call do_make_pkgconfig_file,$(prefix))
+
+trace-cmd: force $(LIBTRACECMD_STATIC) \
+ force $(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir
+ $(Q)$(MAKE) -C $(src)/tracecmd $(obj)/tracecmd/$@
+
+$(LIBTRACECMD_STATIC): force
+ $(Q)$(MAKE) -C $(src)/lib/trace-cmd $@
+
+$(LIBTRACECMD_SHARED): force
+ $(Q)$(MAKE) -C $(src)/lib/trace-cmd libtracecmd.so
+
+libtracecmd.a: $(LIBTRACECMD_STATIC)
+libtracecmd.so: $(LIBTRACECMD_SHARED)
+
+libs: $(LIBTRACECMD_SHARED) $(PKG_CONFIG_FILE)
+
+VERSION = $(LIBTC_VERSION)
+PATCHLEVEL = $(LIBTC_PATCHLEVEL)
+EXTRAVERSION = $(LIBTC_EXTRAVERSION)
+
+define make_version.h
+ (echo '/* This file is automatically generated. Do not modify. */'; \
+ echo \#define VERSION_CODE $(shell \
+ expr $(VERSION) \* 256 + $(PATCHLEVEL)); \
+ echo '#define EXTRAVERSION ' $(EXTRAVERSION); \
+ echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \
+ ) > $1
+endef
+
+define update_version.h
+ ($(call make_version.h, $@.tmp); \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ echo ' UPDATE $@'; \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+$(VERSION_FILE): force
+ $(Q)$(call update_version.h)
+
+gui: force
+ @echo "***************************"
+ @echo " KernelShark has moved!"
+ @echo " Please use its new home at https://git.kernel.org/pub/scm/utils/trace-cmd/kernel-shark.git/"
+ @echo "***************************"
+
+test: force $(LIBTRACECMD_STATIC)
+ifneq ($(CUNIT_INSTALLED),1)
+ $(error CUnit framework not installed, cannot build unit tests)
+endif
+ $(Q)$(MAKE) -C $(src)/utest $@
+
+plugins_tracecmd: force $(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir
+ $(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins
+
+plugins: plugins_tracecmd
+
+$(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir: force
+ $(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins $@
+
+show_other_make:
+ @echo "Note: to build man pages, type \"make doc\""
+ @echo " to build unit tests, type \"make test\""
+
+PHONY += show_other_make
+
+define find_tag_files
+ find . -name '\.pc' -prune -o -name '*\.[ch]' -print -o -name '*\.[ch]pp' \
+ ! -name '\.#' -print
+endef
+
+tags: force
+ $(RM) tags
+ $(call find_tag_files) | xargs ctags --extra=+f --c-kinds=+px
+
+TAGS: force
+ $(RM) TAGS
+ $(call find_tag_files) | xargs etags
+
+cscope: force
+ $(RM) cscope*
+ $(call find_tag_files) | cscope -b -q
+
+install_plugins_tracecmd: force
+ $(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins install_plugins
+
+install_plugins: install_plugins_tracecmd
+
+install_python: force
+ $(Q)$(MAKE) -C $(src)/python $@
+
+install_bash_completion: force
+ $(Q)$(call do_install_data,$(src)/tracecmd/trace-cmd.bash,$(BASH_COMPLETE_DIR))
+
+install_cmd: all_cmd install_plugins install_python install_bash_completion
+ $(Q)$(call do_install,$(obj)/tracecmd/trace-cmd,$(bindir_SQ))
+
+install: install_cmd
+ @echo "Note: to install man pages, type \"make install_doc\""
+
+install_gui: force
+ @echo "Nothing to do here."
+ @echo " Have you tried https://git.kernel.org/pub/scm/utils/trace-cmd/kernel-shark.git/"
+
+install_libs: libs
+ $(Q)$(MAKE) -C $(src)/lib/trace-cmd/ $@
+
+doc:
+ $(MAKE) -C $(src)/Documentation all
+
+doc_clean:
+ $(MAKE) -C $(src)/Documentation clean
+
+install_doc:
+ $(MAKE) -C $(src)/Documentation install
+
+clean:
+ $(RM) *.o *~ *.a *.so .*.d
+ $(RM) tags TAGS cscope* $(PKG_CONFIG_SOURCE_FILE) $(VERSION_FILE)
+ $(MAKE) -C $(src)/lib/trace-cmd clean
+ $(MAKE) -C $(src)/lib/trace-cmd/plugins clean
+ $(MAKE) -C $(src)/utest clean
+ $(MAKE) -C $(src)/python clean
+ $(MAKE) -C $(src)/tracecmd clean
+
+define build_uninstall_script
+ $(Q)mkdir $(BUILD_OUTPUT)/tmp_build
+ $(Q)$(MAKE) -C $(src) DESTDIR=$(BUILD_OUTPUT)/tmp_build O=$(BUILD_OUTPUT) $1 > /dev/null
+ $(Q)find $(BUILD_OUTPUT)/tmp_build ! -type d -printf "%P\n" > $(BUILD_OUTPUT)/build_$2
+ $(Q)$(RM) -rf $(BUILD_OUTPUT)/tmp_build
+endef
+
+build_uninstall: $(BUILD_PREFIX)
+ $(call build_uninstall_script,install,uninstall)
+
+$(BUILD_OUTPUT)/build_uninstall: build_uninstall
+
+build_libs_uninstall: $(BUILD_PREFIX)
+ $(call build_uninstall_script,install_libs,libs_uninstall)
+
+$(BUILD_OUTPUT)/build_libs_uninstall: build_libs_uninstall
+
+define uninstall_file
+ if [ -f $(DESTDIR)/$1 -o -h $(DESTDIR)/$1 ]; then \
+ $(call print_uninstall,$(DESTDIR)/$1)$(RM) $(DESTDIR)/$1; \
+ fi;
+endef
+
+uninstall: $(BUILD_OUTPUT)/build_uninstall
+ @$(foreach file,$(shell cat $(BUILD_OUTPUT)/build_uninstall),$(call uninstall_file,$(file)))
+
+uninstall_libs: $(BUILD_OUTPUT)/build_libs_uninstall
+ @$(foreach file,$(shell cat $(BUILD_OUTPUT)/build_libs_uninstall),$(call uninstall_file,$(file)))
+
+##### PYTHON STUFF #####
+
+report_noswig: force
+ $(Q)echo
+ $(Q)echo " NO_PYTHON forced: swig not installed, not compiling python plugins"
+ $(Q)echo
+
+report_nopythondev: force
+ $(Q)echo
+ $(Q)echo " python-dev is not installed, not compiling python plugins"
+ $(Q)echo
+
+ifndef NO_PYTHON
+PYTHON_INCLUDES = `$(PKG_CONFIG) --cflags $(PYTHON_PKGCONFIG_VERS)`
+PYTHON_LDFLAGS = `$(PKG_CONFIG) --libs $(PYTHON_PKGCONFIG_VERS)` \
+ $(shell $(PYTHON_VERS)-config --ldflags)
+PYGTK_CFLAGS = `$(PKG_CONFIG) --cflags pygtk-2.0`
+else
+PYTHON_INCLUDES =
+PYTHON_LDFLAGS =
+PYGTK_CFLAGS =
+endif
+
+export PYTHON_INCLUDES
+export PYTHON_LDFLAGS
+export PYGTK_CFLAGS
+
+ctracecmd.so: force $(LIBTRACECMD_STATIC)
+ $(Q)$(MAKE) -C $(src)/python $@
+
+PHONY += python
+python: $(PYTHON)
+
+
+dist:
+ git archive --format=tar --prefix=trace-cmd-$(TRACECMD_VERSION)/ HEAD \
+ > ../trace-cmd-$(TRACECMD_VERSION).tar
+ cat ../trace-cmd-$(TRACECMD_VERSION).tar | \
+ bzip2 -c9 > ../trace-cmd-$(TRACECMD_VERSION).tar.bz2
+ cat ../trace-cmd-$(TRACECMD_VERSION).tar | \
+ xz -e -c8 > ../trace-cmd-$(TRACECMD_VERSION).tar.xz
+
+PHONY += force
+force:
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 00000000..7e72f726
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,2 @@
+kaleshsingh@google.com
+namhyung@google.com
diff --git a/PACKAGING b/PACKAGING
new file mode 100644
index 00000000..7e7d2065
--- /dev/null
+++ b/PACKAGING
@@ -0,0 +1,30 @@
+The libtracefs and libtraceevent packages are required for trace-cmd
+and libtracecmd.so
+
+In order to create a package directory with libtraceevent, libtracefs
+and libtracecmd and trace-cmd, you can follow these steps:
+
+ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git
+ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git
+ git clone git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git
+
+ cd libtraceevent
+ INSTALL_PATH=/tmp/install ../trace-cmd/make-trace-cmd.sh install
+
+ cd ../libtracefs
+ INSTALL_PATH=/tmp/install ../trace-cmd/make-trace-cmd.sh install
+
+ cd ../trace-cmd
+ INSTALL_PATH=/tmp/install ./make-trace-cmd.sh install install_libs
+
+ cd /tmp/install
+ tar cvjf /tmp/trace-cmd-files.tar.bz2 .
+
+The tarball /tmp/trace-cmd-files.tar.bz2 can then be extracted on another
+machine at the root directory, and trace-cmd will be installed there.
+
+Note, to define a prefix, add a PREFIX variable before calling make-trace-cmd.sh
+
+ For example:
+
+ PREFIX=/usr/local INSTALL_PATH=/tmp/install ./make-trace-cmd.sh install
diff --git a/README b/README
new file mode 100644
index 00000000..9d8c127c
--- /dev/null
+++ b/README
@@ -0,0 +1,68 @@
+
+
+ For more information on contributing please see: https://www.trace-cmd.org
+
+Note: The official repository for trace-cmd and KernelShark is here:
+
+ git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git
+
+For bug reports and issues, please file it here:
+
+ https://bugzilla.kernel.org/buglist.cgi?component=Trace-cmd%2FKernelshark&product=Tools&resolution=---
+
+These files make up the code that creates the trace-cmd programs.
+This includes the GUI interface application kernelshark as well
+as trace-graph and trace-view.
+
+These files also make up the code to create the libtracecmd library.
+
+The applications are licensed under the GNU General Public License 2.0
+(see COPYING) and the libraries are licensed under the GNU
+Lesser General Public License 2.1 (See COPYING.LIB).
+
+BUILDING:
+
+In order to install build dependencies on Debian / Ubuntu do the following:
+ sudo apt-get install build-essential git pkg-config -y
+ sudo apt-get install libtracefs-dev libtraceevent-dev -y
+
+In order to install build dependencies on Fedora, as root do the following:
+ dnf install gcc make git pkg-config -y
+ dnf install libtracefs-devel libtraceevent-devel -y
+
+In case your distribution does not have the required libtracefs and
+libtraceevent libraries, build and install them manually:
+
+ git clone https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/
+ cd libtraceevent
+ make
+ sudo make install
+
+ git clone https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/
+ cd libtracefs
+ make
+ sudo make install
+
+To make trace-cmd
+ make
+
+To make the gui
+ make gui
+
+INSTALL:
+
+To install trace-cmd
+ make install
+
+To install the gui
+ make install_gui
+
+Note: The default install is relative to /usr/local
+ The default install directory is /usr/local/bin
+ The default plugin directory is /usr/local/lib/trace-cmd/plugins
+
+To change the default, you can set 'prefix', eg
+mkdir $HOME/test-trace
+make prefix=$HOME/test-trace
+make prefix=$HOME/test-trace install
+
diff --git a/features.mk b/features.mk
new file mode 100644
index 00000000..53f35fd4
--- /dev/null
+++ b/features.mk
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# taken from perf which was based on Linux Kbuild
+# try-cc
+# Usage: option = $(call try-cc, source-to-build, cc-options)
+try-cc = $(shell sh -c \
+ 'TMP="$(BUILD_OUTPUT)$(TMPOUT).$$$$"; \
+ echo "$(1)" | \
+ $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
+ rm -f "$$TMP"')
+
+define SOURCE_PTRACE
+#include <stdio.h>
+#include <sys/ptrace.h>
+
+int main (void)
+{
+ int ret;
+ ret = ptrace(PTRACE_ATTACH, 0, NULL, 0);
+ ptrace(PTRACE_TRACEME, 0, NULL, 0);
+ ptrace(PTRACE_GETSIGINFO, 0, NULL, NULL);
+ ptrace(PTRACE_GETEVENTMSG, 0, NULL, NULL);
+ ptrace(PTRACE_SETOPTIONS, NULL, NULL,
+ PTRACE_O_TRACEFORK |
+ PTRACE_O_TRACEVFORK |
+ PTRACE_O_TRACECLONE |
+ PTRACE_O_TRACEEXIT);
+ ptrace(PTRACE_CONT, NULL, NULL, 0);
+ ptrace(PTRACE_DETACH, 0, NULL, NULL);
+ ptrace(PTRACE_SETOPTIONS, 0, NULL,
+ PTRACE_O_TRACEFORK |
+ PTRACE_O_TRACEVFORK |
+ PTRACE_O_TRACECLONE |
+ PTRACE_O_TRACEEXIT);
+ return ret;
+}
+endef
+
+define SOURCE_AUDIT
+#include <stdio.h>
+#include <libaudit.h>
+
+int main (void)
+{
+ char *name;
+ int ret;
+ ret = audit_detect_machine();
+ if (ret < 0)
+ return ret;
+ name = audit_syscall_to_name(1, ret);
+ if (!name)
+ return -1;
+ return ret;
+}
+endef
diff --git a/include/linux/time64.h b/include/linux/time64.h
new file mode 100644
index 00000000..3961589e
--- /dev/null
+++ b/include/linux/time64.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _TOOLS_LINUX_TIME64_H
+#define _TOOLS_LINUX_TIME64_H
+
+#define MSEC_PER_SEC 1000L
+#define USEC_PER_MSEC 1000L
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_MSEC 1000000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+#define FSEC_PER_SEC 1000000000000000LL
+
+#endif /* _TOOLS_LINUX_TIME64_H */
diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h
new file mode 100644
index 00000000..5d71e8ba
--- /dev/null
+++ b/include/trace-cmd/trace-cmd.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _TRACE_CMD_H
+#define _TRACE_CMD_H
+
+#include "event-parse.h"
+#include "tracefs.h"
+
+struct tracecmd_input;
+
+enum tracecmd_open_flags {
+ TRACECMD_FL_LOAD_NO_PLUGINS = 1 << 0, /* Do not load plugins */
+ TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS = 1 << 1, /* Do not load system plugins */
+};
+
+enum tracecmd_section_flags {
+ TRACECMD_SEC_FL_COMPRESS = 1 << 0, /* the section is compressed */
+};
+
+struct tracecmd_input *tracecmd_open_head(const char *file, int flags);
+struct tracecmd_input *tracecmd_open(const char *file, int flags);
+struct tracecmd_input *tracecmd_open_fd(int fd, int flags);
+
+void tracecmd_close(struct tracecmd_input *handle);
+
+int tracecmd_init_data(struct tracecmd_input *handle);
+struct tep_record *
+tracecmd_read_cpu_first(struct tracecmd_input *handle, int cpu);
+struct tep_record *
+tracecmd_read_data(struct tracecmd_input *handle, int cpu);
+struct tep_record *
+tracecmd_read_at(struct tracecmd_input *handle, unsigned long long offset,
+ int *cpu);
+void tracecmd_free_record(struct tep_record *record);
+
+struct tep_handle *tracecmd_get_tep(struct tracecmd_input *handle);
+unsigned long long tracecmd_get_traceid(struct tracecmd_input *handle);
+int tracecmd_get_guest_cpumap(struct tracecmd_input *handle,
+ unsigned long long trace_id,
+ const char **name,
+ int *vcpu_count, const int **cpu_pid);
+unsigned long long tracecmd_get_first_ts(struct tracecmd_input *handle);
+void tracecmd_add_ts_offset(struct tracecmd_input *handle, long long offset);
+int tracecmd_buffer_instances(struct tracecmd_input *handle);
+const char *tracecmd_buffer_instance_name(struct tracecmd_input *handle, int indx);
+struct tracecmd_input *tracecmd_buffer_instance_handle(struct tracecmd_input *handle, int indx);
+
+void tracecmd_set_loglevel(enum tep_loglevel level);
+
+#endif /* _TRACE_CMD_H */
diff --git a/include/version.h b/include/version.h
new file mode 100644
index 00000000..fcf7ba02
--- /dev/null
+++ b/include/version.h
@@ -0,0 +1,12 @@
+#ifndef _VERSION_H
+#define _VERSION_H
+
+#define VERSION(a, b) (((a) << 8) + (b))
+
+#ifdef BUILDGUI
+#include "ks_version.h"
+#else
+#include "tc_version.h"
+#endif
+
+#endif /* _VERSION_H */
diff --git a/lib/trace-cmd/Makefile b/lib/trace-cmd/Makefile
new file mode 100644
index 00000000..9374b163
--- /dev/null
+++ b/lib/trace-cmd/Makefile
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+include $(src)/scripts/utils.mk
+
+bdir:=$(obj)/lib/trace-cmd
+ldir:=$(src)/lib/trace-cmd
+
+DEFAULT_TARGET = $(LIBTRACECMD_STATIC)
+
+OBJS =
+OBJS += trace-hash.o
+OBJS += trace-hooks.o
+OBJS += trace-input.o
+OBJS += trace-output.o
+OBJS += trace-recorder.o
+OBJS += trace-util.o
+OBJS += trace-filter-hash.o
+OBJS += trace-msg.o
+OBJS += trace-plugin.o
+ifeq ($(PERF_DEFINED), 1)
+OBJS += trace-perf.o
+endif
+OBJS += trace-timesync.o
+OBJS += trace-timesync-ptp.o
+ifeq ($(VSOCK_DEFINED), 1)
+OBJS += trace-timesync-kvm.o
+endif
+OBJS += trace-compress.o
+ifeq ($(ZLIB_INSTALLED), 1)
+OBJS += trace-compress-zlib.o
+endif
+ifeq ($(ZSTD_INSTALLED), 1)
+OBJS += trace-compress-zstd.o
+endif
+
+# Additional util objects
+OBJS += trace-blk-hack.o
+OBJS += trace-ftrace.o
+
+OBJS := $(OBJS:%.o=$(bdir)/%.o)
+DEPS := $(OBJS:$(bdir)/%.o=$(bdir)/.%.d)
+
+all: $(DEFAULT_TARGET)
+
+$(bdir):
+ @mkdir -p $(bdir)
+
+$(OBJS): | $(bdir)
+$(DEPS): | $(bdir)
+
+$(LIBTRACECMD_STATIC): $(OBJS)
+ $(Q)$(call do_build_static_lib)
+
+LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) $(LIBZSTD_LDLAGS) -lpthread
+
+ifeq ($(ZLIB_INSTALLED), 1)
+LIBS += -lz
+endif
+
+$(LIBTRACECMD_SHARED_VERSION): $(LIBTRACECMD_SHARED)
+ @ln -sf $(<F) $@
+
+$(LIBTRACECMD_SHARED_SO): $(LIBTRACECMD_SHARED_VERSION)
+ @ln -sf $(<F) $@
+
+libtracecmd.so: force $(LIBTRACECMD_SHARED_SO)
+
+$(LIBTRACECMD_SHARED): $(OBJS)
+ $(Q)$(call do_compile_shared_library,$(notdir $(LIBTRACECMD_SHARED_VERSION)))
+
+$(bdir)/%.o: %.c
+ $(Q)$(call do_fpic_compile)
+
+$(DEPS): $(bdir)/.%.d: %.c
+ $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@
+
+$(OBJS): $(bdir)/%.o : $(bdir)/.%.d
+
+ifeq ("$(DESTDIR)", "")
+# If DESTDIR is not defined, then after installing the library and running
+# ldconfig, test whether the library is visible to ld.so.
+# If not, add the path to /etc/ld.so.conf.d/trace.conf and run ldconfig again.
+define install_ld_config
+ if $(LDCONFIG); then \
+ if ! grep -q "^$(libdir)$$" $(LD_SO_CONF_PATH)/* ; then \
+ echo here;\
+ $(CC) -o $(bdir)/test $(ldir)/test.c -I $(includedir_SQ) \
+ -L $(libdir_SQ) -ltracecmd &> /dev/null; \
+ if ! $(bdir)/test &> /dev/null; then \
+ $(call print_install,trace.conf,$(LD_SO_CONF_PATH)) \
+ echo $(libdir_SQ) >> $(LD_SO_CONF_PATH)/trace.conf; \
+ $(LDCONFIG); \
+ fi; \
+ $(RM) $(bdir)/test; \
+ fi; \
+ fi
+endef
+else
+# If installing to a location for another machine or package, do not bother
+# with running ldconfig.
+define install_ld_config
+endef
+endif # DESTDIR = ""
+
+install_pkgconfig: $(PKG_CONFIG_FILE)
+ $(Q)$(call do_install_pkgconfig_file,$(prefix))
+
+install_libs: install_pkgconfig
+ $(Q)$(call do_install,$(LIBTRACECMD_SHARED),$(libdir_SQ))
+ $(Q)$(call print_install,$(LIBTRACECMD_SHARED_VERSION),$(DESTDIR)$(libdir_SQ))
+ $(Q)cp -fpR $(LIBTRACECMD_SHARED_VERSION) $(DESTDIR)$(libdir_SQ)
+ $(Q)$(call print_install,$(LIBTRACECMD_SHARED_SO),$(DESTDIR)$(libdir_SQ))
+ $(Q)cp -fpR $(LIBTRACECMD_SHARED_SO) $(DESTDIR)$(libdir_SQ)
+ $(Q)$(call do_install,$(src)/include/trace-cmd/trace-cmd.h,$(includedir_SQ)/trace-cmd,644)
+ $(Q)$(call install_ld_config)
+
+dep_includes := $(wildcard $(DEPS))
+
+ifneq ($(dep_includes),)
+ include $(dep_includes)
+endif
+
+clean:
+ $(RM) $(bdir)/*.a $(bdir)/*.so $(bdir)/*.so.* $(bdir)/*.o $(bdir)/.*.d
+
+.PHONY: clean
+
+PHONY += force
+force:
diff --git a/lib/trace-cmd/include/private/trace-cmd-private.h b/lib/trace-cmd/include/private/trace-cmd-private.h
new file mode 100644
index 00000000..3cc3e9dd
--- /dev/null
+++ b/lib/trace-cmd/include/private/trace-cmd-private.h
@@ -0,0 +1,635 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _TRACE_CMD_PRIVATE_H
+#define _TRACE_CMD_PRIVATE_H
+
+#include <fcntl.h> /* for iovec */
+#include <sys/types.h>
+#include "event-parse.h"
+#include "trace-cmd/trace-cmd.h"
+
+#define __packed __attribute__((packed))
+#define __hidden __attribute__((visibility ("hidden")))
+
+#define TRACECMD_MAGIC { 23, 8, 68 }
+
+#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
+#define __weak __attribute__((weak))
+#define __noreturn __attribute__((noreturn))
+
+/*
+ * Error codes carried inside pointer values.
+ *
+ * TRACECMD_ERR_MSK has every bit set except the low 14.
+ * TRACECMD_ERROR(ret)   ORs @ret into the mask and yields it as a void *.
+ * TRACECMD_ISERR(ptr)   is true when @ptr is strictly greater than the mask,
+ *                       so an encoded code of 0 is not reported as an error.
+ * TRACECMD_PTR2ERR(ptr) strips the mask again, leaving the low 14 bits.
+ */
+#define TRACECMD_ERR_MSK ((unsigned long)(-1) & ~((1UL << 14) - 1))
+#define TRACECMD_ISERR(ptr) ((unsigned long)(ptr) > TRACECMD_ERR_MSK)
+#define TRACECMD_ERROR(ret) ((void *)((unsigned long)(ret) | TRACECMD_ERR_MSK))
+#define TRACECMD_PTR2ERR(ptr) ((unsigned long)(ptr) & ~TRACECMD_ERR_MSK)
+
+#define TSCNSEC_CLOCK "tsc2nsec"
+
+struct tep_plugin_list *trace_load_plugins(struct tep_handle *tep, int flags);
+
+int *tracecmd_add_id(int *list, int id, int len);
+
+#define FILE_VERSION_MIN 6
+#define FILE_VERSION_MAX 7
+
+#define FILE_VERSION_SECTIONS 7
+#define FILE_VERSION_COMPRESSION 7
+
+enum {
+ RINGBUF_TYPE_PADDING = 29,
+ RINGBUF_TYPE_TIME_EXTEND = 30,
+ RINGBUF_TYPE_TIME_STAMP = 31,
+};
+
+/* Can be overridden */
+void tracecmd_debug(const char *fmt, ...);
+
+void tracecmd_record_ref(struct tep_record *record);
+
+void tracecmd_set_debug(bool set_debug);
+bool tracecmd_get_debug(void);
+
+bool tracecmd_is_version_supported(unsigned int version);
+int tracecmd_default_file_version(void);
+
+struct tracecmd_output;
+struct tracecmd_recorder;
+struct hook_list;
+
+/* --- tracecmd plugins --- */
+
+enum tracecmd_context {
+ TRACECMD_INPUT,
+ TRACECMD_OUTPUT,
+};
+
+enum tracecmd_plugin_flag {
+ TRACECMD_DISABLE_SYS_PLUGINS = 1,
+ TRACECMD_DISABLE_PLUGINS = 1 << 1,
+};
+
+struct trace_plugin_context;
+
+struct trace_plugin_context *
+tracecmd_plugin_context_create(enum tracecmd_context context, void *data);
+
+void tracecmd_plugin_set_flag(struct trace_plugin_context *context,
+ enum tracecmd_plugin_flag flag);
+
+#define TRACECMD_PLUGIN_LOADER tracecmd_plugin_loader
+#define TRACECMD_PLUGIN_UNLOADER tracecmd_plugin_unloader
+#define TRACECMD_PLUGIN_ALIAS tracecmd_plugin_alias
+#define _MAKE_STR(x) #x
+#define MAKE_STR(x) _MAKE_STR(x)
+#define TRACECMD_PLUGIN_LOADER_NAME MAKE_STR(TRACECMD_PLUGIN_LOADER)
+#define TRACECMD_PLUGIN_UNLOADER_NAME MAKE_STR(TRACECMD_PLUGIN_UNLOADER)
+#define TRACECMD_PLUGIN_ALIAS_NAME MAKE_STR(TRACECMD_PLUGIN_ALIAS)
+
+typedef int (*tracecmd_plugin_load_func)(struct trace_plugin_context *trace);
+typedef int (*tracecmd_plugin_unload_func)(struct trace_plugin_context *trace);
+
+struct tracecmd_input *
+tracecmd_plugin_context_input(struct trace_plugin_context *trace_context);
+struct tracecmd_output *
+tracecmd_plugin_context_output(struct trace_plugin_context *trace_context);
+
+void tracecmd_set_quiet(struct tracecmd_output *handle, bool set_quiet);
+bool tracecmd_get_quiet(struct tracecmd_output *handle);
+void tracecmd_set_out_clock(struct tracecmd_output *handle, const char *clock);
+const char *tracecmd_get_trace_clock(struct tracecmd_input *handle);
+
+const char *tracecmd_get_cpustats(struct tracecmd_input *handle);
+const char *tracecmd_get_uname(struct tracecmd_input *handle);
+const char *tracecmd_get_version(struct tracecmd_input *handle);
+off64_t tracecmd_get_cpu_file_size(struct tracecmd_input *handle, int cpu);
+
+/*
+ * Return non-zero when the host stores the bytes 01 02 03 04 as the
+ * unsigned int 0x01020304, i.e. most significant byte first.
+ *
+ * The bytes are viewed through a union rather than a casted pointer, so the
+ * read is neither an aliasing violation nor a potentially misaligned access.
+ * As before, this assumes a 4-byte unsigned int.
+ */
+static inline int tracecmd_host_bigendian(void)
+{
+ union {
+  unsigned char bytes[4];
+  unsigned int word;
+ } probe = { { 0x1, 0x2, 0x3, 0x4 } };
+
+ return probe.word == 0x01020304;
+}
+
+/* --- Opening and Reading the trace.dat file --- */
+
+enum tracecmd_file_states {
+ TRACECMD_FILE_ALLOCATED = 0,
+ TRACECMD_FILE_INIT,
+ TRACECMD_FILE_HEADERS,
+ TRACECMD_FILE_FTRACE_EVENTS,
+ TRACECMD_FILE_ALL_EVENTS,
+ TRACECMD_FILE_KALLSYMS,
+ TRACECMD_FILE_PRINTK,
+ TRACECMD_FILE_CMD_LINES,
+ TRACECMD_FILE_CPU_COUNT,
+ TRACECMD_FILE_OPTIONS,
+ TRACECMD_FILE_CPU_LATENCY,
+ TRACECMD_FILE_CPU_FLYRECORD,
+};
+
+enum {
+ TRACECMD_OPTION_DONE,
+ TRACECMD_OPTION_DATE,
+ TRACECMD_OPTION_CPUSTAT,
+ TRACECMD_OPTION_BUFFER,
+ TRACECMD_OPTION_TRACECLOCK,
+ TRACECMD_OPTION_UNAME,
+ TRACECMD_OPTION_HOOK,
+ TRACECMD_OPTION_OFFSET,
+ TRACECMD_OPTION_CPUCOUNT,
+ TRACECMD_OPTION_VERSION,
+ TRACECMD_OPTION_PROCMAPS,
+ TRACECMD_OPTION_TRACEID,
+ TRACECMD_OPTION_TIME_SHIFT,
+ TRACECMD_OPTION_GUEST,
+ TRACECMD_OPTION_TSC2NSEC,
+ TRACECMD_OPTION_STRINGS,
+ TRACECMD_OPTION_HEADER_INFO,
+ TRACECMD_OPTION_FTRACE_EVENTS,
+ TRACECMD_OPTION_EVENT_FORMATS,
+ TRACECMD_OPTION_KALLSYMS,
+ TRACECMD_OPTION_PRINTK,
+ TRACECMD_OPTION_CMDLINES,
+ TRACECMD_OPTION_BUFFER_TEXT,
+ TRACECMD_OPTION_MAX,
+};
+
+enum {
+ TRACECMD_FL_IGNORE_DATE = (1 << 0),
+ TRACECMD_FL_BUFFER_INSTANCE = (1 << 1),
+ TRACECMD_FL_IN_USECS = (1 << 2),
+ TRACECMD_FL_RAW_TS = (1 << 3),
+ TRACECMD_FL_SECTIONED = (1 << 4),
+ TRACECMD_FL_COMPRESSION = (1 << 5),
+};
+
+struct tracecmd_ftrace {
+ struct tracecmd_input *handle;
+ struct tep_event *fgraph_ret_event;
+ int fgraph_ret_id;
+ int long_size;
+};
+
+struct tracecmd_proc_addr_map {
+ unsigned long long start;
+ unsigned long long end;
+ char *lib_name;
+};
+
+typedef void (*tracecmd_show_data_func)(struct tracecmd_input *handle,
+ struct tep_record *record);
+typedef void (*tracecmd_handle_init_func)(struct tracecmd_input *handle,
+ struct hook_list *hook, int global);
+
+struct tracecmd_input *tracecmd_alloc(const char *file, int flags);
+struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags);
+void tracecmd_ref(struct tracecmd_input *handle);
+int tracecmd_read_headers(struct tracecmd_input *handle,
+ enum tracecmd_file_states state);
+int tracecmd_get_parsing_failures(struct tracecmd_input *handle);
+int tracecmd_long_size(struct tracecmd_input *handle);
+int tracecmd_page_size(struct tracecmd_input *handle);
+int tracecmd_cpus(struct tracecmd_input *handle);
+int tracecmd_copy_headers(struct tracecmd_input *in_handle,
+ struct tracecmd_output *out_handle,
+ enum tracecmd_file_states start_state,
+ enum tracecmd_file_states end_state);
+int tracecmd_copy_buffer_descr(struct tracecmd_input *in_handle,
+ struct tracecmd_output *out_handle);
+int tracecmd_copy_options(struct tracecmd_input *in_handle,
+ struct tracecmd_output *out_handle);
+int tracecmd_copy_trace_data(struct tracecmd_input *in_handle,
+ struct tracecmd_output *out_handle);
+void tracecmd_set_flag(struct tracecmd_input *handle, int flag);
+void tracecmd_clear_flag(struct tracecmd_input *handle, int flag);
+unsigned long tracecmd_get_flags(struct tracecmd_input *handle);
+enum tracecmd_file_states tracecmd_get_file_state(struct tracecmd_input *handle);
+int tracecmd_enable_tsync(struct tracecmd_input *handle, bool enable);
+
+void tracecmd_parse_trace_clock(struct tracecmd_input *handle, char *file, int size);
+
+int tracecmd_make_pipe(struct tracecmd_input *handle, int cpu, int fd, int cpus);
+
+int tracecmd_is_buffer_instance(struct tracecmd_input *handle);
+
+void tracecmd_set_ts_offset(struct tracecmd_input *handle, long long offset);
+void tracecmd_set_ts2secs(struct tracecmd_input *handle, unsigned long long hz);
+
+void tracecmd_print_events(struct tracecmd_input *handle, const char *regex);
+
+struct hook_list *tracecmd_hooks(struct tracecmd_input *handle);
+
+void tracecmd_print_stats(struct tracecmd_input *handle);
+void tracecmd_print_uname(struct tracecmd_input *handle);
+void tracecmd_print_version(struct tracecmd_input *handle);
+
+struct tep_record *
+tracecmd_peek_data(struct tracecmd_input *handle, int cpu);
+
+/*
+ * Like tracecmd_peek_data(), but bumps ref_count on the record that is
+ * returned. A NULL result from tracecmd_peek_data() is passed through as is.
+ */
+static inline struct tep_record *
+tracecmd_peek_data_ref(struct tracecmd_input *handle, int cpu)
+{
+ struct tep_record *peeked;
+
+ peeked = tracecmd_peek_data(handle, cpu);
+ /* No record came back: nothing to take a reference on. */
+ if (!peeked)
+  return peeked;
+
+ peeked->ref_count++;
+ return peeked;
+}
+
+int tracecmd_latency_data_read(struct tracecmd_input *handle, char **buf, size_t *size);
+
+struct tep_record *
+tracecmd_read_prev(struct tracecmd_input *handle, struct tep_record *record);
+
+struct tep_record *
+tracecmd_read_next_data(struct tracecmd_input *handle, int *rec_cpu);
+
+struct tep_record *
+tracecmd_peek_next_data(struct tracecmd_input *handle, int *rec_cpu);
+
+struct tep_record *
+tracecmd_translate_data(struct tracecmd_input *handle,
+ void *ptr, int size);
+struct tep_record *
+tracecmd_read_cpu_last(struct tracecmd_input *handle, int cpu);
+int tracecmd_refresh_record(struct tracecmd_input *handle,
+ struct tep_record *record);
+
+int tracecmd_set_cpu_to_timestamp(struct tracecmd_input *handle,
+ int cpu, unsigned long long ts);
+void
+tracecmd_set_all_cpus_to_timestamp(struct tracecmd_input *handle,
+ unsigned long long time);
+
+int tracecmd_set_cursor(struct tracecmd_input *handle,
+ int cpu, unsigned long long offset);
+unsigned long long
+tracecmd_get_cursor(struct tracecmd_input *handle, int cpu);
+
+unsigned long tracecmd_get_in_file_version(struct tracecmd_input *handle);
+size_t tracecmd_get_options_offset(struct tracecmd_input *handle);
+int tracecmd_get_file_compress_proto(struct tracecmd_input *handle,
+ const char **name, const char **version);
+
+int tracecmd_ftrace_overrides(struct tracecmd_input *handle, struct tracecmd_ftrace *finfo);
+bool tracecmd_get_use_trace_clock(struct tracecmd_input *handle);
+tracecmd_show_data_func
+tracecmd_get_show_data_func(struct tracecmd_input *handle);
+void tracecmd_set_show_data_func(struct tracecmd_input *handle,
+ tracecmd_show_data_func func);
+
+int tracecmd_record_at_buffer_start(struct tracecmd_input *handle, struct tep_record *record);
+unsigned long long tracecmd_page_ts(struct tracecmd_input *handle,
+ struct tep_record *record);
+unsigned int tracecmd_record_ts_delta(struct tracecmd_input *handle,
+ struct tep_record *record);
+
+struct tracecmd_proc_addr_map *
+tracecmd_search_task_map(struct tracecmd_input *handle,
+ int pid, unsigned long long addr);
+#ifndef SWIG
+/* hack for function graph work around */
+extern __thread struct tracecmd_input *tracecmd_curr_thread_handle;
+#endif
+
+
+/* --- Creating and Writing the trace.dat file --- */
+
+struct tracecmd_event_list {
+ struct tracecmd_event_list *next;
+ const char *glob;
+};
+
+struct tracecmd_option;
+struct tracecmd_msg_handle;
+
+int tracecmd_output_set_msg(struct tracecmd_output *handle,
+ struct tracecmd_msg_handle *msg_handle);
+int tracecmd_output_set_trace_dir(struct tracecmd_output *handle, const char *tracing_dir);
+int tracecmd_output_set_kallsyms(struct tracecmd_output *handle, const char *kallsyms);
+int tracecmd_output_set_from_input(struct tracecmd_output *handle, struct tracecmd_input *ihandle);
+int tracecmd_output_set_version(struct tracecmd_output *handle, int file_version);
+int tracecmd_output_set_compression(struct tracecmd_output *handle, const char *compression);
+int tracecmd_output_write_headers(struct tracecmd_output *handle,
+ struct tracecmd_event_list *list);
+
+struct tracecmd_output *tracecmd_output_create(const char *output_file);
+struct tracecmd_output *tracecmd_output_create_fd(int fd);
+struct tracecmd_output *tracecmd_create_file_latency(const char *output_file, int cpus,
+ int file_version, const char *compression);
+
+struct tracecmd_option *tracecmd_add_option(struct tracecmd_output *handle,
+ unsigned short id, int size,
+ const void *data);
+struct tracecmd_option *
+tracecmd_add_option_v(struct tracecmd_output *handle,
+ unsigned short id, const struct iovec *vector, int count);
+
+int tracecmd_add_buffer_info(struct tracecmd_output *handle, const char *name, int cpus);
+int tracecmd_write_buffer_info(struct tracecmd_output *handle);
+
+int tracecmd_write_cpus(struct tracecmd_output *handle, int cpus);
+int tracecmd_write_cmdlines(struct tracecmd_output *handle);
+int tracecmd_write_options(struct tracecmd_output *handle);
+int tracecmd_write_meta_strings(struct tracecmd_output *handle);
+int tracecmd_append_options(struct tracecmd_output *handle);
+void tracecmd_output_close(struct tracecmd_output *handle);
+void tracecmd_output_free(struct tracecmd_output *handle);
+struct tracecmd_output *tracecmd_copy(struct tracecmd_input *ihandle, const char *file,
+ enum tracecmd_file_states state, int file_version,
+ const char *compression);
+
+int tracecmd_write_cpu_data(struct tracecmd_output *handle,
+ int cpus, char * const *cpu_data_files, const char *buff_name);
+int tracecmd_append_cpu_data(struct tracecmd_output *handle,
+ int cpus, char * const *cpu_data_files);
+int tracecmd_append_buffer_cpu_data(struct tracecmd_output *handle,
+ const char *name, int cpus, char * const *cpu_data_files);
+struct tracecmd_output *tracecmd_get_output_handle_fd(int fd);
+unsigned long tracecmd_get_out_file_version(struct tracecmd_output *handle);
+unsigned long long tracecmd_get_out_file_offset(struct tracecmd_output *handle);
+
+/* --- Reading the Fly Recorder Trace --- */
+
+enum {
+ TRACECMD_RECORD_NOSPLICE = (1 << 0), /* Use read instead of splice */
+ TRACECMD_RECORD_SNAPSHOT = (1 << 1), /* Extract from snapshot */
+ TRACECMD_RECORD_BLOCK_SPLICE = (1 << 2), /* Block on splice write */
+ TRACECMD_RECORD_NOBRASS = (1 << 3), /* Splice directly without a brass pipe */
+ TRACECMD_RECORD_POLL = (1 << 4), /* Use O_NONBLOCK, poll trace buffers */
+};
+
+void tracecmd_free_recorder(struct tracecmd_recorder *recorder);
+struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags);
+struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags);
+struct tracecmd_recorder *tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags, int trace_fd);
+struct tracecmd_recorder *tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb);
+struct tracecmd_recorder *tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *buffer);
+struct tracecmd_recorder *tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, const char *buffer);
+struct tracecmd_recorder *tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags, const char *buffer, int maxkb);
+
+int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep);
+void tracecmd_stop_recording(struct tracecmd_recorder *recorder);
+long tracecmd_flush_recording(struct tracecmd_recorder *recorder);
+
+enum tracecmd_msg_flags {
+ TRACECMD_MSG_FL_USE_TCP = 1 << 0,
+ TRACECMD_MSG_FL_USE_VSOCK = 1 << 1,
+};
+
+/* for both client and server */
+#ifdef __ANDROID__
+#define MSG_CACHE_FILE "/data/local/tmp/trace_msg_cacheXXXXXX"
+#else /* !__ANDROID__ */
+#define MSG_CACHE_FILE "/tmp/trace_msg_cacheXXXXXX"
+#endif /* __ANDROID__ */
+
+/*
+ * Per-connection state for the trace message protocol, shared by client and
+ * server code. Allocated by tracecmd_msg_handle_alloc(fd, flags).
+ */
+struct tracecmd_msg_handle {
+ /* NOTE(review): presumably the connection socket; tracecmd_msg_handle_close() is documented as closing "the socket" */
+ int fd;
+ short cpu_count;
+ short version; /* Current protocol version */
+ /* NOTE(review): presumably enum tracecmd_msg_flags bits -- confirm against users */
+ unsigned long flags;
+ bool done;
+ bool cache;
+ /* NOTE(review): cfd/cfile look like the descriptor and path of the cache file -- confirm */
+ int cfd;
+ /* Sized to hold the MSG_CACHE_FILE template, including its NUL terminator */
+ char cfile[sizeof(MSG_CACHE_FILE)];
+};
+
+struct tracecmd_tsync_protos {
+ char **names;
+};
+
+struct tracecmd_msg_handle *
+tracecmd_msg_handle_alloc(int fd, unsigned long flags);
+int tracecmd_msg_handle_cache(struct tracecmd_msg_handle *msg_handle);
+
+/* Closes the socket and frees the handle */
+void tracecmd_msg_handle_close(struct tracecmd_msg_handle *msg_handle);
+
+/* for clients */
+int tracecmd_msg_send_init_data(struct tracecmd_msg_handle *msg_handle,
+ unsigned int **client_ports);
+int tracecmd_msg_data_send(struct tracecmd_msg_handle *msg_handle,
+ const char *buf, int size);
+int tracecmd_msg_finish_sending_data(struct tracecmd_msg_handle *msg_handle);
+int tracecmd_msg_send_close_msg(struct tracecmd_msg_handle *msg_handle);
+int tracecmd_msg_send_close_resp_msg(struct tracecmd_msg_handle *msg_handle);
+int tracecmd_msg_wait_close(struct tracecmd_msg_handle *msg_handle);
+int tracecmd_msg_wait_close_resp(struct tracecmd_msg_handle *msg_handle);
+
+/* for server */
+int tracecmd_msg_initial_setting(struct tracecmd_msg_handle *msg_handle);
+int tracecmd_msg_send_port_array(struct tracecmd_msg_handle *msg_handle,
+ unsigned *ports);
+int tracecmd_msg_read_data(struct tracecmd_msg_handle *msg_handle, int ofd);
+int tracecmd_msg_collect_data(struct tracecmd_msg_handle *msg_handle, int ofd);
+bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle);
+void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle);
+
+int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
+ int argc, char **argv, bool use_fifos,
+ unsigned long long trace_id,
+ struct tracecmd_tsync_protos *protos);
+int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
+ int *argc, char ***argv, bool *use_fifos,
+ unsigned long long *trace_id,
+ struct tracecmd_tsync_protos **protos);
+
+int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
+ int nr_cpus, int page_size,
+ unsigned int *ports, bool use_fifos,
+ unsigned long long trace_id,
+ const char *tsync_proto, unsigned int tsync_port);
+int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
+ int *nr_cpus, int *page_size,
+ unsigned int **ports, bool *use_fifos,
+ unsigned long long *trace_id,
+ char **tsync_proto,
+ unsigned int *tsync_port);
+
+int tracecmd_msg_send_time_sync(struct tracecmd_msg_handle *msg_handle,
+ char *sync_protocol, unsigned int sync_msg_id,
+ unsigned int payload_size, char *payload);
+int tracecmd_msg_recv_time_sync(struct tracecmd_msg_handle *msg_handle,
+ char *sync_protocol,
+ unsigned int *sync_msg_id,
+ unsigned int *payload_size, char **payload);
+
+enum tracecmd_clocks {
+ TRACECMD_CLOCK_UNKNOWN = 0,
+ TRACECMD_CLOCK_LOCAL = 1,
+ TRACECMD_CLOCK_GLOBAL = 1 << 1,
+ TRACECMD_CLOCK_COUNTER = 1 << 2,
+ TRACECMD_CLOCK_UPTIME = 1 << 3,
+ TRACECMD_CLOCK_PERF = 1 << 4,
+ TRACECMD_CLOCK_MONO = 1 << 5,
+ TRACECMD_CLOCK_MONO_RAW = 1 << 6,
+ TRACECMD_CLOCK_BOOT = 1 << 7,
+ TRACECMD_CLOCK_X86_TSC = 1 << 8
+};
+
+enum tracecmd_clocks tracecmd_clock_str2id(const char *clock);
+const char *tracecmd_clock_id2str(enum tracecmd_clocks clock);
+
+/* --- Timestamp synchronization --- */
+
+struct tracecmd_time_sync;
+#define TRACECMD_TSYNC_PNAME_LENGTH 16
+#define TRACECMD_TSYNC_PROTO_NONE "none"
+
+enum{
+ TRACECMD_TIME_SYNC_CMD_PROBE = 1,
+ TRACECMD_TIME_SYNC_CMD_STOP = 2,
+};
+
+enum tracecmd_time_sync_role {
+ TRACECMD_TIME_SYNC_ROLE_HOST = (1 << 0),
+ TRACECMD_TIME_SYNC_ROLE_GUEST = (1 << 1),
+ TRACECMD_TIME_SYNC_ROLE_CLIENT = (1 << 2),
+ TRACECMD_TIME_SYNC_ROLE_SERVER = (1 << 3),
+};
+
+/* Timestamp synchronization flags */
+#define TRACECMD_TSYNC_FLAG_INTERPOLATE 0x1
+
+void tracecmd_tsync_init(void);
+int tracecmd_tsync_proto_getall(struct tracecmd_tsync_protos **protos, const char *clock, int role);
+bool tsync_proto_is_supported(const char *proto_name);
+struct tracecmd_time_sync *
+tracecmd_tsync_with_host(int fd,
+ const struct tracecmd_tsync_protos *tsync_protos,
+ const char *clock, int remote_id, int local_id);
+int tracecmd_tsync_with_host_stop(struct tracecmd_time_sync *tsync);
+struct tracecmd_time_sync *
+tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval,
+ unsigned int fd, int guest_pid,
+ int guest_cpus, const char *proto_name, const char *clock);
+int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync);
+int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
+ int *count, long long **ts,
+ long long **offsets, long long **scalings, long long **frac);
+int tracecmd_tsync_get_selected_proto(struct tracecmd_time_sync *tsync,
+ char **selected_proto);
+void tracecmd_tsync_free(struct tracecmd_time_sync *tsync);
+int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
+ struct tracecmd_time_sync *tsync);
+
+/* --- Compression --- */
+struct tracecmd_compress_chunk {
+ unsigned int size;
+ unsigned int zsize;
+ off64_t zoffset;
+ off64_t offset;
+};
+struct tracecmd_compression;
+struct tracecmd_compression_proto {
+ int weight;
+ const char *name;
+ const char *version;
+ int (*compress)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes);
+ int (*uncompress)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes);
+ unsigned int (*compress_size)(void *ctx, unsigned int bytes);
+ bool (*is_supported)(const char *name, const char *version);
+ void *(*new_context)(void);
+ void (*free_context)(void *ctx);
+};
+
+struct tracecmd_compression *tracecmd_compress_alloc(const char *name, const char *version,
+ int fd, struct tep_handle *tep,
+ struct tracecmd_msg_handle *msg_handle);
+void tracecmd_compress_destroy(struct tracecmd_compression *handle);
+int tracecmd_compress_block(struct tracecmd_compression *handle);
+int tracecmd_uncompress_block(struct tracecmd_compression *handle);
+void tracecmd_compress_reset(struct tracecmd_compression *handle);
+int tracecmd_compress_buffer_read(struct tracecmd_compression *handle, char *dst, int len);
+int tracecmd_compress_pread(struct tracecmd_compression *handle, char *dst, int len, off_t offset);
+int tracecmd_compress_buffer_write(struct tracecmd_compression *handle,
+ const void *data, unsigned long long size);
+off64_t tracecmd_compress_lseek(struct tracecmd_compression *handle, off64_t offset, int whence);
+int tracecmd_compress_proto_get_name(struct tracecmd_compression *compress,
+ const char **name, const char **version);
+bool tracecmd_compress_is_supported(const char *name, const char *version);
+int tracecmd_compress_protos_get(char ***names, char ***versions);
+int tracecmd_compress_proto_register(struct tracecmd_compression_proto *proto);
+int tracecmd_compress_copy_from(struct tracecmd_compression *handle, int fd, int chunk_size,
+ unsigned long long *read_size, unsigned long long *write_size);
+int tracecmd_uncompress_copy_to(struct tracecmd_compression *handle, int fd,
+ unsigned long long *read_size, unsigned long long *write_size);
+int tracecmd_uncompress_chunk(struct tracecmd_compression *handle,
+ struct tracecmd_compress_chunk *chunk, char *data);
+int tracecmd_load_chunks_info(struct tracecmd_compression *handle,
+ struct tracecmd_compress_chunk **chunks_info);
+/* --- Plugin handling --- */
+extern struct tep_plugin_option trace_ftrace_options[];
+
+char **trace_util_find_plugin_files(const char *suffix);
+void trace_util_free_plugin_files(char **files);
+
+/* Used for trace-cmd list */
+void tracecmd_ftrace_load_options(void);
+
+/* event hooks */
+
+struct hook_list {
+ struct hook_list *next;
+ struct buffer_instance *instance;
+ const char *hook;
+ char *str;
+ char *start_system;
+ char *start_event;
+ char *start_match;
+ char *end_system;
+ char *end_event;
+ char *end_match;
+ char *pid;
+ int migrate;
+ int global;
+ int stack;
+};
+
+struct hook_list *tracecmd_create_event_hook(const char *arg);
+void tracecmd_free_hooks(struct hook_list *hooks);
+
+void tracecmd_plog(const char *fmt, ...);
+void tracecmd_plog_error(const char *fmt, ...);
+int tracecmd_set_logfile(char *logfile);
+
+/* --- System --- */
+unsigned long long tracecmd_generate_traceid(void);
+int tracecmd_count_cpus(void);
+
+/* --- Hack! --- */
+int tracecmd_blk_hack(struct tracecmd_input *handle);
+
+/* --- Stack tracer functions --- */
+int tracecmd_stack_tracer_status(int *status);
+
+/* --- Debugging --- */
+struct kbuffer *tracecmd_record_kbuf(struct tracecmd_input *handle,
+ struct tep_record *record);
+void *tracecmd_record_page(struct tracecmd_input *handle,
+ struct tep_record *record);
+void *tracecmd_record_offset(struct tracecmd_input *handle,
+ struct tep_record *record);
+#ifdef PERF
+
+#include <linux/perf_event.h>
+
+/* trace-cmd Perf */
+struct trace_perf {
+ int fd;
+ int cpu;
+ int pid;
+ int pages;
+ struct perf_event_attr pe;
+ struct perf_event_mmap_page *mmap;
+};
+int trace_perf_init(struct trace_perf *perf, int pages, int cpu, int pid);
+int trace_perf_open(struct trace_perf *perf);
+void trace_perf_close(struct trace_perf *perf);
+#endif
+
+#endif /* _TRACE_CMD_PRIVATE_H */
diff --git a/lib/trace-cmd/include/private/trace-filter-hash.h b/lib/trace-cmd/include/private/trace-filter-hash.h
new file mode 100644
index 00000000..4111c41e
--- /dev/null
+++ b/lib/trace-cmd/include/private/trace-filter-hash.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2018 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
+ *
+ */
+#ifndef _TRACE_FILTER_HASH_H
+#define _TRACE_FILTER_HASH_H
+
+#include <stdint.h>
+
+struct tracecmd_filter_id_item {
+ struct tracecmd_filter_id_item *next;
+ int id;
+};
+
+struct tracecmd_filter_id {
+ struct tracecmd_filter_id_item **hash;
+ int count;
+};
+
+/**
+ * tracecmd_quick_hash - A quick (non secured) hash algorithm
+ * @val: The value to perform the hash on
+ * @bits: The size in bits you need to return
+ *
+ * This is a quick hashing function adapted from Donald E. Knuth's 32
+ * bit multiplicative hash. See The Art of Computer Programming (TAOCP).
+ * Multiplication by the Prime number, closest to the golden ratio of
+ * 2^32.
+ *
+ * @bits is used to mask the result for use cases that require
+ * a power of 2 return value that is less than 32 bits. Any value
+ * of @bits greater than 31 (or zero), will simply return the full hash on @val.
+ *
+ * Returns the hash of @val, reduced to its low @bits bits when
+ * 1 <= @bits <= 31.
+ */
+static inline uint32_t tracecmd_quick_hash(uint32_t val, unsigned int bits)
+{
+ val *= UINT32_C(2654435761);
+
+ if (!bits || bits > 31)
+  return val;
+
+ /*
+  * Build the mask in unsigned 32-bit arithmetic: with a plain int,
+  * "1 << 31" (and the "- 1" that follows) overflows.
+  */
+ return val & ((UINT32_C(1) << bits) - 1);
+}
+
+struct tracecmd_filter_id_item *
+ tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id);
+void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id);
+void tracecmd_filter_id_remove(struct tracecmd_filter_id *hash, int id);
+void tracecmd_filter_id_clear(struct tracecmd_filter_id *hash);
+struct tracecmd_filter_id *tracecmd_filter_id_hash_alloc(void);
+void tracecmd_filter_id_hash_free(struct tracecmd_filter_id *hash);
+struct tracecmd_filter_id *
+ tracecmd_filter_id_hash_copy(struct tracecmd_filter_id *hash);
+int *tracecmd_filter_ids(struct tracecmd_filter_id *hash);
+int tracecmd_filter_id_compare(struct tracecmd_filter_id *hash1,
+ struct tracecmd_filter_id *hash2);
+
+/* Return the count field of @hash; @hash is dereferenced unconditionally. */
+static inline int tracecmd_filter_task_count(struct tracecmd_filter_id *hash)
+{
+ int nr_ids = hash->count;
+
+ return nr_ids;
+}
+
+#endif /* _TRACE_FILTER_HASH_H */
diff --git a/lib/trace-cmd/include/private/trace-hash.h b/lib/trace-cmd/include/private/trace-hash.h
new file mode 100644
index 00000000..aa92cdfe
--- /dev/null
+++ b/lib/trace-cmd/include/private/trace-hash.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _TRACE_HASH_H
+#define _TRACE_HASH_H
+
+struct trace_hash_item {
+ struct trace_hash_item *next;
+ struct trace_hash_item *prev;
+ unsigned long long key;
+};
+
+struct trace_hash {
+ struct trace_hash_item **buckets;
+ int nr_buckets;
+ int power;
+};
+
+int trace_hash_init(struct trace_hash *hash, int buckets);
+void trace_hash_free(struct trace_hash *hash);
+int trace_hash_add(struct trace_hash *hash, struct trace_hash_item *item);
+int trace_hash_empty(struct trace_hash *hash);
+
+/*
+ * Unlink @item from its chain: the predecessor's next pointer is redirected
+ * to @item's successor and, if there is a successor, its prev pointer is
+ * redirected to the predecessor. @item's own links are left untouched.
+ * item->prev is dereferenced unconditionally, so it must be valid.
+ */
+static inline void trace_hash_del(struct trace_hash_item *item)
+{
+ struct trace_hash_item *before = item->prev;
+ struct trace_hash_item *after = item->next;
+
+ before->next = after;
+ if (after)
+  after->prev = before;
+}
+
+/*
+ * Loop header: @bucket visits every slot of (hash)->buckets, from the first
+ * up to (but not including) buckets + nr_buckets.
+ */
+#define trace_hash_for_each_bucket(bucket, hash) \
+ for (bucket = (hash)->buckets; \
+ (bucket) < (hash)->buckets + (hash)->nr_buckets; (bucket)++)
+
+/* Loop header: @item follows the ->next links starting at *@bucket. */
+#define trace_hash_for_each_item(item, bucket) \
+ for ((item = *(bucket)); item; item = (item)->next)
+
+/*
+ * Same walk as trace_hash_for_each_item(), but the successor is saved in @n
+ * before the body runs, so the loop does not read @item->next after the body.
+ */
+#define trace_hash_for_each_item_safe(item, n, bucket) \
+ for ((item = *(bucket)), n = item ? item->next : NULL; item; \
+ item = n, n = item ? (item)->next : NULL)
+
+/*
+ * Loop header: re-reads the head of @bucket on every iteration and runs
+ * until it is NULL; the body must change *@bucket for the loop to end.
+ */
+#define trace_hash_while_item(item, bucket) \
+ while ((item = *(bucket)))
+
+typedef int (*trace_hash_func)(struct trace_hash_item *item, void *data);
+
+struct trace_hash_item *
+trace_hash_find(struct trace_hash *hash, unsigned long long key,
+ trace_hash_func match, void *data);
+
+#endif /* _TRACE_HASH_H */
diff --git a/lib/trace-cmd/include/private/trace-msg.h b/lib/trace-cmd/include/private/trace-msg.h
new file mode 100644
index 00000000..cfcf9615
--- /dev/null
+++ b/lib/trace-cmd/include/private/trace-msg.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+#ifndef _TRACE_MSG_H_
+#define _TRACE_MSG_H_
+
+#include <stdbool.h>
+
+#define UDP_MAX_PACKET (65536 - 20)
+#define V3_MAGIC "766679\0"
+#define V3_CPU "-1V3"
+
+#define V1_PROTOCOL 1
+#define V3_PROTOCOL 3
+
+extern unsigned int page_size;
+
+#endif /* _TRACE_MSG_H_ */
diff --git a/lib/trace-cmd/include/trace-cmd-local.h b/lib/trace-cmd/include/trace-cmd-local.h
new file mode 100644
index 00000000..6ac34137
--- /dev/null
+++ b/lib/trace-cmd/include/trace-cmd-local.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _TRACE_CMD_LOCAL_H
+#define _TRACE_CMD_LOCAL_H
+
+#include <byteswap.h>
+#include "trace-cmd-private.h"
+
+#define FILE_VERSION_DEFAULT 7
+
+/* Can be overridden */
+void tracecmd_warning(const char *fmt, ...);
+void tracecmd_critical(const char *fmt, ...);
+void tracecmd_info(const char *fmt, ...);
+
+/*
+ * Fallback htonll()/ntohll(), provided only when htonll is not already
+ * defined (ntohll is not tested separately). When __BYTE_ORDER equals
+ * __LITTLE_ENDIAN both expand to __bswap_64(); otherwise both are identity
+ * macros.
+ */
+#ifndef htonll
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x) __bswap_64(x)
+#define ntohll(x) __bswap_64(x)
+#else
+#define htonll(x) (x)
+#define ntohll(x) (x)
+#endif
+#endif
+
+#ifdef HAVE_ZLIB
+int tracecmd_zlib_init(void);
+#endif
+
+/*
+ * With HAVE_ZSTD the real tracecmd_zstd_init() is only declared here.
+ * Without it, an inline stub that returns 0 takes its place (presumably so
+ * callers can invoke it unconditionally -- confirm against the call sites).
+ */
+#ifdef HAVE_ZSTD
+int tracecmd_zstd_init(void);
+#else
+static inline int tracecmd_zstd_init(void)
+{
+ return 0;
+}
+#endif
+
+struct data_file_write {
+ unsigned long long file_size;
+ unsigned long long write_size;
+ /* offset in the trace file, where write_size is stored */
+ unsigned long long file_write_size;
+ unsigned long long data_offset;
+ /* offset in the trace file, where data_offset is stored */
+ unsigned long long file_data_offset;
+};
+
+void tracecmd_compress_init(void);
+void tracecmd_compress_free(void);
+
+bool check_file_state(unsigned long file_version, int current_state, int new_state);
+bool check_out_state(struct tracecmd_output *handle, int new_state);
+
+int out_uncompress_block(struct tracecmd_output *handle);
+int out_compression_start(struct tracecmd_output *handle, bool compress);
+int out_compression_end(struct tracecmd_output *handle, bool compress);
+void out_compression_reset(struct tracecmd_output *handle, bool compress);
+bool out_check_compression(struct tracecmd_output *handle);
+
+void out_set_file_state(struct tracecmd_output *handle, int new_state);
+int out_save_options_offset(struct tracecmd_output *handle,
+ unsigned long long start);
+unsigned long long out_copy_fd_compress(struct tracecmd_output *handle,
+ int fd, unsigned long long max,
+ unsigned long long *write_size, int page);
+void in_uncompress_reset(struct tracecmd_input *handle);
+int in_uncompress_block(struct tracecmd_input *handle);
+
+unsigned long long
+out_write_section_header(struct tracecmd_output *handle, unsigned short header_id,
+ char *description, int flags, bool option);
+int out_update_section_header(struct tracecmd_output *handle, unsigned long long offset);
+
+long long do_write_check(struct tracecmd_output *handle, const void *data, long long size);
+
+struct tracecmd_option *
+out_add_buffer_option(struct tracecmd_output *handle, const char *name,
+ unsigned short id, unsigned long long data_offset,
+ int cpus, struct data_file_write *cpu_data, int page_size);
+
+struct cpu_data_source {
+ int fd;
+ int size;
+ off64_t offset;
+};
+
+int out_write_cpu_data(struct tracecmd_output *handle, int cpus,
+ struct cpu_data_source *data, const char *buff_name);
+int out_write_emty_cpu_data(struct tracecmd_output *handle, int cpus);
+off64_t msg_lseek(struct tracecmd_msg_handle *msg_handle, off64_t offset, int whence);
+unsigned long long get_last_option_offset(struct tracecmd_input *handle);
+unsigned int get_meta_strings_size(struct tracecmd_input *handle);
+
+#endif /* _TRACE_CMD_LOCAL_H */
diff --git a/lib/trace-cmd/include/trace-hash-local.h b/lib/trace-cmd/include/trace-hash-local.h
new file mode 100644
index 00000000..70a0e76a
--- /dev/null
+++ b/lib/trace-cmd/include/trace-hash-local.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _TRACE_HASH_LOCAL_H
+#define _TRACE_HASH_LOCAL_H
+
+/*
+ * trace_hash - scramble a 32-bit value into a hash
+ * @val: value to hash
+ *
+ * The mixing steps are based off of Paul Hsieh's super fast hash:
+ *   http://www.azillionmonkeys.com/qed/hash.html
+ * Note, he released this code under the GPL 2.0 license, which
+ * is the same as the license for the programs that use it here.
+ *
+ * Returns the hashed value.
+ */
+static inline unsigned int trace_hash(unsigned int val)
+{
+	const unsigned int low = val & 0xffff;
+	const unsigned int high = val >> 16;
+	unsigned int h = 12546869;	/* seed, described as a random prime */
+
+	/* Fold both 16-bit halves of the input into the seed */
+	h += low;
+	h = (h << 16) ^ (high ^ h);
+	h += h >> 11;
+
+	/* Final avalanche */
+	h ^= h << 3;
+	h += h >> 5;
+	h ^= h << 4;
+	h += h >> 17;
+	h ^= h << 25;
+	h += h >> 6;
+
+	return h;
+}
+
+/*
+ * trace_hash_str - hash a NUL-terminated string
+ * @str: string to hash, must not be NULL
+ *
+ * Every character is shifted left by its index modulo 16 and summed;
+ * the sum is then passed through trace_hash().
+ *
+ * The accumulation is done in unsigned arithmetic: shifting a negative
+ * (signed char) value or overflowing a signed sum is undefined behavior,
+ * while the unsigned form wraps and yields the same bit pattern.
+ */
+static inline unsigned int trace_hash_str(const char *str)
+{
+	unsigned int val = 0;
+	unsigned int i;
+
+	for (i = 0; str[i]; i++)
+		val += (unsigned int)str[i] << (i & 0xf);
+	return trace_hash(val);
+}
+#endif /* _TRACE_HASH_LOCAL_H */
diff --git a/lib/trace-cmd/include/trace-tsync-local.h b/lib/trace-cmd/include/trace-tsync-local.h
new file mode 100644
index 00000000..5bbc1db6
--- /dev/null
+++ b/lib/trace-cmd/include/trace-tsync-local.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#ifndef _TRACE_TSYNC_LOCAL_H
+#define _TRACE_TSYNC_LOCAL_H
+
+#include <stdbool.h>
+
+struct tracecmd_time_sync {
+ pthread_t thread; /* NOTE(review): presumably the time sync worker thread - confirm */
+ bool thread_running; /* NOTE(review): presumably set while @thread is alive - confirm */
+ unsigned long long trace_id;
+ char *proto_name; /* NOTE(review): presumably the name of the selected sync protocol - confirm */
+ int loop_interval; /* NOTE(review): units are not visible here - confirm */
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ pthread_barrier_t first_sync;
+ char *clock_str;
+ struct tracecmd_msg_handle *msg_handle;
+ void *context; /* NOTE(review): likely a struct clock_sync_context - confirm */
+ int guest_pid;
+ int vcpu_count;
+ int remote_id;
+ int local_id;
+};
+
+struct clock_sync_offsets {
+ /* Arrays with calculated time offsets at given time */
+ int sync_size; /* Allocated size of sync_ts,
+ * sync_offsets, sync_scalings and sync_frac
+ */
+ int sync_count; /* Number of elements in sync_ts,
+ * sync_offsets, sync_scalings and sync_frac
+ */
+ long long *sync_ts; /* NOTE(review): presumably the time each offset was calculated at - confirm */
+ long long *sync_offsets; /* sync_count entries, sync_size allocated */
+ long long *sync_scalings; /* sync_count entries, sync_size allocated */
+ long long *sync_frac; /* sync_count entries, sync_size allocated */
+};
+
+struct clock_sync_context {
+ void *proto_data; /* time sync protocol specific data */
+ bool is_server; /* server side time sync role */
+ bool is_guest; /* guest or host time sync role */
+ struct tracefs_instance *instance; /* ftrace buffer, used for time sync events */
+
+ int cpu_count; /* number of entries in the offsets array below */
+ struct clock_sync_offsets *offsets; /* Array of size cpu_count
+ * calculated offsets per CPU
+ */
+
+ /* Identifiers of local and remote time sync peers */
+ unsigned int local_id;
+ unsigned int remote_id;
+};
+
+int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int roles,
+ int supported_clocks, unsigned int flags,
+ int (*init)(struct tracecmd_time_sync *),
+ int (*free)(struct tracecmd_time_sync *),
+ int (*calc)(struct tracecmd_time_sync *,
+ long long *, long long *, long long*,
+ long long *, unsigned int));
+int tracecmd_tsync_proto_unregister(char *proto_name);
+int ptp_clock_sync_register(void);
+
+#ifdef VSOCK
+int kvm_clock_sync_register(void);
+#else
+static inline int kvm_clock_sync_register(void)
+{
+ return 0; /* stub used when VSOCK is not defined: nothing is registered, 0 is reported */
+}
+#endif
+
+#endif /* _TRACE_TSYNC_LOCAL_H */
diff --git a/lib/trace-cmd/include/trace-write-local.h b/lib/trace-cmd/include/trace-write-local.h
new file mode 100644
index 00000000..046992f1
--- /dev/null
+++ b/lib/trace-cmd/include/trace-write-local.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef _TRACE_WRITE_LOCAL_H
+#define _TRACE_WRITE_LOCAL_H
+
+/* Local for trace-input.c, trace-output.c and trace-msg.c */
+
+/*
+ * __do_write - write a whole buffer, retrying on short writes
+ * @fd: descriptor to write to
+ * @data: bytes to write
+ * @size: number of bytes in @data
+ *
+ * Keeps calling write() until @size bytes are out, write() reports 0,
+ * or write() fails.
+ *
+ * Returns the number of bytes written (less than @size only when write()
+ * returned 0), or the negative write() result on error.
+ */
+static inline ssize_t __do_write(int fd, const void *data, size_t size)
+{
+	const char *bytes = data;
+	ssize_t done = 0;
+	ssize_t n;
+
+	for (;;) {
+		n = write(fd, bytes + done, size - done);
+		if (n < 0)
+			return n;
+		if (n == 0)
+			break;
+
+		done += n;
+		if (done == size)
+			break;
+	}
+
+	return done;
+}
+
+/*
+ * __do_write_check - write a whole buffer and report only success or failure
+ * @fd: descriptor to write to
+ * @data: bytes to write
+ * @size: number of bytes in @data
+ *
+ * Returns 0 when exactly @size bytes were written, the negative result of
+ * __do_write() when it failed, or -1 when fewer bytes were written.
+ */
+static inline ssize_t
+__do_write_check(int fd, const void *data, size_t size)
+{
+	ssize_t written = __do_write(fd, data, size);
+
+	if (written < 0)
+		return written;
+
+	return written == size ? 0 : -1;
+}
+
+#endif /* _TRACE_WRITE_LOCAL_H */
diff --git a/lib/trace-cmd/plugins/Makefile b/lib/trace-cmd/plugins/Makefile
new file mode 100644
index 00000000..ed5a9cea
--- /dev/null
+++ b/lib/trace-cmd/plugins/Makefile
@@ -0,0 +1,58 @@
+include $(src)/scripts/utils.mk
+
+bdir:=$(obj)/lib/trace-cmd/plugins
+
+PLUGIN_OBJS =
+
+PLUGIN_OBJS := $(PLUGIN_OBJS:%.o=$(bdir)/%.o)
+PLUGIN_BUILD := $(PLUGIN_OBJS:$(bdir)/%.o=$(bdir)/%.so)
+
+PLUGINS := $(PLUGIN_BUILD)
+
+DEPS := $(PLUGIN_OBJS:$(bdir)/%.o=$(bdir)/.%.d)
+
+all: $(PLUGINS)
+
+$(bdir):
+ @mkdir -p $(bdir)
+
+$(PLUGIN_OBJS): | $(bdir)
+$(DEPS): | $(bdir)
+
+$(PLUGIN_OBJS): $(bdir)/%.o : %.c
+ $(Q)$(do_compile_plugin_obj)
+
+$(PLUGIN_BUILD): $(bdir)/%.so: $(bdir)/%.o
+ $(Q)$(do_plugin_build)
+
+$(DEPS): $(bdir)/.%.d: %.c
+ $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@
+
+$(PLUGIN_OBJS): $(bdir)/%.o : $(bdir)/.%.d
+
+PLUGINS_INSTALL = $(subst .so,.install,$(PLUGINS))
+
+$(PLUGINS_INSTALL): $(bdir)/%.install : $(bdir)/%.so force
+ $(Q)$(call do_install_data,$<,$(plugin_tracecmd_dir_SQ))
+
+install_plugins: $(PLUGINS_INSTALL)
+
+# The following targets are necessary to trigger a rebuild when
+# $(PLUGIN_DIR_TRACECMD) changes. Without them, a full clean build would be
+# necessary in order to get the binaries updated.
+
+$(bdir)/tracecmd_plugin_dir: $(bdir) force
+ $(Q)$(N)$(call update_dir, 'PLUGIN_DIR_TRACECMD=$(PLUGIN_DIR_TRACECMD)')
+
+dep_includes := $(wildcard $(DEPS))
+
+ifneq ($(dep_includes),)
+ include $(dep_includes)
+endif
+
+clean:
+ $(RM) -f $(bdir)/*.a $(bdir)/*.so $(bdir)/*.o $(bdir)/.*.d\
+ $(bdir)/tracecmd_plugin_dir
+
+force:
+.PHONY: clean force
diff --git a/lib/trace-cmd/test.c b/lib/trace-cmd/test.c
new file mode 100644
index 00000000..3db029aa
--- /dev/null
+++ b/lib/trace-cmd/test.c
@@ -0,0 +1,7 @@
+#include <trace-cmd/trace-cmd.h>
+
+int main()
+{
+ tracecmd_open_head("trace.dat", 0); /* result is not checked; NOTE(review): presumably this file only proves the library compiles and links - confirm */
+ return 0;
+}
diff --git a/lib/trace-cmd/trace-blk-hack.c b/lib/trace-cmd/trace-blk-hack.c
new file mode 100644
index 00000000..2a05cf98
--- /dev/null
+++ b/lib/trace-cmd/trace-blk-hack.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include "trace-cmd.h"
+#include "trace-local.h"
+
+static const char blk_event_start[] = /* sprintf() template: %d receives the guessed event ID */
+ "name: blktrace\n"
+ "ID: %d\n"
+ "format:\n"
+ "\tfield:unsigned short common_type;\toffset:0;\tsize:2;\n"
+ "\tfield:unsigned char common_flags;\toffset:2;\tsize:1;\n"
+ "\tfield:unsigned char common_preempt_count;\toffset:3;\tsize:1;\n"
+ "\tfield:int common_pid;\toffset:4;\tsize:4;\n";
+
+static const char blk_body[] = "\n" /* also passed to sprintf() as the format string, hence the doubled "%%d" */
+ "\tfield:u64 sector;\toffset:16;\tsize:8;\n"
+ "\tfield:int bytes;\toffset:24;\tsize:4;\n"
+ "\tfield:int action;\toffset:28;\tsize:4;\n"
+ "\tfield:int pid;\toffset:32;\tsize:4;\n"
+ "\tfield:int device;\toffset:36;\tsize:4;\n"
+ "\tfield:int cpu;\toffset:40;\tsize:4;\n"
+ "\tfield:short error;\toffset:44;\tsize:2;\n"
+ "\tfield:short pdu_len;\toffset:46;\tsize:2;\n"
+ "\tfield:void data;\toffset:48;\tsize:0;\n"
+ "\n"
+ "print fmt: \"%%d\", REC->pid\n";
+
+/*
+ * tracecmd_blk_hack - synthesize a "blktrace" event description
+ * @handle: input handle whose tep handle receives the new event
+ *
+ * Blk events are not exported in the events directory, so a format
+ * description is assembled by hand from blk_event_start, an optional
+ * common_lock_depth field and blk_body, and handed to tep_parse_event().
+ * The struct blk_io_trace is used directly; its first parts are left
+ * unused in order to not write over the ftrace data.
+ *
+ * The ID of TRACE_BLK has moved around over time. It is guessed as one
+ * past the ID of the first ftrace event found among "power", "kmem_free"
+ * and "user_stack", tried in that order.
+ *
+ * Returns 0 once the description was given to the parser (the parser
+ * result itself is not checked), or -1 when no anchor event exists or a
+ * common field does not have the expected offset and size.
+ */
+int tracecmd_blk_hack(struct tracecmd_input *handle)
+{
+	/* Events TRACE_BLK used to follow, oldest layout first */
+	static const char * const anchors[] = {
+		"power", "kmem_free", "user_stack",
+	};
+	/* Common fields that must be laid out exactly like the template */
+	static const struct {
+		const char *name;
+		int offset;
+		int size;
+	} required[] = {
+		{ "common_type",		0, 2 },
+		{ "common_flags",		2, 1 },
+		{ "common_preempt_count",	3, 1 },
+		{ "common_pid",			4, 4 },
+	};
+	struct tep_format_field *field;
+	struct tep_event *event = NULL;
+	struct tep_handle *tep;
+	char buf[4096]; /* way more than enough! */
+	unsigned int i;
+	int len;
+	int id;
+
+	tep = tracecmd_get_tep(handle);
+
+	for (i = 0; i < sizeof(anchors) / sizeof(anchors[0]); i++) {
+		event = tep_find_event_by_name(tep, "ftrace", anchors[i]);
+		if (event)
+			break;
+	}
+	/* Give up :( */
+	if (!event)
+		return -1;
+	id = event->id + 1;
+
+	/* Make sure the common fields exist */
+	for (i = 0; i < sizeof(required) / sizeof(required[0]); i++) {
+		field = tep_find_common_field(event, required[i].name);
+		if (!field)
+			return -1;
+		if (field->offset != required[i].offset ||
+		    field->size != required[i].size)
+			return -1;
+	}
+
+	len = sprintf(buf, blk_event_start, id);
+
+	/* lock depth is optional */
+	field = tep_find_common_field(event, "common_lock_depth");
+	if (field) {
+		if (field->offset != 8 || field->size != 4)
+			return -1;
+		len += sprintf(buf + len, "\tfield:int common_lock_depth;\toffset:8;\tsize:4;\n");
+	}
+
+	len += sprintf(buf + len, blk_body);
+
+	/* Parse this event */
+	tep_parse_event(tep, buf, len, "ftrace");
+
+	return 0;
+}
diff --git a/lib/trace-cmd/trace-compress-zlib.c b/lib/trace-cmd/trace-compress-zlib.c
new file mode 100644
index 00000000..413a0764
--- /dev/null
+++ b/lib/trace-cmd/trace-compress-zlib.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2021, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <zlib.h>
+#include <errno.h>
+
+#include "trace-cmd-private.h"
+
+#define __ZLIB_NAME "zlib"
+#define __ZLIB_WEIGTH 10
+
+/*
+ * zlib_compress - compress a memory block with zlib
+ * @ctx: unused, zlib needs no per-handle context
+ * @in: data to compress
+ * @in_bytes: number of bytes in @in
+ * @out: destination buffer
+ * @out_bytes: size of @out
+ *
+ * Returns the number of compressed bytes stored in @out, or -1 on failure
+ * with errno set to a (positive) error code describing the zlib error.
+ */
+static int zlib_compress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes)
+{
+	unsigned long obytes = out_bytes;
+	int ret;
+
+	ret = compress2((unsigned char *)out, &obytes,
+			(const unsigned char *)in, (unsigned long)in_bytes,
+			Z_BEST_COMPRESSION);
+	switch (ret) {
+	case Z_OK:
+		return obytes;
+	case Z_BUF_ERROR:
+		/* errno values are positive; a negated code is meaningless to callers */
+		errno = ENOBUFS;
+		break;
+	case Z_MEM_ERROR:
+		errno = ENOMEM;
+		break;
+	case Z_STREAM_ERROR:
+		errno = EINVAL;
+		break;
+	case Z_ERRNO:
+		/* errno is left as it is for this code */
+		break;
+	default:
+		errno = EFAULT;
+		break;
+	}
+
+	return -1;
+}
+
+/*
+ * zlib_decompress - uncompress a memory block with zlib
+ * @ctx: unused, zlib needs no per-handle context
+ * @in: compressed data
+ * @in_bytes: number of bytes in @in
+ * @out: destination buffer
+ * @out_bytes: size of @out
+ *
+ * Returns the number of uncompressed bytes stored in @out, or -1 on failure
+ * with errno set to a (positive) error code describing the zlib error.
+ */
+static int zlib_decompress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes)
+{
+	unsigned long obytes = out_bytes;
+	int ret;
+
+	ret = uncompress((unsigned char *)out, &obytes,
+			 (const unsigned char *)in, (unsigned long)in_bytes);
+	switch (ret) {
+	case Z_OK:
+		return obytes;
+	case Z_BUF_ERROR:
+		/* errno values are positive; a negated code is meaningless to callers */
+		errno = ENOBUFS;
+		break;
+	case Z_MEM_ERROR:
+		errno = ENOMEM;
+		break;
+	case Z_DATA_ERROR:
+		errno = EINVAL;
+		break;
+	case Z_ERRNO:
+		/* errno is left as it is for this code */
+		break;
+	default:
+		errno = EFAULT;
+		break;
+	}
+
+	return -1;
+}
+
+/*
+ * zlib_compress_bound - size of the buffer needed to compress @in_bytes
+ * @ctx: unused
+ * @in_bytes: number of bytes that are going to be compressed
+ *
+ * Returns whatever zlib's compressBound() reports for @in_bytes.
+ */
+static unsigned int zlib_compress_bound(void *ctx, unsigned int in_bytes)
+{
+	unsigned long bound = compressBound(in_bytes);
+
+	return bound;
+}
+
+/*
+ * zlib_is_supported - check if this plugin handles a given algorithm
+ * @name: name of the requested algorithm
+ * @version: requested version, or NULL to accept any version
+ *
+ * Returns true when @name is "zlib" and the leading number of @version
+ * is not greater than the leading number of the linked zlib version.
+ */
+static bool zlib_is_supported(const char *name, const char *version)
+{
+	const char *lib_version;
+
+	if (!name || strcmp(name, __ZLIB_NAME) != 0)
+		return false;
+
+	/* No version requested, the name match is enough */
+	if (!version)
+		return true;
+
+	lib_version = zlibVersion();
+	if (!lib_version)
+		return false;
+
+	/* Compare the major version number */
+	return atoi(version) <= atoi(lib_version);
+}
+
+/*
+ * tracecmd_zlib_init - register zlib as a compression algorithm
+ *
+ * Returns the result of tracecmd_compress_proto_register().
+ */
+int tracecmd_zlib_init(void)
+{
+	/* Members not listed here (the context hooks) stay zeroed */
+	struct tracecmd_compression_proto proto = {
+		.name		= __ZLIB_NAME,
+		.version	= zlibVersion(),
+		.weight		= __ZLIB_WEIGTH,
+		.compress	= zlib_compress,
+		.uncompress	= zlib_decompress,
+		.is_supported	= zlib_is_supported,
+		.compress_size	= zlib_compress_bound,
+	};
+
+	return tracecmd_compress_proto_register(&proto);
+}
diff --git a/lib/trace-cmd/trace-compress-zstd.c b/lib/trace-cmd/trace-compress-zstd.c
new file mode 100644
index 00000000..10ae7a4c
--- /dev/null
+++ b/lib/trace-cmd/trace-compress-zstd.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2022, Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
+ *
+ */
+#include <stdlib.h>
+#include <zstd.h>
+#include <errno.h>
+
+#include "trace-cmd-private.h"
+
+#define __ZSTD_NAME "zstd"
+#define __ZSTD_WEIGTH 5
+
+struct zstd_context {
+ ZSTD_CCtx *ctx_c; /* compression context, from ZSTD_createCCtx() */
+ ZSTD_DCtx *ctx_d; /* decompression context, from ZSTD_createDCtx() */
+};
+
+/*
+ * zstd_compress - compress a memory block with zstd
+ * @ctx: struct zstd_context created by new_zstd_context()
+ * @in: data to compress
+ * @in_bytes: number of bytes in @in
+ * @out: destination buffer
+ * @out_bytes: size of @out
+ *
+ * Returns the number of compressed bytes stored in @out, or -1 when @ctx
+ * is NULL or zstd reports an error.
+ */
+static int zstd_compress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes)
+{
+	struct zstd_context *zctx = ctx;
+	size_t written;
+
+	if (!zctx)
+		return -1;
+
+	written = ZSTD_compress2(zctx->ctx_c, out, out_bytes, in, in_bytes);
+	if (!ZSTD_isError(written))
+		return written;
+
+	return -1;
+}
+
+/*
+ * zstd_decompress - uncompress a memory block with zstd
+ * @ctx: struct zstd_context created by new_zstd_context()
+ * @in: compressed data
+ * @in_bytes: number of bytes in @in
+ * @out: destination buffer
+ * @out_bytes: size of @out
+ *
+ * Returns the number of uncompressed bytes stored in @out, or -1 on
+ * failure. When zstd reports an error, errno is set to EINVAL.
+ */
+static int zstd_decompress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes)
+{
+	struct zstd_context *context = ctx;
+	size_t ret;
+
+	if (!ctx)
+		return -1;
+
+	ret = ZSTD_decompressDCtx(context->ctx_d, out, out_bytes, in, in_bytes);
+	if (ZSTD_isError(ret)) {
+		/* errno values are positive; a negated code is meaningless to callers */
+		errno = EINVAL;
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * zstd_compress_bound - size of the buffer needed to compress @in_bytes
+ * @ctx: unused
+ * @in_bytes: number of bytes that are going to be compressed
+ *
+ * Returns whatever ZSTD_compressBound() reports for @in_bytes.
+ */
+static unsigned int zstd_compress_bound(void *ctx, unsigned int in_bytes)
+{
+	size_t bound = ZSTD_compressBound(in_bytes);
+
+	return bound;
+}
+
+/*
+ * zstd_is_supported - check if this plugin handles a given algorithm
+ * @name: name of the requested algorithm
+ * @version: requested version, not looked at
+ *
+ * Returns true when @name is "zstd", false otherwise.
+ */
+static bool zstd_is_supported(const char *name, const char *version)
+{
+	return name && strcmp(name, __ZSTD_NAME) == 0;
+}
+
+/*
+ * new_zstd_context - allocate the zstd compression and decompression contexts
+ *
+ * The compression context is configured with ZSTD_c_contentSizeFlag set to 0.
+ *
+ * Returns a struct zstd_context to be released with free_zstd_context(),
+ * or NULL if anything could not be allocated or configured.
+ */
+static void *new_zstd_context(void)
+{
+	struct zstd_context *zctx;
+
+	zctx = calloc(1, sizeof(*zctx));
+	if (!zctx)
+		return NULL;
+
+	zctx->ctx_c = ZSTD_createCCtx();
+	zctx->ctx_d = ZSTD_createDCtx();
+
+	if (zctx->ctx_c && zctx->ctx_d &&
+	    !ZSTD_isError(ZSTD_CCtx_setParameter(zctx->ctx_c, ZSTD_c_contentSizeFlag, 0)))
+		return zctx;
+
+	/* Something failed, drop whatever was created */
+	ZSTD_freeCCtx(zctx->ctx_c);
+	ZSTD_freeDCtx(zctx->ctx_d);
+	free(zctx);
+	return NULL;
+}
+/*
+ * free_zstd_context - release a context created by new_zstd_context()
+ * @ctx: the context, NULL is accepted and ignored
+ */
+static void free_zstd_context(void *ctx)
+{
+	struct zstd_context *zctx = ctx;
+
+	if (zctx) {
+		ZSTD_freeCCtx(zctx->ctx_c);
+		ZSTD_freeDCtx(zctx->ctx_d);
+		free(zctx);
+	}
+}
+
+/*
+ * tracecmd_zstd_init - register zstd as a compression algorithm
+ *
+ * Returns the result of tracecmd_compress_proto_register().
+ */
+int tracecmd_zstd_init(void)
+{
+	struct tracecmd_compression_proto proto = {
+		.name		= __ZSTD_NAME,
+		.version	= ZSTD_versionString(),
+		.weight		= __ZSTD_WEIGTH,
+		.compress	= zstd_compress,
+		.uncompress	= zstd_decompress,
+		.is_supported	= zstd_is_supported,
+		.compress_size	= zstd_compress_bound,
+		.new_context	= new_zstd_context,
+		.free_context	= free_zstd_context,
+	};
+
+	return tracecmd_compress_proto_register(&proto);
+}
diff --git a/lib/trace-cmd/trace-compress.c b/lib/trace-cmd/trace-compress.c
new file mode 100644
index 00000000..a63295e6
--- /dev/null
+++ b/lib/trace-cmd/trace-compress.c
@@ -0,0 +1,991 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2021, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdlib.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "trace-cmd-private.h"
+#include "trace-cmd-local.h"
+
+struct compress_proto {
+ struct compress_proto *next; /* next registered algorithm, NULL terminates the list */
+ char *proto_name; /* strdup() copy made by tracecmd_compress_proto_register() */
+ char *proto_version; /* strdup() copy made by tracecmd_compress_proto_register() */
+ int weight; /* compress_proto_select() picks the lowest weight */
+
+ int (*compress_block)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes);
+ int (*uncompress_block)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes);
+ unsigned int (*compress_size)(void *ctx, unsigned int bytes); /* buffer size needed to compress @bytes */
+ bool (*is_supported)(const char *name, const char *version);
+ void *(*new_context)(void); /* optional, result is passed as @ctx to the hooks above */
+ void (*free_context)(void *ctx); /* optional, releases what new_context() returned */
+};
+
+static struct compress_proto *proto_list; /* head of the list of registered algorithms */
+
+struct tracecmd_compression {
+ int fd; /* file the compressed blocks are read from / written to */
+ unsigned int capacity; /* allocated size of @buffer */
+ unsigned int capacity_read; /* number of valid data bytes in @buffer */
+ unsigned long pointer; /* current read/write position in @buffer */
+ char *buffer; /* holds the uncompressed data */
+ struct compress_proto *proto; /* algorithm used by this handle */
+ struct tep_handle *tep; /* passed to tep_read_number() for the 4 byte block sizes */
+ struct tracecmd_msg_handle *msg_handle; /* when set, do_write() sends the data here instead of @fd */
+ void *context; /* result of proto->new_context(), handed to the proto hooks */
+};
+
+/*
+ * read_fd - read exactly @len bytes from a file descriptor
+ * @fd: descriptor to read from
+ * @dst: destination buffer, at least @len bytes long
+ * @len: number of bytes to read
+ *
+ * read() is called until it stops returning data.
+ *
+ * Returns the number of bytes read, or -1 if fewer than @len bytes
+ * could be read.
+ */
+static int read_fd(int fd, char *dst, int len)
+{
+	size_t total = 0;
+	int n;
+
+	while ((n = read(fd, dst + total, len)) > 0) {
+		total += n;
+		len -= n;
+	}
+
+	return len ? -1 : (int)total;
+}
+
+/*
+ * write_fd - write a whole buffer to a file descriptor
+ * @fd: descriptor to write to
+ * @data: bytes to write
+ * @size: number of bytes in @data
+ *
+ * Keeps calling write() until @size bytes are out, write() reports 0,
+ * or write() fails.
+ *
+ * Returns the number of bytes written (less than @size only when write()
+ * returned 0), or the negative write() result on error.
+ */
+static long long write_fd(int fd, const void *data, size_t size)
+{
+	const char *bytes = data;
+	long long done = 0;
+	long long n;
+
+	for (;;) {
+		n = write(fd, bytes + done, size - done);
+		if (n < 0)
+			return n;
+		if (n == 0)
+			break;
+
+		done += n;
+		if (done == size)
+			break;
+	}
+
+	return done;
+}
+
+/*
+ * do_write - send data to the output of a compression handle
+ * @handle: compression handle
+ * @data: bytes to write
+ * @size: number of bytes in @data
+ *
+ * The data goes to the message handle when the compression handle has
+ * one, otherwise to its file descriptor.
+ *
+ * Returns @size (message handle) or the write_fd() result (file) on
+ * success, -1 when sending through the message handle fails.
+ */
+static long long do_write(struct tracecmd_compression *handle,
+			  const void *data, unsigned long long size)
+{
+	if (!handle->msg_handle)
+		return write_fd(handle->fd, data, size);
+
+	if (tracecmd_msg_data_send(handle->msg_handle, data, size))
+		return -1;
+
+	return size;
+}
+
+/*
+ * buffer_extend - make sure the compression buffer can hold @size bytes
+ * @handle: compression handle
+ * @size: required capacity in bytes
+ *
+ * Nothing is done if the buffer is already big enough. Otherwise it is
+ * reallocated to @size rounded up to the next multiple of BUFSIZ (a full
+ * BUFSIZ is added when @size already is a multiple).
+ *
+ * Returns 0 on success, -1 if the rounded size does not fit in an
+ * unsigned int or the reallocation fails; the buffer is left untouched
+ * in both failure cases.
+ */
+static inline int buffer_extend(struct tracecmd_compression *handle, unsigned int size)
+{
+	unsigned int extend;
+	char *buf;
+
+	if (size <= handle->capacity)
+		return 0;
+
+	extend = (size / BUFSIZ + 1) * BUFSIZ;
+	/*
+	 * The rounded value is always bigger than @size unless the
+	 * multiplication wrapped. A wrapped value must never reach
+	 * realloc(): a size of 0 could free the buffer behind our back.
+	 */
+	if (extend <= size)
+		return -1;
+
+	buf = realloc(handle->buffer, extend);
+	if (!buf)
+		return -1;
+	handle->buffer = buf;
+	handle->capacity = extend;
+
+	return 0;
+}
+
+/**
+ * tracecmd_compress_lseek - Move the read/write pointer into the compression buffer
+ * @handle: compression handle
+ * @offset: number of bytes to move the pointer, can be negative or positive
+ * @whence: the starting position of the pointer movement: SEEK_SET (start
+ *	    of the buffer), SEEK_CUR (current pointer) or SEEK_END (the
+ *	    allocated capacity of the buffer)
+ *
+ * The buffer is extended if the new position is beyond its capacity.
+ *
+ * Returns the new file pointer on success, or -1 in case of an error:
+ * no handle or buffer, unknown @whence, a resulting position that is
+ * negative or does not fit in the buffer's unsigned int capacity, or a
+ * failure to extend the buffer.
+ */
+off64_t tracecmd_compress_lseek(struct tracecmd_compression *handle, off64_t offset, int whence)
+{
+	off64_t p;
+
+	if (!handle || !handle->buffer)
+		return (off64_t)-1;
+
+	switch (whence) {
+	case SEEK_CUR:
+		p = (off64_t)handle->pointer + offset;
+		break;
+	case SEEK_END:
+		p = (off64_t)handle->capacity + offset;
+		break;
+	case SEEK_SET:
+		p = offset;
+		break;
+	default:
+		return (off64_t)-1;
+	}
+
+	/*
+	 * buffer_extend() takes an unsigned int. A negative or too large
+	 * position would be silently truncated there, so refuse it here.
+	 */
+	if (p < 0 || p > (off64_t)~0U)
+		return (off64_t)-1;
+
+	if (buffer_extend(handle, p))
+		return (off64_t)-1;
+
+	handle->pointer = p;
+
+	return p;
+}
+
+/*
+ * compress_read - copy data out of the compression buffer
+ * @handle: compression handle
+ * @dst: destination buffer
+ * @len: number of bytes wanted
+ *
+ * Copies from the current buffer pointer, without moving it. The length
+ * is trimmed so the copy stops at the end of the valid data.
+ *
+ * Returns the number of bytes copied, or -1 if the pointer is already
+ * beyond the valid data.
+ */
+static int compress_read(struct tracecmd_compression *handle, char *dst, int len)
+{
+	const char *src = handle->buffer + handle->pointer;
+
+	if (handle->pointer > handle->capacity_read)
+		return -1;
+
+	/* Do not read past the end of the valid data */
+	if (handle->pointer + len > handle->capacity_read)
+		len = handle->capacity_read - handle->pointer;
+
+	memcpy(dst, src, len);
+
+	return len;
+}
+
+/**
+ * tracecmd_compress_pread - pread() on compression buffer
+ * @handle: compression handle
+ * @dst: return, store the read data
+ * @len: length of data to be read
+ * @offset: offset in the buffer of data to be read, must not be negative
+ *	    or beyond the valid data
+ *
+ * Read a @len of data from the compression buffer at given @offset.
+ * Note that the buffer pointer is moved to @offset by this call; it is
+ * not advanced past the data that was read.
+ *
+ * On success returns the number of bytes read, or -1 on failure.
+ */
+int tracecmd_compress_pread(struct tracecmd_compression *handle, char *dst, int len, off_t offset)
+{
+	if (!handle || !handle->buffer)
+		return -1;
+
+	/* A negative offset must not reach the seek, it is not a valid position */
+	if (offset < 0 || offset > handle->capacity_read)
+		return -1;
+
+	if (tracecmd_compress_lseek(handle, offset, SEEK_SET) < 0)
+		return -1;
+
+	return compress_read(handle, dst, len);
+}
+
+/**
+ * tracecmd_compress_buffer_read - read() from compression buffer
+ * @handle: compression handle
+ * @dst: return, store the read data
+ * @len: length of data to be read
+ *
+ * Read up to @len bytes from the current position of the compression
+ * buffer and advance the buffer pointer by the number of bytes read.
+ *
+ * On success returns the number of bytes read, or -1 on failure.
+ */
+int tracecmd_compress_buffer_read(struct tracecmd_compression *handle, char *dst, int len)
+{
+	int count;
+
+	if (!handle || !handle->buffer)
+		return -1;
+
+	count = compress_read(handle, dst, len);
+	if (count <= 0)
+		return count;
+
+	handle->pointer += count;
+	return count;
+}
+
+/**
+ * tracecmd_compress_reset - Reset the compression buffer
+ * @handle: compression handle, NULL is accepted and ignored
+ *
+ * Free the compression buffer and reset the pointer and both capacities
+ * to zero. Any data currently in the buffer is destroyed.
+ */
+void tracecmd_compress_reset(struct tracecmd_compression *handle)
+{
+	if (handle) {
+		free(handle->buffer);
+		handle->buffer = NULL;
+		handle->capacity = 0;
+		handle->capacity_read = 0;
+		handle->pointer = 0;
+	}
+}
+
+/**
+ * tracecmd_uncompress_block - uncompress a memory block
+ * @handle: compression handle
+ *
+ * Read one compressed block from the file of @handle and uncompress it
+ * into the internal buffer, replacing whatever the buffer held before.
+ * A block consists of the 4 byte compressed size, the 4 byte uncompressed
+ * size and the compressed data. The tracecmd_compress_buffer_read() can be
+ * used to read the uncompressed data from the buffer.
+ *
+ * Returns 0 on success, or -1 in case of an error.
+ */
+int tracecmd_uncompress_block(struct tracecmd_compression *handle)
+{
+	unsigned int plain_len;
+	unsigned int packed_len;
+	char *packed = NULL;
+	char word[4];
+	int out_len = -1;
+	int alloc_len;
+	int ret = -1;
+
+	if (!handle || !handle->proto || !handle->proto->uncompress_block)
+		return -1;
+
+	tracecmd_compress_reset(handle);
+
+	/* Block header: compressed size first, uncompressed size second */
+	if (read(handle->fd, word, 4) != 4)
+		return -1;
+	packed_len = tep_read_number(handle->tep, word, 4);
+
+	if (read(handle->fd, word, 4) != 4)
+		return -1;
+	plain_len = tep_read_number(handle->tep, word, 4);
+
+	/* The buffer is sized for the bigger of the two */
+	alloc_len = plain_len;
+	if (packed_len >= plain_len)
+		alloc_len = packed_len;
+
+	handle->buffer = malloc(alloc_len);
+	if (!handle->buffer)
+		return -1;
+
+	packed = malloc(packed_len);
+	if (packed && read_fd(handle->fd, packed, packed_len) >= 0)
+		out_len = handle->proto->uncompress_block(handle->context, packed, packed_len,
+							  handle->buffer, alloc_len);
+
+	if (out_len >= 0) {
+		handle->pointer = 0;
+		handle->capacity_read = out_len;
+		handle->capacity = alloc_len;
+		ret = 0;
+	} else {
+		/* Do not leave a half filled buffer behind */
+		tracecmd_compress_reset(handle);
+	}
+
+	free(packed);
+	return ret;
+}
+
+/**
+ * tracecmd_compress_block - compress a memory block
+ * @handle: compression handle
+ *
+ * Compress the content of the internal memory buffer, from its start up
+ * to the current buffer pointer, and write the result in the file as:
+ *  - 4 bytes, size of the compressed data
+ *  - 4 bytes, size of the uncompressed data
+ *  - the compressed data
+ * The tracecmd_compress_buffer_write() can be used to write data into the
+ * internal memory buffer, before calling this API. The internal buffer is
+ * reset on success.
+ *
+ * Returns 0 on success, or -1 in case of an error.
+ */
+int tracecmd_compress_block(struct tracecmd_compression *handle)
+{
+	unsigned int size, usize;
+	int csize;
+	char *buf;
+	int endian4;
+	int ret = -1;
+
+	if (!handle || !handle->proto ||
+	    !handle->proto->compress_size || !handle->proto->compress_block)
+		return -1;
+
+	size = handle->proto->compress_size(handle->context, handle->pointer);
+
+	buf = malloc(size);
+	if (!buf)
+		return -1;
+
+	/*
+	 * The hook returns a negative int on failure, so the result must be
+	 * kept in a signed variable for the error check to work.
+	 */
+	csize = handle->proto->compress_block(handle->context, handle->buffer,
+					      handle->pointer, buf, size);
+	if (csize < 0)
+		goto out;
+
+	/* Write compressed data size */
+	endian4 = tep_read_number(handle->tep, &csize, 4);
+	if (do_write(handle, &endian4, 4) != 4)
+		goto out;
+
+	/*
+	 * Write uncompressed data size. The pointer is an unsigned long,
+	 * copy it to a 4 byte variable first so that the right bytes are
+	 * converted regardless of the host byte order.
+	 */
+	usize = handle->pointer;
+	endian4 = tep_read_number(handle->tep, &usize, 4);
+	if (do_write(handle, &endian4, 4) != 4)
+		goto out;
+
+	/* Write compressed data */
+	if (do_write(handle, buf, csize) != csize)
+		goto out;
+
+	ret = 0;
+	tracecmd_compress_reset(handle);
+out:
+	free(buf);
+	return ret;
+}
+
+/**
+ * tracecmd_compress_buffer_write - write() to compression buffer
+ * @handle: compression handle
+ * @data: data to be written
+ * @size: size of @data
+ *
+ * Write @data of @size in the compression buffer at the current buffer
+ * pointer, extending the buffer if needed, and advance the pointer.
+ *
+ * Returns 0 on success, or -1 on failure: no handle, no @data for a non
+ * empty write, a resulting position that does not fit in the unsigned int
+ * capacity of the buffer, or a failure to extend the buffer.
+ */
+int tracecmd_compress_buffer_write(struct tracecmd_compression *handle,
+				   const void *data, unsigned long long size)
+{
+	if (!handle || (!data && size))
+		return -1;
+
+	/*
+	 * buffer_extend() takes an unsigned int; a bigger end position would
+	 * be truncated there and the memcpy() below would overrun the buffer.
+	 */
+	if (size > ~0U || handle->pointer > ~0U - size)
+		return -1;
+
+	if (buffer_extend(handle, handle->pointer + size))
+		return -1;
+
+	/* The buffer can still be NULL for an empty write, nothing to copy then */
+	if (size)
+		memcpy(&handle->buffer[handle->pointer], data, size);
+	handle->pointer += size;
+	if (handle->capacity_read < handle->pointer)
+		handle->capacity_read = handle->pointer;
+
+	return 0;
+}
+
+/**
+ * tracecmd_compress_init - initialize the library with available compression algorithms
+ */
+void tracecmd_compress_init(void)
+{
+ struct timeval time;
+
+ gettimeofday(&time, NULL);
+ srand((time.tv_sec * 1000) + (time.tv_usec / 1000)); /* seed rand() with the current time in milliseconds */
+
+#ifdef HAVE_ZLIB
+ tracecmd_zlib_init(); /* zlib is registered only when built with HAVE_ZLIB */
+#endif
+ tracecmd_zstd_init(); /* called unconditionally; the return value is not checked */
+}
+
+/*
+ * compress_proto_select - pick the preferred registered algorithm
+ *
+ * Returns the registered algorithm with the lowest weight. Among equal
+ * weights the one found first in the list wins. NULL is returned when
+ * nothing is registered.
+ */
+static struct compress_proto *compress_proto_select(void)
+{
+	struct compress_proto *best = NULL;
+	struct compress_proto *cur;
+
+	for (cur = proto_list; cur; cur = cur->next) {
+		if (best && best->weight <= cur->weight)
+			continue;
+		best = cur;
+	}
+
+	return best;
+}
+
+/**
+ * tracecmd_compress_alloc - Allocate a new compression context
+ * @name: name of the compression algorithm.
+ *	  If NULL - auto select the registered algorithm with the lowest weight
+ * @version: version of the compression algorithm, can be NULL
+ * @fd: file descriptor for reading / writing data
+ * @tep: tep handle, used to encode the data
+ * @msg_handle: message handle, use it for writing data instead of @fd
+ *
+ * If the selected algorithm has a new_context hook, it is called and its
+ * result is stored in the new context.
+ *
+ * Returns NULL on failure or pointer to allocated compression context.
+ * The returned context must be freed by tracecmd_compress_destroy()
+ */
+struct tracecmd_compression *tracecmd_compress_alloc(const char *name, const char *version,
+						     int fd, struct tep_handle *tep,
+						     struct tracecmd_msg_handle *msg_handle)
+{
+	struct tracecmd_compression *comp;
+	struct compress_proto *proto;
+
+	if (!name) {
+		proto = compress_proto_select();
+	} else {
+		/* First registered algorithm that accepts the name and version */
+		for (proto = proto_list; proto; proto = proto->next) {
+			if (proto->is_supported && proto->is_supported(name, version))
+				break;
+		}
+	}
+	if (!proto)
+		return NULL;
+
+	comp = calloc(1, sizeof(*comp));
+	if (!comp)
+		return NULL;
+
+	comp->proto = proto;
+	comp->msg_handle = msg_handle;
+	comp->tep = tep;
+	comp->fd = fd;
+	if (proto->new_context)
+		comp->context = proto->new_context();
+
+	return comp;
+}
+
+/**
+ * tracecmd_compress_destroy - Free a compression context
+ * @handle: handle to the compression context that will be freed,
+ *	    NULL is accepted and ignored
+ *
+ * The internal buffer is released, the free_context hook of the algorithm
+ * is called if there is one, and the handle itself is freed.
+ */
+void tracecmd_compress_destroy(struct tracecmd_compression *handle)
+{
+	struct compress_proto *proto;
+
+	if (!handle)
+		return;
+
+	tracecmd_compress_reset(handle);
+
+	proto = handle->proto;
+	if (proto && proto->free_context)
+		proto->free_context(handle->context);
+
+	free(handle);
+}
+
+/**
+ * tracecmd_compress_is_supported - check if compression algorithm is supported
+ * @name: name of the compression algorithm.
+ * @version: version of the compression algorithm.
+ *
+ * Asks the is_supported hook of every registered algorithm whether it
+ * handles the given name and version.
+ * Returns true if the algorithm is supported or false if it is not, or if
+ * @name is NULL.
+ */
+bool tracecmd_compress_is_supported(const char *name, const char *version)
+{
+	struct compress_proto *proto;
+
+	/* The function returns a bool, not a pointer */
+	if (!name)
+		return false;
+
+	for (proto = proto_list; proto; proto = proto->next) {
+		if (proto->is_supported && proto->is_supported(name, version))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * tracecmd_compress_proto_get_name - get name and version of compression algorithm
+ * @compress: compression handle.
+ * @name: return, name of the compression algorithm, can be NULL.
+ * @version: return, version of the compression algorithm, can be NULL.
+ *
+ * Returns 0 on success, or -1 if there is no handle or no algorithm
+ * attached to it. If 0 is returned, the name and version of the algorithm
+ * are stored in @name and @version, for each of them that is not NULL.
+ * The returned strings must *not* be freed.
+ */
+int tracecmd_compress_proto_get_name(struct tracecmd_compression *compress,
+				     const char **name, const char **version)
+{
+	struct compress_proto *proto;
+
+	proto = compress ? compress->proto : NULL;
+	if (!proto)
+		return -1;
+
+	if (version)
+		*version = proto->proto_version;
+	if (name)
+		*name = proto->proto_name;
+
+	return 0;
+}
+
+/**
+ * tracecmd_compress_proto_register - register a new compression algorithm
+ * @proto: description of the algorithm, with these members:
+ *	name: name of the compression algorithm, mandatory.
+ *	version: version of the compression algorithm, mandatory.
+ *	weight: weight of the compression algorithm, lower is better.
+ *	compress: compression hook, called to compress a memory block,
+ *		  mandatory.
+ *	uncompress: uncompression hook, called to uncompress a memory block,
+ *		    mandatory.
+ *	compress_size: hook, called to get the required minimum size of the
+ *		       buffer for compression given number of bytes.
+ *	is_supported: check hook, called to check if compression with given
+ *		      name and version is supported by this plugin.
+ *	new_context: optional hook, called to create a per handle context.
+ *	free_context: optional hook, called to free a per handle context.
+ *
+ * The name and the version strings are copied, @proto itself is not
+ * referenced after the call.
+ *
+ * Returns 0 on success, or -1 in case of an error. If algorithm with given name
+ * and version is already registered, -1 is returned.
+ */
+int tracecmd_compress_proto_register(struct tracecmd_compression_proto *proto)
+{
+	struct compress_proto *new;
+
+	if (!proto || !proto->name || !proto->compress || !proto->uncompress)
+		return -1;
+
+	/* The version is duplicated below, strdup() must not get a NULL */
+	if (!proto->version)
+		return -1;
+
+	if (tracecmd_compress_is_supported(proto->name, proto->version))
+		return -1;
+
+	new = calloc(1, sizeof(*new));
+	if (!new)
+		return -1;
+
+	new->proto_name = strdup(proto->name);
+	if (!new->proto_name)
+		goto error;
+
+	new->proto_version = strdup(proto->version);
+	if (!new->proto_version)
+		goto error;
+
+	new->compress_block = proto->compress;
+	new->uncompress_block = proto->uncompress;
+	new->compress_size = proto->compress_size;
+	new->is_supported = proto->is_supported;
+	new->weight = proto->weight;
+	new->new_context = proto->new_context;
+	new->free_context = proto->free_context;
+
+	/* Insert at the head of the list */
+	new->next = proto_list;
+	proto_list = new;
+	return 0;
+
+error:
+	free(new->proto_name);
+	free(new->proto_version);
+	free(new);
+	return -1;
+}
+
+/**
+ * tracecmd_compress_free - free the library resources, related to available compression algorithms
+ *
+ * Every registered algorithm is removed from the list and freed, together
+ * with its name and version strings.
+ */
+void tracecmd_compress_free(void)
+{
+	struct compress_proto *next;
+	struct compress_proto *cur;
+
+	for (cur = proto_list; cur; cur = next) {
+		next = cur->next;
+		free(cur->proto_name);
+		free(cur->proto_version);
+		free(cur);
+	}
+	proto_list = NULL;
+}
+
+/**
+ * tracecmd_compress_protos_get - get a list of all supported compression algorithms and versions
+ * @names: return, array with names of all supported compression algorithms
+ * @versions: return, array with versions of all supported compression algorithms
+ *
+ * On success, the size of @names and @versions arrays is returned.
+ * Those arrays are allocated by the API and must be freed with free() by the
+ * caller. Only the arrays are allocated: the strings in them belong to the
+ * registered algorithms and are not copies. Both arrays are with same size,
+ * each name from @names corresponds to a version from @versions. The last
+ * element in both arrays is a NULL pointer.
+ * If no algorithm is registered, 0 is returned and @names and @versions are
+ * not touched.
+ * On error -1 is returned and @names and @versions arrays are not allocated.
+ */
+int tracecmd_compress_protos_get(char ***names, char ***versions)
+{
+	struct compress_proto *proto;
+	char **name_arr;
+	char **ver_arr;
+	int count = 0;
+	int i = 0;
+
+	for (proto = proto_list; proto; proto = proto->next)
+		count++;
+
+	if (count < 1)
+		return count;
+
+	/* One extra slot in each array for the terminating NULL */
+	name_arr = calloc(count + 1, sizeof(char *));
+	if (!name_arr)
+		return -1;
+
+	ver_arr = calloc(count + 1, sizeof(char *));
+	if (!ver_arr) {
+		free(name_arr);
+		return -1;
+	}
+
+	for (proto = proto_list; proto && i < count; proto = proto->next, i++) {
+		name_arr[i] = proto->proto_name;
+		ver_arr[i] = proto->proto_version;
+	}
+	name_arr[i] = NULL;
+	ver_arr[i] = NULL;
+
+	*names = name_arr;
+	*versions = ver_arr;
+	return count;
+}
+
+/**
+ * tracecmd_compress_copy_from - Copy and compress data from a file
+ * @handle: compression handle
+ * @fd: file descriptor to uncompressed data to copy from
+ * @chunk_size: size of one compression chunk
+ * @read_size: Pointer to max bytes to read from @fd. On success it is updated
+ *	       with the number of uncompressed bytes actually read. If NULL or
+ *	       0 is passed, read until the EOF is reached.
+ * @write_size: return, size of the compressed data written into @handle
+ *		(chunk headers included, the leading chunks count excluded)
+ *
+ * This function reads uncompressed data from given @fd, compresses the data
+ * using the @handle compression context and writes the compressed data into the
+ * fd associated with the @handle. The data is compressed on chunks with given
+ * @chunk_size size. The compressed data is written in the format:
+ *  - 4 bytes, chunks count
+ *  - for each chunk:
+ *    - 4 bytes, size of compressed data in this chunk
+ *    - 4 bytes, uncompressed size of the data in this chunk
+ *    - data, bytes of <size of compressed data in this chunk>
+ *
+ * On success 0 is returned, @read_size and @write_size are updated with the
+ * size of read and written data. 0 is also returned, without doing anything,
+ * when @handle has no usable compression protocol. On a read, write, seek,
+ * allocation or compression failure -1 is returned.
+ */
+int tracecmd_compress_copy_from(struct tracecmd_compression *handle, int fd, int chunk_size,
+				unsigned long long *read_size, unsigned long long *write_size)
+{
+	unsigned int rchunk = 0;
+	unsigned int chunks = 0;
+	unsigned int wsize = 0;
+	unsigned int rsize = 0;
+	unsigned int rmax = 0;
+	unsigned int csize;
+	unsigned int size;
+	unsigned int all;
+	off64_t offset;
+	char *buf_from;
+	char *buf_to;
+	int read_err = 0;
+	int endian4;
+	int ret;
+	int r;	/* must be signed: read() reports errors with -1 */
+
+	if (!handle || !handle->proto ||
+	    !handle->proto->compress_block || !handle->proto->compress_size)
+		return 0;
+
+	if (read_size)
+		rmax = *read_size;
+	csize = handle->proto->compress_size(handle->context, chunk_size);
+	buf_from = malloc(chunk_size);
+	if (!buf_from)
+		return -1;
+
+	buf_to = malloc(csize);
+	if (!buf_to) {
+		free(buf_from);
+		return -1;
+	}
+
+	/* save the initial offset and write 0 as initial chunk count */
+	offset = lseek64(handle->fd, 0, SEEK_CUR);
+	if (offset == (off64_t)-1 || write_fd(handle->fd, &chunks, 4) != 4) {
+		free(buf_from);
+		free(buf_to);
+		return -1;
+	}
+
+	do {
+		all = 0;
+		if (rmax > 0 && (rmax - rsize) < chunk_size)
+			rchunk = (rmax - rsize);
+		else
+			rchunk = chunk_size;
+
+		/* Fill one chunk; stop early on EOF or on a read error */
+		do {
+			r = read(fd, buf_from + all, rchunk - all);
+			if (r <= 0)
+				break;
+			all += r;
+		} while (all != rchunk);
+
+		if (r < 0) {
+			read_err = 1;
+			break;
+		}
+		if (rmax > 0 && rsize >= rmax)
+			break;
+		rsize += all;
+		size = csize;
+		if (all > 0) {
+			ret = handle->proto->compress_block(handle->context,
+							    buf_from, all, buf_to, size);
+			if (ret < 0) {
+				if (errno == EINTR)
+					continue;
+				break;
+			}
+			size = ret;
+			/* Write compressed data size */
+			endian4 = tep_read_number(handle->tep, &size, 4);
+			ret = write_fd(handle->fd, &endian4, 4);
+			if (ret != 4)
+				break;
+
+			/* Write uncompressed data size */
+			endian4 = tep_read_number(handle->tep, &all, 4);
+			ret = write_fd(handle->fd, &endian4, 4);
+			if (ret != 4)
+				break;
+
+			/* Write the compressed data */
+			ret = write_fd(handle->fd, buf_to, size);
+			if (ret != size)
+				break;
+			/* data + compress header */
+			wsize += (size + 8);
+			chunks++;
+		}
+	} while (all > 0);
+
+	free(buf_from);
+	free(buf_to);
+
+	/* A non zero @all means the loop was left in the middle of a chunk */
+	if (read_err || all)
+		return -1;
+
+	if (lseek64(handle->fd, offset, SEEK_SET) == (off64_t)-1)
+		return -1;
+
+	/* write chunks count, converted the same way as the chunk sizes */
+	endian4 = tep_read_number(handle->tep, &chunks, 4);
+	if (write_fd(handle->fd, &endian4, 4) != 4)
+		return -1;
+	if (lseek64(handle->fd, 0, SEEK_END) == (off64_t)-1)
+		return -1;
+
+	if (read_size)
+		*read_size = rsize;
+	if (write_size)
+		*write_size = wsize;
+	return 0;
+}
+
+/**
+ * tracecmd_load_chunks_info - Read compression chunks information from the file
+ * @handle: compression handle
+ * @chunks_info: return, array with compression chunks information
+ *
+ * This function reads information of all compression chunks in the current
+ * compression block from the file and fills that information in a newly
+ * allocated array @chunks_info which is returned.
+ *
+ * For every chunk the array entry holds the file offset of the chunk header
+ * (zoffset), the compressed size (zsize), the uncompressed size (size) and the
+ * offset of the chunk inside the uncompressed data (offset), which is the sum
+ * of the uncompressed sizes of all previous chunks.
+ *
+ * On success count of compression chunks is returned. Array of that count is
+ * allocated and returned in @chunks_info. Each entry describes one compression
+ * chunk. If the block holds no chunks, 0 is returned and @chunks_info is not
+ * modified. On error -1 is returned. In case of success, @chunks_info must be
+ * freed by free().
+ *
+ * Once the chunks count was read, the file position of @handle is moved back
+ * to where it was on entry before the function returns.
+ */
+int tracecmd_load_chunks_info(struct tracecmd_compression *handle,
+ struct tracecmd_compress_chunk **chunks_info)
+{
+ struct tracecmd_compress_chunk *chunks = NULL;
+ unsigned long long size = 0; /* running total of uncompressed bytes */
+ unsigned int count = 0;
+ off64_t offset;
+ int ret = -1;
+ char buf[4];
+ int i;
+
+ if (!handle)
+ return -1;
+
+ offset = lseek64(handle->fd, 0, SEEK_CUR); /* remembered so it can be restored at "out" */
+ if (offset == (off64_t)-1)
+ return -1;
+
+ if (read(handle->fd, buf, 4) != 4)
+ return -1;
+
+ count = tep_read_number(handle->tep, buf, 4);
+ if (!count) {
+ ret = 0;
+ goto out;
+ }
+
+ chunks = calloc(count, sizeof(struct tracecmd_compress_chunk));
+ if (!chunks)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ chunks[i].zoffset = lseek64(handle->fd, 0, SEEK_CUR); /* position of this chunk's 8 byte header */
+ if (chunks[i].zoffset == (off_t)-1)
+ goto out;
+ if (read(handle->fd, buf, 4) != 4)
+ goto out;
+ chunks[i].zsize = tep_read_number(handle->tep, buf, 4);
+ chunks[i].offset = size;
+ if (read(handle->fd, buf, 4) != 4)
+ goto out;
+ chunks[i].size = tep_read_number(handle->tep, buf, 4);
+ size += chunks[i].size;
+ if (lseek64(handle->fd, chunks[i].zsize, SEEK_CUR) == (off64_t)-1) /* skip the compressed payload */
+ goto out;
+ }
+
+ ret = count;
+out:
+ if (lseek64(handle->fd, offset, SEEK_SET) == (off64_t)-1)
+ ret = -1;
+
+ if (ret > 0 && chunks_info)
+ *chunks_info = chunks;
+ else
+ free(chunks); /* error, empty block, or the caller did not ask for the array */
+
+ return ret;
+}
+
+/**
+ * tracecmd_uncompress_chunk - Uncompress a single compression chunk
+ * @handle: compression handle
+ * @chunk: description of the chunk to uncompress
+ * @data: preallocated buffer receiving the uncompressed data, it must be
+ *	  able to hold the uncompressed size of the chunk
+ *
+ * Seeks to the compressed payload of @chunk (8 bytes after the start of the
+ * chunk, past the two 4 byte size fields), reads it into a temporary buffer
+ * and uncompresses it into @data.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int tracecmd_uncompress_chunk(struct tracecmd_compression *handle,
+			      struct tracecmd_compress_chunk *chunk, char *data)
+{
+	char *zbuf;
+	int status = -1;
+
+	if (!handle || !chunk || !data)
+		return -1;
+	if (!handle->proto || !handle->proto->uncompress_block)
+		return -1;
+
+	if (lseek64(handle->fd, chunk->zoffset + 8, SEEK_SET) == (off_t)-1)
+		return -1;
+
+	zbuf = malloc(chunk->zsize);
+	if (!zbuf)
+		return -1;
+
+	if (read_fd(handle->fd, zbuf, chunk->zsize) >= 0 &&
+	    handle->proto->uncompress_block(handle->context, zbuf, chunk->zsize,
+					    data, chunk->size) >= 0)
+		status = 0;
+
+	free(zbuf);
+	return status;
+}
+
+/**
+ * tracecmd_uncompress_copy_to - Uncompress data and copy to a file
+ * @handle: compression handle
+ * @fd: file descriptor to uncompressed data to copy into
+ * @read_size: return, size of the compressed data read from @handle
+ * @write_size: return, size of the uncompressed data written into @fd
+ *
+ * This function reads compressed data from the fd, associated with @handle,
+ * uncompresses it using the @handle compression context and writes
+ * the uncompressed data into the fd. The compressed data must be in the format:
+ *  - 4 bytes, chunks count
+ *  - for each chunk:
+ *    - 4 bytes, size of compressed data in this chunk
+ *    - 4 bytes, uncompressed size of the data in this chunk
+ *    - data, bytes of <size of compressed data in this chunk>
+ *
+ * On success 0 is returned, @read_size and @write_size are updated with
+ * the size of read and written data. If any chunk cannot be read,
+ * uncompressed or completely written to @fd, -1 is returned and @read_size
+ * and @write_size are not updated.
+ */
+int tracecmd_uncompress_copy_to(struct tracecmd_compression *handle, int fd,
+				unsigned long long *read_size, unsigned long long *write_size)
+{
+	unsigned int s_uncompressed;
+	unsigned int s_compressed;
+	unsigned int rsize = 0;
+	unsigned int wsize = 0;
+	char *bytes_out = NULL;
+	char *bytes_in = NULL;
+	int size_out = 0;
+	int size_in = 0;
+	int chunks;
+	char buf[4];
+	char *tmp;
+	int ret;
+
+	if (!handle || !handle->proto || !handle->proto->uncompress_block)
+		return -1;
+
+	if (read(handle->fd, buf, 4) != 4)
+		return -1;
+
+	chunks = tep_read_number(handle->tep, buf, 4);
+	rsize += 4;
+
+	while (chunks) {
+		if (read(handle->fd, buf, 4) != 4)
+			break;
+
+		s_compressed = tep_read_number(handle->tep, buf, 4);
+		rsize += 4;
+		if (read(handle->fd, buf, 4) != 4)
+			break;
+
+		s_uncompressed = tep_read_number(handle->tep, buf, 4);
+		rsize += 4;
+
+		/* The buffers only grow, they are reused for all chunks */
+		if (!bytes_in || size_in < s_compressed) {
+			tmp = realloc(bytes_in, s_compressed);
+			if (!tmp)
+				break;
+
+			bytes_in = tmp;
+			size_in = s_compressed;
+		}
+
+		if (!bytes_out || size_out < s_uncompressed) {
+			tmp = realloc(bytes_out, s_uncompressed);
+			if (!tmp)
+				break;
+			bytes_out = tmp;
+			size_out = s_uncompressed;
+		}
+
+		if (read_fd(handle->fd, bytes_in, s_compressed) < 0)
+			break;
+
+		rsize += s_compressed;
+		ret = handle->proto->uncompress_block(handle->context, bytes_in, s_compressed,
+						      bytes_out, s_uncompressed);
+		if (ret < 0)
+			break;
+
+		/* A failed or short write must not be reported as success */
+		if (write_fd(fd, bytes_out, ret) != ret)
+			break;
+		wsize += ret;
+		chunks--;
+	}
+	free(bytes_in);
+	free(bytes_out);
+
+	/* Chunks left over means the loop was aborted by an error */
+	if (chunks)
+		return -1;
+
+	if (read_size)
+		*read_size = rsize;
+	if (write_size)
+		*write_size = wsize;
+
+	return 0;
+}
diff --git a/lib/trace-cmd/trace-filter-hash.c b/lib/trace-cmd/trace-filter-hash.c
new file mode 100644
index 00000000..f5f0fb09
--- /dev/null
+++ b/lib/trace-cmd/trace-filter-hash.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2018 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include "trace-filter-hash.h"
+
+#define FILTER_HASH_BITS 8
+#define FILTER_HASH_SIZE (1 << FILTER_HASH_BITS)
+
+/**
+ * tracecmd_filter_id_find - look up an id in a filter hash
+ * @hash: hash to search in
+ * @id: id to search for
+ *
+ * Returns the item that holds @id, or NULL if @id is not in @hash.
+ */
+struct tracecmd_filter_id_item *
+tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id)
+{
+	struct tracecmd_filter_id_item *cur;
+	int bucket = tracecmd_quick_hash(id, FILTER_HASH_BITS);
+
+	for (cur = hash->hash[bucket]; cur; cur = cur->next) {
+		if (cur->id == id)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/**
+ * tracecmd_filter_id_add - insert an id into a filter hash
+ * @hash: hash to add to
+ * @id: id to add
+ *
+ * A new item is allocated and put at the head of the bucket of @id. No check
+ * is done whether @id is already in @hash.
+ */
+void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id)
+{
+	struct tracecmd_filter_id_item **head;
+	struct tracecmd_filter_id_item *entry;
+
+	head = &hash->hash[tracecmd_quick_hash(id, FILTER_HASH_BITS)];
+
+	entry = calloc(1, sizeof(*entry));
+	assert(entry);
+
+	entry->id = id;
+	entry->next = *head;
+	*head = entry;
+
+	hash->count++;
+}
+
+/**
+ * tracecmd_filter_id_remove - remove an id from a filter hash
+ * @hash: hash to remove from
+ * @id: id to remove
+ *
+ * Unlinks and frees the first item holding @id. Nothing happens when @id is
+ * not in @hash.
+ */
+void tracecmd_filter_id_remove(struct tracecmd_filter_id *hash, int id)
+{
+	struct tracecmd_filter_id_item **link;
+	struct tracecmd_filter_id_item *victim;
+
+	link = &hash->hash[tracecmd_quick_hash(id, FILTER_HASH_BITS)];
+
+	/* Advance until the link points to the match or to the list end */
+	while (*link && (*link)->id != id)
+		link = &(*link)->next;
+
+	victim = *link;
+	if (!victim)
+		return;
+
+	assert(hash->count);
+	hash->count--;
+
+	*link = victim->next;
+	free(victim);
+}
+
+/**
+ * tracecmd_filter_id_clear - remove all ids from a filter hash
+ * @hash: hash to empty
+ *
+ * Frees every item of every bucket and resets the item count. The bucket
+ * array and @hash itself stay allocated.
+ */
+void tracecmd_filter_id_clear(struct tracecmd_filter_id *hash)
+{
+	struct tracecmd_filter_id_item *cur;
+	struct tracecmd_filter_id_item *following;
+	int b;
+
+	for (b = 0; b < FILTER_HASH_SIZE; b++) {
+		cur = hash->hash[b];
+		hash->hash[b] = NULL;
+
+		while (cur) {
+			following = cur->next;
+			free(cur);
+			cur = following;
+		}
+	}
+
+	hash->count = 0;
+}
+
+/**
+ * tracecmd_filter_id_hash_alloc - allocate an empty filter hash
+ *
+ * Allocates the hash descriptor and its zeroed array of FILTER_HASH_SIZE
+ * buckets. As with the other allocations in this file, an allocation failure
+ * trips an assert().
+ *
+ * Returns the new hash, to be released with tracecmd_filter_id_hash_free().
+ */
+struct tracecmd_filter_id *tracecmd_filter_id_hash_alloc(void)
+{
+	struct tracecmd_filter_id *hash;
+
+	hash = calloc(1, sizeof(*hash));
+	assert(hash);
+	hash->hash = calloc(FILTER_HASH_SIZE, sizeof(*hash->hash));
+	/* Every user indexes hash->hash without checking it */
+	assert(hash->hash);
+	hash->count = 0;
+
+	return hash;
+}
+
+/**
+ * tracecmd_filter_id_hash_free - free a filter hash and all of its items
+ * @hash: hash to free, may be NULL
+ */
+void tracecmd_filter_id_hash_free(struct tracecmd_filter_id *hash)
+{
+	if (hash) {
+		tracecmd_filter_id_clear(hash);
+		free(hash->hash);
+		free(hash);
+	}
+}
+
+/**
+ * tracecmd_filter_id_hash_copy - duplicate a filter hash
+ * @hash: hash to copy, may be NULL
+ *
+ * Returns a newly allocated hash holding copies of all items of @hash, with
+ * the order inside every bucket preserved, or NULL if @hash is NULL.
+ */
+struct tracecmd_filter_id *
+tracecmd_filter_id_hash_copy(struct tracecmd_filter_id *hash)
+{
+	struct tracecmd_filter_id_item **tail;
+	struct tracecmd_filter_id_item *src;
+	struct tracecmd_filter_id *dup;
+	int b;
+
+	if (!hash)
+		return NULL;
+
+	dup = tracecmd_filter_id_hash_alloc();
+	assert(dup);
+
+	for (b = 0; b < FILTER_HASH_SIZE; b++) {
+		tail = &dup->hash[b];
+
+		for (src = hash->hash[b]; src; src = src->next) {
+			*tail = calloc(1, sizeof(*src));
+			assert(*tail);
+			/*
+			 * Whole struct copy. The copied ->next still points
+			 * into the source list; it is overwritten when the
+			 * next item is appended and is NULL for the last one.
+			 */
+			**tail = *src;
+
+			tail = &(*tail)->next;
+		}
+	}
+
+	dup->count = hash->count;
+	return dup;
+}
+
+/**
+ * tracecmd_filter_ids - get all ids of a filter hash as an array
+ * @hash: hash to read
+ *
+ * Returns a newly allocated array holding every id of @hash followed by a
+ * terminating -1. The array must be freed with free(). NULL is returned if
+ * @hash is empty or the allocation fails.
+ */
+int *tracecmd_filter_ids(struct tracecmd_filter_id *hash)
+{
+	struct tracecmd_filter_id_item *cur;
+	int *ids;
+	int used = 0;
+	int b;
+
+	if (!hash->count)
+		return NULL;
+
+	/* One extra slot for the -1 terminator */
+	ids = malloc(sizeof(*ids) * (hash->count + 1));
+	if (!ids)
+		return NULL;
+
+	for (b = 0; b < FILTER_HASH_SIZE; b++) {
+		for (cur = hash->hash[b]; cur; cur = cur->next)
+			ids[used++] = cur->id;
+	}
+
+	ids[used] = -1;
+	return ids;
+}
+
+/**
+ * tracecmd_filter_id_compare - compare two id hashes to see if they are equal
+ * @hash1: one hash to compare
+ * @hash2: another hash to compare to @hash1
+ *
+ * The ids of @hash1 are walked as an array terminated by -1, so the walk
+ * stops at the first negative id.
+ *
+ * Returns 1 if the two hashes are the same, 0 otherwise. 0 is also returned
+ * if the temporary id array cannot be allocated.
+ */
+int tracecmd_filter_id_compare(struct tracecmd_filter_id *hash1,
+			       struct tracecmd_filter_id *hash2)
+{
+	int *ids;
+	int ret = 0;
+	int i;
+
+	/* If counts don't match, then they obviously are not the same */
+	if (hash1->count != hash2->count)
+		return 0;
+
+	/* If both hashes are empty, they are the same */
+	if (!hash1->count && !hash2->count)
+		return 1;
+
+	/* Now compare the ids of one hash with the other */
+	ids = tracecmd_filter_ids(hash1);
+	/* Allocation failed: equality cannot be proven */
+	if (!ids)
+		return 0;
+
+	for (i = 0; ids[i] >= 0; i++) {
+		if (!tracecmd_filter_id_find(hash2, ids[i]))
+			break;
+	}
+
+	/* Reaching the terminator means every id was found in @hash2 */
+	if (ids[i] == -1)
+		ret = 1;
+
+	free(ids);
+
+	return ret;
+}
diff --git a/lib/trace-cmd/trace-ftrace.c b/lib/trace-cmd/trace-ftrace.c
new file mode 100644
index 00000000..f74f7c2e
--- /dev/null
+++ b/lib/trace-cmd/trace-ftrace.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "trace-cmd-private.h"
+
+struct tep_plugin_option trace_ftrace_options[] = { /* passed to tep_plugin_add_options() under the name "ftrace" */
+ {
+ .name = "tailprint",
+ .plugin_alias = "fgraph",
+ .description =
+ "Print function name at function exit in function graph",
+ },
+ {
+ .name = "depth",
+ .plugin_alias = "fgraph",
+ .description =
+ "Show the depth of each entry",
+ },
+ {
+ .name = NULL, /* entry with a NULL name ends the table */
+ }
+};
+
+static struct tep_plugin_option *fgraph_tail = &trace_ftrace_options[0]; /* "tailprint": ->set is tested in fgraph_ret_handler() */
+static struct tep_plugin_option *fgraph_depth = &trace_ftrace_options[1]; /* "depth": ->set is tested by the function graph printers */
+
+/*
+ * Look up the ftrace "funcgraph_exit" event in @pevent and store its id and
+ * event pointer in @finfo for later use.
+ *
+ * Returns 0 on success, -1 if the event does not exist.
+ */
+static int find_ret_event(struct tracecmd_ftrace *finfo, struct tep_handle *pevent)
+{
+	struct tep_event *ret_event;
+
+	ret_event = tep_find_event_by_name(pevent, "ftrace", "funcgraph_exit");
+	if (ret_event) {
+		finfo->fgraph_ret_id = ret_event->id;
+		finfo->fgraph_ret_event = ret_event;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Make sure the funcgraph_exit event is stored in @finfo, looking it up on
+ * first use. NOTE: this expands to a "return -1" from the calling function
+ * when the event cannot be found.
+ */
+#define ret_event_check(finfo, pevent)					\
+	do {								\
+		if (!(finfo)->fgraph_ret_event &&			\
+		    find_ret_event((finfo), (pevent)) < 0)		\
+			return -1;					\
+	} while (0)
+
+/*
+ * Event handler for the ftrace "function" event: prints the traced function
+ * followed by its parent. Each one is printed by name when the address can be
+ * resolved and as a hex address otherwise. A '!' is printed if a field cannot
+ * be read.
+ */
+static int function_handler(struct trace_seq *s, struct tep_record *record,
+			    struct tep_event *event, void *context)
+{
+	struct tep_handle *tep = event->tep;
+	unsigned long long addr;
+	const char *name;
+
+	/* The traced function */
+	if (tep_get_field_val(s, event, "ip", record, &addr, 1) != 0)
+		return trace_seq_putc(s, '!');
+
+	name = tep_find_function(tep, addr);
+	if (!name)
+		trace_seq_printf(s, "0x%llx", addr);
+	else
+		trace_seq_printf(s, "%s <-- ", name);
+
+	/* Its caller */
+	if (tep_get_field_val(s, event, "parent_ip", record, &addr, 1) != 0)
+		return trace_seq_putc(s, '!');
+
+	name = tep_find_function(tep, addr);
+	if (!name)
+		trace_seq_printf(s, "0x%llx", addr);
+	else
+		trace_seq_printf(s, "%s", name);
+
+	return 0;
+}
+
+#define TRACE_GRAPH_INDENT 2 /* number of spaces printed per level of call depth */
+
+/*
+ * Check whether @next is the funcgraph_exit record that matches a
+ * funcgraph_entry of function @cur_func executed by @cur_pid, which makes
+ * that entry a leaf call.
+ *
+ * Returns NULL if @next does not match. On a match the record is consumed
+ * with tracecmd_read_data() on @cpu and the result of that read is returned.
+ */
+static struct tep_record *
+get_return_for_leaf(struct trace_seq *s, int cpu, int cur_pid,
+		    unsigned long long cur_func, struct tep_record *next,
+		    struct tracecmd_ftrace *finfo)
+{
+	struct tep_event *ret_event = finfo->fgraph_ret_event;
+	unsigned long long ev_type;
+	unsigned long long ev_func;
+	unsigned long long ev_pid;
+
+	/* Searching a common field, can use any event */
+	if (tep_get_common_field_val(s, ret_event, "common_type", next, &ev_type, 1) ||
+	    ev_type != finfo->fgraph_ret_id)
+		return NULL;
+
+	if (tep_get_common_field_val(s, ret_event, "common_pid", next, &ev_pid, 1) ||
+	    cur_pid != ev_pid)
+		return NULL;
+
+	/* We already know this is a funcgraph_exit event */
+	if (tep_get_field_val(s, ret_event, "func", next, &ev_func, 1) ||
+	    cur_func != ev_func)
+		return NULL;
+
+	/* this is a leaf, now advance the iterator */
+	return tracecmd_read_data(tracecmd_curr_thread_handle, cpu);
+}
+
+/*
+ * Print the overhead marker for a call that took @duration nanoseconds.
+ * ~0ULL is the special value for "no duration" (non nested entry or return).
+ */
+static void print_graph_overhead(struct trace_seq *s,
+				 unsigned long long duration)
+{
+	if (duration == ~0ULL)
+		trace_seq_printf(s, " ");	/* no duration available */
+	else if (duration > 1000000000ULL)
+		trace_seq_printf(s, "$ ");	/* more than 1 sec */
+	else if (duration > 1000000ULL)
+		trace_seq_printf(s, "# ");	/* more than 1000 usecs */
+	else if (duration > 100000ULL)
+		trace_seq_printf(s, "! ");	/* more than 100 usecs */
+	else if (duration > 10000ULL)
+		trace_seq_printf(s, "+ ");	/* more than 10 usecs */
+	else
+		trace_seq_printf(s, " ");
+}
+
+/*
+ * Print @duration (nanoseconds) as "<usecs>.<nsecs> us", padded to a fixed
+ * width and followed by the column separator.
+ *
+ * Whole microseconds and fraction together are limited to 7 characters: the
+ * more digits the microseconds take, the fewer fraction digits are printed.
+ */
+static void print_graph_duration(struct trace_seq *s, unsigned long long duration)
+{
+	unsigned long usecs = duration / 1000;
+	unsigned long nsecs_rem = duration % 1000;
+	char nsecs_str[5];
+	size_t room;
+	int start;
+	int width;
+	int i;
+
+	/* Print usecs */
+	start = s->len;
+	trace_seq_printf(s, "%lu", usecs);
+	width = s->len - start;
+
+	/* Print nsecs (we don't want to exceed 7 numbers) */
+	if (width < 7) {
+		/*
+		 * The limit depends on how many characters the usecs took,
+		 * not on where they start inside the sequence.
+		 */
+		room = MIN(sizeof(nsecs_str), (size_t)(8 - width));
+		snprintf(nsecs_str, room, "%03lu", nsecs_rem);
+		trace_seq_printf(s, ".%s", nsecs_str);
+	}
+
+	width = s->len - start;
+
+	trace_seq_puts(s, " us ");
+
+	/* Print remaining spaces to fit the row's width */
+	for (i = width; i < 7; i++)
+		trace_seq_putc(s, ' ');
+
+	trace_seq_puts(s, "| ");
+}
+
+/*
+ * Print a function graph entry that is a leaf call: overhead marker, duration
+ * taken from the matching exit record @ret_rec, indentation according to the
+ * call depth and "func();". A '!' is printed if a field cannot be read.
+ *
+ * Returns the result of the last trace_seq call.
+ */
+static int
+print_graph_entry_leaf(struct trace_seq *s,
+		       struct tep_event *event,
+		       struct tep_record *record,
+		       struct tep_record *ret_rec,
+		       struct tracecmd_ftrace *finfo)
+{
+	struct tep_event *ret_event = finfo->fgraph_ret_event;
+	struct tep_handle *tep = event->tep;
+	unsigned long long t_ret, t_call;
+	unsigned long long elapsed, depth;
+	unsigned long long addr;
+	const char *name;
+	int written;
+	int col;
+
+	if (tep_get_field_val(s, ret_event, "rettime", ret_rec, &t_ret, 1))
+		return trace_seq_putc(s, '!');
+
+	if (tep_get_field_val(s, ret_event, "calltime", ret_rec, &t_call, 1))
+		return trace_seq_putc(s, '!');
+
+	elapsed = t_ret - t_call;
+
+	print_graph_overhead(s, elapsed);
+	print_graph_duration(s, elapsed);
+
+	if (tep_get_field_val(s, event, "depth", record, &depth, 1))
+		return trace_seq_putc(s, '!');
+
+	/* Indent the function by its call depth */
+	for (col = (int)(depth * TRACE_GRAPH_INDENT); col > 0; col--)
+		trace_seq_putc(s, ' ');
+
+	if (tep_get_field_val(s, event, "func", record, &addr, 1))
+		return trace_seq_putc(s, '!');
+
+	name = tep_find_function(tep, addr);
+	if (name)
+		written = trace_seq_printf(s, "%s();", name);
+	else
+		written = trace_seq_printf(s, "%llx();", addr);
+
+	/* Only append the depth when the function itself was printed */
+	if (written && fgraph_depth->set)
+		written = trace_seq_printf(s, " (%lld)", depth);
+
+	return written;
+}
+
+/*
+ * Print a function graph entry that is not a leaf call: no overhead marker,
+ * no duration, indentation according to the call depth and "func() {".
+ * A '!' is printed if a field cannot be read.
+ *
+ * Returns the result of the last trace_seq call.
+ */
+static int print_graph_nested(struct trace_seq *s,
+			      struct tep_event *event,
+			      struct tep_record *record)
+{
+	struct tep_handle *tep = event->tep;
+	unsigned long long depth;
+	unsigned long long addr;
+	const char *name;
+	int written;
+	int col;
+
+	/* No overhead */
+	print_graph_overhead(s, -1);
+
+	/* No time */
+	trace_seq_puts(s, " | ");
+
+	if (tep_get_field_val(s, event, "depth", record, &depth, 1))
+		return trace_seq_putc(s, '!');
+
+	/* Indent the function by its call depth */
+	for (col = (int)(depth * TRACE_GRAPH_INDENT); col > 0; col--)
+		trace_seq_putc(s, ' ');
+
+	if (tep_get_field_val(s, event, "func", record, &addr, 1))
+		return trace_seq_putc(s, '!');
+
+	name = tep_find_function(tep, addr);
+	if (name)
+		written = trace_seq_printf(s, "%s() {", name);
+	else
+		written = trace_seq_printf(s, "%llx() {", addr);
+
+	/* Only append the depth when the function itself was printed */
+	if (written && fgraph_depth->set)
+		written = trace_seq_printf(s, " (%lld)", depth);
+
+	return written;
+}
+
+/*
+ * Event handler for the ftrace "funcgraph_entry" event. Peeks at the next
+ * record: if it is the matching exit of this very call, the call is a leaf
+ * and is printed on one line together with its duration, otherwise only the
+ * opening of a nested call is printed.
+ */
+static int
+fgraph_ent_handler(struct trace_seq *s, struct tep_record *record,
+		   struct tep_event *event, void *context)
+{
+	struct tracecmd_ftrace *finfo = context;
+	struct tep_record *leaf_ret = NULL;
+	struct tep_record *next;
+	unsigned long long func, pid;
+	int cpu;
+
+	ret_event_check(finfo, event->tep);
+
+	if (tep_get_common_field_val(s, event, "common_pid", record, &pid, 1))
+		return trace_seq_putc(s, '!');
+
+	if (tep_get_field_val(s, event, "func", record, &func, 1))
+		return trace_seq_putc(s, '!');
+
+	next = tracecmd_peek_next_data(tracecmd_curr_thread_handle, &cpu);
+	if (next)
+		leaf_ret = get_return_for_leaf(s, cpu, pid, func, next, finfo);
+
+	if (!leaf_ret) {
+		print_graph_nested(s, event, record);
+		return 0;
+	}
+
+	/*
+	 * This is a leaf function, get_return_for_leaf() gave us
+	 * the return record of the function.
+	 */
+	print_graph_entry_leaf(s, event, record, leaf_ret, finfo);
+	tracecmd_free_record(leaf_ret);
+
+	return 0;
+}
+
+/*
+ * Event handler for the ftrace "funcgraph_exit" event: prints overhead
+ * marker, duration, indentation and the closing '}' of a nested call.
+ * Depending on the "tailprint" and "depth" options, the function name and
+ * the call depth are appended.
+ */
+static int
+fgraph_ret_handler(struct trace_seq *s, struct tep_record *record,
+		   struct tep_event *event, void *context)
+{
+	struct tracecmd_ftrace *finfo = context;
+	unsigned long long t_ret, t_call;
+	unsigned long long elapsed, depth;
+	unsigned long long addr;
+	const char *name;
+	int col;
+
+	ret_event_check(finfo, event->tep);
+
+	if (tep_get_field_val(s, event, "rettime", record, &t_ret, 1))
+		return trace_seq_putc(s, '!');
+
+	if (tep_get_field_val(s, event, "calltime", record, &t_call, 1))
+		return trace_seq_putc(s, '!');
+
+	elapsed = t_ret - t_call;
+
+	print_graph_overhead(s, elapsed);
+	print_graph_duration(s, elapsed);
+
+	if (tep_get_field_val(s, event, "depth", record, &depth, 1))
+		return trace_seq_putc(s, '!');
+
+	/* Indent the closing brace by the call depth */
+	for (col = (int)(depth * TRACE_GRAPH_INDENT); col > 0; col--)
+		trace_seq_putc(s, ' ');
+
+	trace_seq_putc(s, '}');
+
+	if (fgraph_tail->set) {
+		/* Without a resolvable name nothing more is printed */
+		if (tep_get_field_val(s, event, "func", record, &addr, 0))
+			return 0;
+		name = tep_find_function(event->tep, addr);
+		if (!name)
+			return 0;
+		trace_seq_printf(s, " /* %s */", name);
+	}
+
+	if (fgraph_depth->set)
+		trace_seq_printf(s, " (%lld)", depth);
+
+	return 0;
+}
+
+/**
+ * tracecmd_ftrace_load_options - load the ftrace options
+ *
+ * Registers the builtin trace_ftrace_options under the "ftrace" plugin name.
+ * This routine is used by trace-cmd list in order to list those options,
+ * because the list command does not load a trace.dat file, which is where
+ * they would normally be loaded.
+ */
+void tracecmd_ftrace_load_options(void)
+{
+ tep_plugin_add_options("ftrace", trace_ftrace_options);
+}
+
+/**
+ * tracecmd_ftrace_overrides - install the builtin ftrace event handlers
+ * @handle: input handle whose tep handle gets the handlers
+ * @finfo: ftrace context, it is passed to the function graph handlers
+ *
+ * Registers the handlers for the ftrace "function", "funcgraph_entry" and
+ * "funcgraph_exit" events, adds the builtin ftrace options and, if the
+ * "funcgraph_exit" event exists, stores its id and event together with the
+ * long size of @handle in @finfo.
+ *
+ * Always returns 0.
+ */
+int tracecmd_ftrace_overrides(struct tracecmd_input *handle,
+			      struct tracecmd_ftrace *finfo)
+{
+	struct tep_event *ret_event;
+	struct tep_handle *tep;
+
+	finfo->handle = handle;
+	tep = tracecmd_get_tep(handle);
+
+	tep_register_event_handler(tep, -1, "ftrace", "function",
+				   function_handler, NULL);
+	tep_register_event_handler(tep, -1, "ftrace", "funcgraph_entry",
+				   fgraph_ent_handler, finfo);
+	tep_register_event_handler(tep, -1, "ftrace", "funcgraph_exit",
+				   fgraph_ret_handler, finfo);
+
+	tep_plugin_add_options("ftrace", trace_ftrace_options);
+
+	/* Store the func ret id and event for later use */
+	ret_event = tep_find_event_by_name(tep, "ftrace", "funcgraph_exit");
+	if (ret_event) {
+		finfo->long_size = tracecmd_long_size(handle);
+		finfo->fgraph_ret_id = ret_event->id;
+		finfo->fgraph_ret_event = ret_event;
+	}
+
+	return 0;
+}
diff --git a/lib/trace-cmd/trace-hash.c b/lib/trace-cmd/trace-hash.c
new file mode 100644
index 00000000..bed97323
--- /dev/null
+++ b/lib/trace-cmd/trace-hash.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2014, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include "trace-cmd-private.h"
+#include "trace-hash.h"
+
+/*
+ * Initialize @hash with a zeroed array of @buckets buckets.
+ *
+ * Returns 0 on success, -ENOMEM if the bucket array cannot be allocated.
+ */
+int __hidden trace_hash_init(struct trace_hash *hash, int buckets)
+{
+	memset(hash, 0, sizeof(*hash));
+
+	hash->buckets = calloc(buckets, sizeof(*hash->buckets));
+	if (!hash->buckets)
+		return -ENOMEM;
+
+	hash->nr_buckets = buckets;
+
+	/* For a power of two the bucket can be picked with a mask */
+	if ((buckets & (buckets - 1)) == 0)
+		hash->power = buckets - 1;
+
+	return 0;
+}
+
+void __hidden trace_hash_free(struct trace_hash *hash)
+{
+ free(hash->buckets); /* only the bucket array is released; the items and @hash itself are not freed here */
+}
+
+int __hidden trace_hash_empty(struct trace_hash *hash)
+{
+ struct trace_hash_item **bucket;
+
+ trace_hash_for_each_bucket(bucket, hash) /* iteration macro defined outside this file; presumably visits every bucket slot of @hash */
+ if (*bucket) /* a non NULL slot means at least one item is stored */
+ return 0;
+ return 1; /* no used slot was seen: 1 means empty, 0 means not empty */
+}
+
+/*
+ * Insert @item at the head of the bucket selected by item->key.
+ *
+ * Always returns 1.
+ */
+int __hidden trace_hash_add(struct trace_hash *hash, struct trace_hash_item *item)
+{
+	struct trace_hash_item *head;
+	int bucket;
+
+	if (hash->power)
+		bucket = item->key & hash->power;
+	else
+		bucket = item->key % hash->nr_buckets;
+
+	head = hash->buckets[bucket];
+	if (head)
+		head->prev = item;
+
+	item->next = head;
+	/* The first item of a bucket points back at the bucket slot itself */
+	item->prev = (struct trace_hash_item *)&hash->buckets[bucket];
+
+	hash->buckets[bucket] = item;
+
+	return 1;
+}
+
+ __hidden struct trace_hash_item *
+trace_hash_find(struct trace_hash *hash, unsigned long long key,
+		trace_hash_func match, void *data)
+{
+	struct trace_hash_item *cur;
+	int bucket;
+
+	if (hash->power)
+		bucket = key & hash->power;
+	else
+		bucket = key % hash->nr_buckets;
+
+	/*
+	 * Return the first item with the same key. If a @match callback is
+	 * given, it also has to accept the item (called with @data).
+	 */
+	for (cur = hash->buckets[bucket]; cur; cur = cur->next) {
+		if (cur->key != key)
+			continue;
+		if (!match || match(cur, data))
+			return cur;
+	}
+
+	return NULL;
+}
diff --git a/lib/trace-cmd/trace-hooks.c b/lib/trace-cmd/trace-hooks.c
new file mode 100644
index 00000000..a58b5356
--- /dev/null
+++ b/lib/trace-cmd/trace-hooks.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "trace-cmd-private.h"
+#include "trace-cmd-local.h"
+#include "event-utils.h"
+
+/**
+ * tracecmd_create_event_hook - parse a hook description
+ * @arg: hook description, see the format below
+ *
+ * Returns a newly allocated hook whose string fields point into a private
+ * copy of @arg (hook->str), while hook->hook keeps the @arg pointer itself.
+ * The hook is to be released with tracecmd_free_hooks(). NULL is returned
+ * if an allocation fails or if @arg does not follow the format, in which
+ * case everything allocated here is freed again.
+ */
+struct hook_list *tracecmd_create_event_hook(const char *arg)
+{
+	struct hook_list *hook;
+	char *system = NULL;
+	char *event;
+	char *match;
+	char *flags = NULL;
+	char *pid = NULL;
+	char *str;
+	char *tok;
+	int index;
+	int ch;
+	int i;
+
+	hook = malloc(sizeof(*hook));
+	if (!hook)
+		return NULL;
+	memset(hook, 0, sizeof(*hook));
+
+	str = strdup(arg);
+	if (!str) {
+		free(hook);
+		return NULL;
+	}
+
+	hook->str = str;
+	hook->hook = arg;
+
+	/*
+	 * Hooks are in the form of:
+	 *  [<start_system>:]<start_event>,<start_match>[,<start_pid>]/
+	 *  [<end_system>:]<end_event>,<end_match>[,<flags>]
+	 *
+	 * Where start_system, start_pid, end_system, and flags are all
+	 * optional.
+	 *
+	 * Flags are (case insensitive):
+	 *  P - pinned to cpu (wont migrate)
+	 *  G - global, not hooked to task - currently ignored.
+	 *  S - save stacks for this event.
+	 */
+	tok = strtok(str, ":,");
+	if (!tok)
+		goto invalid_tok;
+
+	/*
+	 * strtok() overwrites the delimiters in @str, so the delimiter that
+	 * ended a token is looked up at the same index in the original arg.
+	 */
+	index = strlen(tok);
+	if (arg[index] == ':') {
+		/* this is a system, the next token must be ',' */
+		system = tok;
+		tok = strtok(NULL, ",");
+		if (!tok)
+			goto invalid_tok;
+	}
+	event = tok;
+
+	tok = strtok(NULL, ",/");
+	if (!tok)
+		goto invalid_tok;
+	match = tok;
+	index = strlen(tok) + tok - str;
+	if (arg[index] == ',') {
+		tok = strtok(NULL, "/");
+		if (!tok)
+			goto invalid_tok;
+		pid = tok;
+	}
+
+	hook->start_system = system;
+	hook->start_event = event;
+	hook->start_match = match;
+	hook->pid = pid;
+
+	/* Now process the end event */
+	system = NULL;
+
+	tok = strtok(NULL, ":,");
+	if (!tok)
+		goto invalid_tok;
+
+	/* See what the token was from the original arg */
+	index = tok - str + strlen(tok);
+	if (arg[index] == ':') {
+		/* this is a system, the next token must be ',' */
+		system = tok;
+		tok = strtok(NULL, ",");
+		if (!tok)
+			goto invalid_tok;
+	}
+	event = tok;
+
+	tok = strtok(NULL, ",");
+	if (!tok)
+		goto invalid_tok;
+	match = tok;
+	index = strlen(tok) + tok - str;
+	if (arg[index] == ',') {
+		tok = strtok(NULL, "");
+		if (!tok)
+			goto invalid_tok;
+		flags = tok;
+	}
+
+	hook->end_system = system;
+	hook->end_event = event;
+	hook->end_match = match;
+	hook->migrate = 1;
+	if (flags) {
+		for (i = 0; flags[i]; i++) {
+			/* tolower() needs a value representable as unsigned char */
+			ch = tolower((unsigned char)flags[i]);
+			switch (ch) {
+			case 'p':
+				hook->migrate = 0;
+				break;
+			case 'g':
+				hook->global = 1;
+				break;
+			case 's':
+				hook->stack = 1;
+				break;
+			default:
+				tracecmd_warning("unknown flag %c", flags[i]);
+			}
+		}
+	}
+
+	/* NOTE(review): the optional fields may be NULL when passed to %s here */
+	printf("start %s:%s:%s (%s) end %s:%s:%s (%s)\n",
+	       hook->start_system,
+	       hook->start_event,
+	       hook->start_match,
+	       hook->pid,
+	       hook->end_system,
+	       hook->end_event,
+	       hook->end_match,
+	       flags);
+	return hook;
+
+invalid_tok:
+	tracecmd_warning("Invalid hook format '%s'", arg);
+	/* Nothing is handed to the caller, do not leak the hook and its string */
+	free(hook->str);
+	free(hook);
+	return NULL;
+}
+
+/**
+ * tracecmd_free_hooks - free a list of hooks
+ * @hooks: first hook of the list, may be NULL
+ *
+ * Frees every hook of the list together with its private string copy.
+ */
+void tracecmd_free_hooks(struct hook_list *hooks)
+{
+	struct hook_list *following;
+
+	for (; hooks; hooks = following) {
+		following = hooks->next;
+
+		free(hooks->str);
+		free(hooks);
+	}
+}
diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
new file mode 100644
index 00000000..8ffdf04b
--- /dev/null
+++ b/lib/trace-cmd/trace-input.c
@@ -0,0 +1,5886 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#define _LARGEFILE64_SOURCE
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <regex.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <linux/time64.h>
+
+#include "trace-write-local.h"
+#include "trace-cmd-local.h"
+#include "trace-local.h"
+#include "kbuffer.h"
+#include "list.h"
+
+#define _STRINGIFY(x) #x /* turns the token x itself into a string literal */
+#define STRINGIFY(x) _STRINGIFY(x) /* expands x first, then stringifies the result */
+
+#define MISSING_EVENTS (1 << 31) /* bit 31 - NOTE(review): users are outside this chunk */
+#define MISSING_STORED (1 << 30) /* bit 30 - NOTE(review): users are outside this chunk */
+
+#define COMMIT_MASK ((1 << 27) - 1) /* selects the low 27 bits of a value */
+
+/* force uncompressing in memory */
+#define INMEMORY_DECOMPRESS
+
+/* for debugging read instead of mmap */
+static int force_read = 0;
+
+struct page_map { /* one mmap()ed window of a CPU's trace data, see allocate_page_map() */
+ struct list_head list; /* link in cpu_data->page_maps */
+ off64_t offset; /* offset handed to mmap() for this window */
+ off64_t size; /* length handed to mmap() and later to munmap() */
+ void *map; /* address returned by mmap() */
+ int ref_count; /* free_page_map() unmaps and frees the window when this reaches zero */
+};
+
+struct page { /* one loaded page of per-CPU trace data, created by allocate_page() */
+ struct list_head list;
+ off64_t offset; /* file offset the page was requested at */
+ struct tracecmd_input *handle; /* input handle the page belongs to */
+ struct page_map *page_map; /* backing mmap window, set by allocate_page_map() on the mmap path */
+ void *map; /* start of the page data in memory */
+ int ref_count; /* starts at 1; the page is released when it drops to zero */
+ int cpu; /* index into handle->cpu_data[] */
+ long long lost_events;
+#if DEBUG_RECORD
+ struct tep_record *records; /* records currently attached to this page (debug builds only) */
+#endif
+};
+
+struct zchunk_cache { /* an uncompressed chunk kept in memory by read_zpage() */
+ struct list_head list; /* link in cpu_zdata->cache */
+ struct tracecmd_compress_chunk *chunk; /* descriptor of the chunk held in @map */
+ void *map; /* malloc()ed buffer with the uncompressed chunk data */
+ int ref; /* set to 1 on creation, incremented on every cache hit */
+};
+
+struct cpu_zdata { /* per-CPU state for reading compressed trace data */
+ /* uncompressed cpu data */
+ int fd; /* when >= 0, allocate_page_map() mmap()s from this fd instead of the trace file */
+#ifdef __ANDROID__
+ char file[37]; /* strlen(COMPR_TEMP_FILE) */ /* i.e. the 36 characters plus the NUL */
+#else /* !__ANDROID__ */
+ char file[26]; /* strlen(COMPR_TEMP_FILE) */ /* i.e. the 25 characters plus the NUL */
+#endif /* __ANDROID__ */
+
+ unsigned int count; /* number of entries in @chunks */
+ unsigned int last_chunk; /* index of the chunk returned by the previous get_zchunk() */
+ struct list_head cache; /* list of struct zchunk_cache */
+ struct tracecmd_compress_chunk *chunks; /* chunk table; searched with bsearch(), so ordered by offset */
+};
+
+#ifdef __ANDROID__
+#define COMPR_TEMP_FILE "/data/local/tmp/trace_cpu_dataXXXXXX" /* template ending in XXXXXX */
+#else /* !__ANDROID__ */
+#define COMPR_TEMP_FILE "/tmp/trace_cpu_dataXXXXXX" /* template ending in XXXXXX */
+#endif /* __ANDROID__ */
+
+struct cpu_data { /* read state of one CPU's trace data */
+ /* the first two never change */
+ unsigned long long file_offset; /* where this CPU's data starts in the file */
+ unsigned long long file_size; /* length of this CPU's data in the file */
+ unsigned long long offset;
+ unsigned long long size;
+ unsigned long long timestamp;
+ unsigned long long first_ts;
+ struct list_head page_maps; /* all struct page_map windows of this CPU */
+ struct page_map *page_map; /* window used most recently; holds its own reference */
+ struct page **pages; /* array indexed by page number, grown by allocate_page() */
+ struct tep_record *next;
+ struct page *page;
+ struct kbuffer *kbuf;
+ int nr_pages; /* number of slots in @pages */
+ int page_cnt; /* number of non-NULL entries in @pages */
+ int cpu;
+ int pipe_fd; /* read_page() reads from here when handle->use_pipe is set */
+ struct cpu_zdata compress; /* compressed-data bookkeeping */
+};
+
+struct cpu_file_data { /* NOTE(review): presumably location of one CPU's data inside a buffer instance - users are outside this chunk */
+ int cpu;
+ unsigned long long offset;
+ unsigned long long size;
+};
+
+struct input_buffer_instance { /* description of one buffer instance; embedded in tracecmd_input as top_buffer / buffers */
+ char *name;
+ size_t offset;
+ char *clock;
+ bool latency;
+ int page_size;
+ int cpus;
+ struct cpu_file_data *cpu_data; /* NOTE(review): presumably an array with @cpus entries - confirm */
+};
+
+struct ts_offset_sample { /* one time-sync sample; NOTE(review): field semantics are defined by users outside this chunk */
+ long long time;
+ long long offset;
+ long long scaling;
+ long long fraction;
+};
+
+struct guest_trace_info { /* singly linked list node, chained from tracecmd_input->guest */
+ struct guest_trace_info *next; /* next node or NULL */
+ char *name;
+ unsigned long long trace_id;
+ int vcpu_count;
+ int *cpu_pid; /* NOTE(review): presumably @vcpu_count entries - confirm */
+};
+
+struct timesync_offsets { /* a counted array of time-sync samples */
+ int ts_samples_count; /* NOTE(review): presumably the number of entries in @ts_samples */
+ struct ts_offset_sample *ts_samples;
+};
+
+struct host_trace_info { /* embedded in tracecmd_input as ->host; users are outside this chunk */
+ unsigned long long peer_trace_id;
+ unsigned int flags;
+ bool sync_enable;
+ int ts_samples_count;
+ struct ts_offset_sample *ts_samples;
+ int cpu_count;
+ struct timesync_offsets *ts_offsets; /* NOTE(review): presumably one entry per CPU (@cpu_count) - confirm */
+};
+
+struct tsc2nsec { /* NOTE(review): presumably mult/shift/offset parameters for converting TSC to nanoseconds - users are outside this chunk */
+ int mult;
+ int shift;
+ unsigned long long offset;
+};
+
+struct file_section { /* one known section of the trace file, kept in the handle->sections list */
+ unsigned long long section_offset; /* file offset of the section header (see handle_section()) */
+ unsigned long long data_offset; /* file offset of the section payload (see section_open()) */
+ int id; /* section identifier, the lookup key of section_get() */
+ int flags; /* section flags, tested against TRACECMD_SEC_FL_COMPRESS */
+ struct file_section *next; /* next list element or NULL */
+};
+
+struct tracecmd_input { /* state of one opened trace input */
+ struct tep_handle *pevent; /* event parser handle used for numbers, headers and event formats */
+ struct tep_plugin_list *plugin_list;
+ struct tracecmd_input *parent;
+ unsigned long file_state; /* last TRACECMD_FILE_* stage that was read */
+ unsigned long long trace_id;
+ unsigned long long next_offset;
+ unsigned long flags; /* TRACECMD_FL_* bits, see tracecmd_set_flag() */
+ int fd; /* descriptor of the trace file */
+ int long_size; /* updated from the header page in read_header_files() */
+ int page_size; /* size of one trace data page */
+ int page_map_size; /* current mmap window size; halved by allocate_page_map() when mmap() fails */
+ int max_cpu;
+ int cpus; /* CPU count read by read_cpus() */
+ int ref;
+ int nr_buffers; /* buffer instances */
+ bool use_trace_clock;
+ bool read_page; /* pages are read() into malloc()ed memory instead of being mmap()ed */
+ bool use_pipe; /* pages come from the per-CPU pipe_fd */
+ bool read_zpage; /* uncompress pages in memory, do not use tmp files */
+ bool cpu_compressed;
+ int file_version;
+ unsigned int cpustats_size;
+ struct cpu_zdata latz;
+ struct cpu_data *cpu_data; /* array indexed by CPU number */
+ long long ts_offset;
+ struct tsc2nsec tsc_calc;
+
+ unsigned int strings_size; /* size of the metadata strings */
+ char *strings; /* metadata strings */
+
+ bool read_compress; /* when set, do_read()/do_lseek() operate on the uncompressed block in @compress */
+ struct tracecmd_compression *compress;
+
+ struct host_trace_info host;
+ double ts2secs;
+ char * cpustats;
+ char * uname;
+ char * version;
+ char * trace_clock;
+ struct input_buffer_instance top_buffer;
+ struct input_buffer_instance *buffers;
+ int parsing_failures; /* number of event formats that failed to parse */
+ struct guest_trace_info *guest;
+
+ struct tracecmd_ftrace finfo;
+
+ struct hook_list *hooks;
+ struct pid_addr_maps *pid_maps;
+ /* file information */
+ struct file_section *sections; /* head of the list of known sections */
+ bool options_init; /* set once read_headers() has processed options and sections */
+ unsigned long long options_start; /* file offset read_headers() seeks to before handle_options() */
+ unsigned long long options_last_offset;
+ size_t total_file_size;
+
+ /* For custom profilers. */
+ tracecmd_show_data_func show_data_func;
+};
+
+__thread struct tracecmd_input *tracecmd_curr_thread_handle; /* one pointer per thread; users are outside this chunk */
+
+#define CHECK_READ_STATE(H, S) ((H)->file_version < FILE_VERSION_SECTIONS && (H)->file_state >= (S)) /* pre-sections file that already reached stage S */
+#define HAS_SECTIONS(H) ((H)->flags & TRACECMD_FL_SECTIONED) /* non-zero when the sectioned flag is set */
+#define HAS_COMPRESSION(H) ((H)->flags & TRACECMD_FL_COMPRESSION) /* non-zero when the compression flag is set */
+
+static int read_options_type(struct tracecmd_input *handle);
+
+/**
+ * tracecmd_set_flag - turn on bits in the flags of an input handle
+ * @handle: input handle to modify
+ * @flag: bit(s) to OR into the handle's flags
+ */
+void tracecmd_set_flag(struct tracecmd_input *handle, int flag)
+{
+	handle->flags = handle->flags | flag;
+}
+
+/**
+ * tracecmd_clear_flag - turn off bits in the flags of an input handle
+ * @handle: input handle to modify
+ * @flag: bit(s) to remove from the handle's flags
+ */
+void tracecmd_clear_flag(struct tracecmd_input *handle, int flag)
+{
+	handle->flags = handle->flags & ~flag;
+}
+
+/**
+ * tracecmd_get_flags - read the flags of an input handle
+ * @handle: input handle to query
+ *
+ * Returns the current value of the handle's flags word.
+ */
+unsigned long tracecmd_get_flags(struct tracecmd_input *handle)
+{
+	unsigned long flags = handle->flags;
+
+	return flags;
+}
+
+/**
+ * tracecmd_get_file_state - report how far the file has been read
+ * @handle: input handle to query
+ *
+ * Returns the handle's file_state converted to enum tracecmd_file_states.
+ */
+enum tracecmd_file_states tracecmd_get_file_state(struct tracecmd_input *handle)
+{
+	enum tracecmd_file_states state = handle->file_state;
+
+	return state;
+}
+
+#if DEBUG_RECORD
+/* Unlink @record from the doubly linked list headed by page->records. */
+static void remove_record(struct page *page, struct tep_record *record)
+{
+ if (record->prev)
+ record->prev->next = record->next;
+ else
+ page->records = record->next;
+ if (record->next)
+ record->next->prev = record->prev;
+}
+/* Push @record at the front of the list headed by page->records. */
+static void add_record(struct page *page, struct tep_record *record)
+{
+ if (page->records)
+ page->records->prev = record;
+ record->next = page->records;
+ record->prev = NULL;
+ page->records = record;
+}
+/*
+ * Format the alloc_addr of every record attached to the non-NULL entries
+ * of @pages into a static buffer and return it. The buffer is reused by
+ * every call, so the result is only valid until the next call.
+ */
+static const char *show_records(struct page **pages, int nr_pages)
+{
+ static char buf[BUFSIZ + 1];
+ struct tep_record *record;
+ struct page *page;
+ int len;
+ int i;
+
+ memset(buf, 0, sizeof(buf));
+ len = 0;
+ for (i = 0; i < nr_pages; i++) {
+ page = pages[i];
+ if (!page)
+ continue;
+ for (record = page->records; record; record = record->next) {
+ int n;
+ n = snprintf(buf+len, BUFSIZ - len, " 0x%lx", record->alloc_addr);
+ len += n;
+ /* this only leaves the inner loop; the outer loop keeps going */
+ if (len >= BUFSIZ)
+ break;
+ }
+ }
+ return buf;
+}
+#else
+/* Without DEBUG_RECORD the record tracking helpers do nothing. */
+static inline void remove_record(struct page *page, struct tep_record *record) {}
+static inline void add_record(struct page *page, struct tep_record *record) {}
+static const char *show_records(struct page **pages, int nr_pages)
+{
+ return "";
+}
+#endif
+
+static int init_cpu(struct tracecmd_input *handle, int cpu);
+
+/*
+ * Read up to @size bytes from @fd into @data, calling read() again after
+ * short reads.
+ *
+ * Returns the number of bytes read, which is less than @size only when
+ * read() reported end of file, or the negative read() result on error.
+ */
+static ssize_t do_read_fd(int fd, void *data, size_t size)
+{
+	ssize_t done = 0;
+
+	for (;;) {
+		ssize_t n = read(fd, (char *)data + done, size - done);
+
+		if (n < 0)
+			return n;
+		if (n == 0)
+			break;
+
+		done += n;
+		if (done == size)
+			break;
+	}
+
+	return done;
+}
+
+/*
+ * Reposition the read pointer of @handle: inside the uncompressed block
+ * when handle->read_compress is set, in the trace file otherwise.
+ * Returns the result of the seek call that was used.
+ */
+static inline int do_lseek(struct tracecmd_input *handle, int offset, int whence)
+{
+	if (!handle->read_compress)
+		return lseek(handle->fd, offset, whence);
+
+	return tracecmd_compress_lseek(handle->compress, offset, whence);
+}
+
+/*
+ * Read @size bytes into @data: from the uncompressed block when
+ * handle->read_compress is set, from the trace file otherwise.
+ * Returns the result of the read routine that was used.
+ */
+static inline ssize_t do_read(struct tracecmd_input *handle, void *data, size_t size)
+{
+	if (!handle->read_compress)
+		return do_read_fd(handle->fd, data, size);
+
+	return tracecmd_compress_buffer_read(handle->compress, data, size);
+}
+
+/*
+ * Read exactly @size bytes into @data.
+ *
+ * Returns 0 when all bytes were read, the negative do_read() result on a
+ * read error and -1 when fewer than @size bytes were available.
+ */
+static ssize_t
+do_read_check(struct tracecmd_input *handle, void *data, size_t size)
+{
+	ssize_t got = do_read(handle, data, size);
+
+	if (got < 0)
+		return got;
+
+	return got == size ? 0 : -1;
+}
+
+/*
+ * Read a NUL terminated string from the current position of @handle.
+ *
+ * The input is read in BUFSIZ sized pieces until a NUL byte shows up;
+ * the read pointer is then moved back so that it sits right behind the
+ * terminator.
+ *
+ * Returns a malloc()ed copy of the string that the caller must free(),
+ * or NULL on read error, end of input before a terminator, seek failure
+ * or memory exhaustion.
+ */
+static char *read_string(struct tracecmd_input *handle)
+{
+	char buf[BUFSIZ];
+	char *str = NULL;
+	char *tmp;
+	size_t size = 0;	/* bytes collected in str so far */
+	ssize_t i;
+	ssize_t r;
+
+	for (;;) {
+		r = do_read(handle, buf, BUFSIZ);
+		if (r <= 0)
+			goto fail;
+
+		for (i = 0; i < r; i++) {
+			if (!buf[i])
+				break;
+		}
+		if (i < r)
+			break;
+
+		/* no terminator yet, keep what was actually read */
+		tmp = realloc(str, size + r);
+		if (!tmp)
+			goto fail;
+		str = tmp;
+		memcpy(str + size, buf, r);
+		size += r;
+	}
+
+	/* move the file descriptor to the end of the string */
+	r = do_lseek(handle, -(r - (i+1)), SEEK_CUR);
+	if (r < 0)
+		goto fail;
+
+	/*
+	 * Append the last i bytes directly behind the collected data and
+	 * terminate the result inside the allocation.
+	 */
+	tmp = realloc(str, size + i + 1);
+	if (!tmp)
+		goto fail;
+	str = tmp;
+	memcpy(str + size, buf, i);
+	str[size + i] = 0;
+
+	return str;
+
+ fail:
+	free(str);
+	return NULL;
+}
+
+/*
+ * Read a 2 byte number from @handle and store it, converted with
+ * tep_read_number(), in @size. Returns 0 on success, -1 on read failure.
+ */
+static int read2(struct tracecmd_input *handle, unsigned short *size)
+{
+	unsigned short raw;
+
+	if (do_read_check(handle, &raw, 2) != 0)
+		return -1;
+
+	*size = tep_read_number(handle->pevent, &raw, 2);
+	return 0;
+}
+
+/*
+ * Read a 4 byte number from @handle and store it, converted with
+ * tep_read_number(), in @size. Returns 0 on success, -1 on read failure.
+ */
+static int read4(struct tracecmd_input *handle, unsigned int *size)
+{
+	unsigned int raw;
+
+	if (do_read_check(handle, &raw, 4) != 0)
+		return -1;
+
+	*size = tep_read_number(handle->pevent, &raw, 4);
+	return 0;
+}
+
+/*
+ * Read an 8 byte number from @handle and store it, converted with
+ * tep_read_number(), in @size. Returns 0 on success, -1 on read failure.
+ */
+static int read8(struct tracecmd_input *handle, unsigned long long *size)
+{
+	unsigned long long raw;
+
+	if (do_read_check(handle, &raw, 8) != 0)
+		return -1;
+
+	*size = tep_read_number(handle->pevent, &raw, 8);
+	return 0;
+}
+
+/*
+ * Stop reading from the uncompressed block: clears handle->read_compress
+ * and resets the compression context. Does nothing when the handle has
+ * no compression context.
+ */
+__hidden void in_uncompress_reset(struct tracecmd_input *handle)
+{
+	if (!handle->compress)
+		return;
+
+	handle->read_compress = false;
+	tracecmd_compress_reset(handle->compress);
+}
+
+/*
+ * Uncompress the block at the current file position and, on success,
+ * switch the handle to reading from it (handle->read_compress).
+ *
+ * Returns 0 when the handle has no compression context, otherwise the
+ * result of tracecmd_uncompress_block().
+ */
+__hidden int in_uncompress_block(struct tracecmd_input *handle)
+{
+	int ret;
+
+	if (!handle->compress)
+		return 0;
+
+	ret = tracecmd_uncompress_block(handle->compress);
+	if (ret == 0)
+		handle->read_compress = true;
+
+	return ret;
+}
+
+/*
+ * Look up the section with identifier @id in handle->sections.
+ * Returns the matching entry or NULL when there is none.
+ */
+static struct file_section *section_get(struct tracecmd_input *handle, int id)
+{
+	struct file_section *cur = handle->sections;
+
+	while (cur && cur->id != id)
+		cur = cur->next;
+
+	return cur;
+}
+
+/*
+ * Position the file at the payload of section @id and, for a compressed
+ * section, uncompress its block so that reads come from it.
+ *
+ * Returns the section, or NULL when it is unknown, the seek fails or the
+ * block cannot be uncompressed.
+ */
+static struct file_section *section_open(struct tracecmd_input *handle, int id)
+{
+	struct file_section *found = section_get(handle, id);
+
+	if (found == NULL)
+		return NULL;
+
+	if (lseek64(handle->fd, found->data_offset, SEEK_SET) == (off64_t)-1)
+		return NULL;
+
+	if (found->flags & TRACECMD_SEC_FL_COMPRESS) {
+		if (in_uncompress_block(handle))
+			return NULL;
+	}
+
+	return found;
+}
+
+/*
+ * Counterpart of section_open(): leave the uncompressed block again if
+ * @sec is a compressed section.
+ */
+static void section_close(struct tracecmd_input *handle, struct file_section *sec)
+{
+	if (!(sec->flags & TRACECMD_SEC_FL_COMPRESS))
+		return;
+
+	in_uncompress_reset(handle);
+}
+
+/*
+ * Record what is known about section @id, creating the list entry when
+ * it does not exist yet.
+ *
+ * @flags is stored only when it is not negative; @section_offset and
+ * @data_offset are stored only when they are not zero.
+ *
+ * Returns 0 on success, -1 when a new entry cannot be allocated.
+ */
+static int section_add_or_update(struct tracecmd_input *handle, int id, int flags,
+				 unsigned long long section_offset,
+				 unsigned long long data_offset)
+{
+	struct file_section *entry = section_get(handle, id);
+
+	if (entry == NULL) {
+		entry = calloc(1, sizeof(struct file_section));
+		if (entry == NULL)
+			return -1;
+
+		/* new entries go to the front of the list */
+		entry->id = id;
+		entry->next = handle->sections;
+		handle->sections = entry;
+	}
+
+	if (section_offset != 0)
+		entry->section_offset = section_offset;
+	if (data_offset != 0)
+		entry->data_offset = data_offset;
+	if (flags >= 0)
+		entry->flags = flags;
+
+	return 0;
+}
+
+/*
+ * Read the "header_page" and "header_event" parts of the file.
+ *
+ * Each part is a NUL terminated tag followed by an 8 byte size and that
+ * many bytes of content. The header page content is handed to
+ * tep_parse_header_page(); the header event content is read and dropped.
+ * On success handle->long_size is taken from the parsed header page and
+ * file_state becomes TRACECMD_FILE_HEADERS.
+ *
+ * Returns 0 on success (or when this stage was read already), -1 on error.
+ */
+static int read_header_files(struct tracecmd_input *handle)
+{
+	struct tep_handle *tep = handle->pevent;
+	unsigned long long len;
+	char tag[BUFSIZ];
+	char *data;
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_HEADERS))
+		return 0;
+
+	if (!HAS_SECTIONS(handle))
+		section_add_or_update(handle, TRACECMD_OPTION_HEADER_INFO, 0, 0,
+				      lseek64(handle->fd, 0, SEEK_CUR));
+
+	/* header page */
+	if (do_read_check(handle, tag, 12))
+		return -1;
+	if (memcmp(tag, "header_page", 12) != 0)
+		return -1;
+	if (read8(handle, &len) < 0)
+		return -1;
+
+	data = malloc(len);
+	if (!data)
+		return -1;
+	if (do_read_check(handle, data, len)) {
+		free(data);
+		return -1;
+	}
+	tep_parse_header_page(tep, data, len, handle->long_size);
+	free(data);
+
+	/*
+	 * The size field in the page is of type long,
+	 * use that instead, since it represents the kernel.
+	 */
+	handle->long_size = tep_get_header_page_size(tep);
+
+	/* header event */
+	if (do_read_check(handle, tag, 13))
+		return -1;
+	if (memcmp(tag, "header_event", 13) != 0)
+		return -1;
+	if (read8(handle, &len) < 0)
+		return -1;
+
+	data = malloc(len);
+	if (!data)
+		return -1;
+	if (do_read_check(handle, data, len)) {
+		free(data);
+		return -1;
+	}
+	free(data);
+
+	handle->file_state = TRACECMD_FILE_HEADERS;
+
+	return 0;
+}
+
+/*
+ * Check whether the event described by an event format file matches a
+ * regular expression.
+ * @file: content of the format file, not necessarily NUL terminated
+ * @size: number of valid bytes in @file
+ * @epreg: compiled regular expression to match the event name against
+ *
+ * The name is taken from the first line of @file with a leading
+ * "name: " removed.
+ *
+ * Returns 1 on a match, 0 otherwise (including allocation failure and a
+ * file without any line content).
+ */
+static int regex_event_buf(const char *file, int size, regex_t *epreg)
+{
+	char *buf;
+	char *line;
+	int ret;
+
+	/* work on a terminated private copy, strtok() modifies its input */
+	buf = malloc(size + 1);
+	if (!buf) {
+		tracecmd_warning("Insufficient memory");
+		return 0;
+	}
+
+	strncpy(buf, file, size);
+	buf[size] = 0;
+
+	/* get the name from the first line */
+	line = strtok(buf, "\n");
+	if (!line) {
+		tracecmd_warning("No newline found in '%s'", buf);
+		/* the copy must not be leaked on this path either */
+		free(buf);
+		return 0;
+	}
+	/* skip name if it is there */
+	if (strncmp(line, "name: ", 6) == 0)
+		line += 6;
+
+	ret = regexec(epreg, line, 0, NULL, 0) == 0;
+
+	free(buf);
+
+	return ret;
+}
+
+/*
+ * Read one ftrace event format of @size bytes.
+ *
+ * Without @epreg the format is parsed into the "ftrace" system and a
+ * parse failure is counted in handle->parsing_failures. With @epreg the
+ * format is only printed, either because @print is set or because the
+ * event name matches @epreg.
+ *
+ * Returns 0 on success, -1 on allocation or read failure.
+ */
+static int read_ftrace_file(struct tracecmd_input *handle,
+			    unsigned long long size,
+			    int print, regex_t *epreg)
+{
+	char *data = malloc(size);
+
+	if (!data)
+		return -1;
+
+	if (do_read_check(handle, data, size)) {
+		free(data);
+		return -1;
+	}
+
+	if (!epreg) {
+		if (tep_parse_event(handle->pevent, data, size, "ftrace"))
+			handle->parsing_failures++;
+	} else if (print || regex_event_buf(data, size, epreg)) {
+		printf("%.*s\n", (int)size, data);
+	}
+
+	free(data);
+	return 0;
+}
+
+/*
+ * Read one event format of @size bytes that belongs to @system.
+ *
+ * Without @epreg the format is parsed and a parse failure is counted in
+ * handle->parsing_failures. With @epreg the format is only printed,
+ * either because @print is set or because the event name matches @epreg;
+ * the system name is printed once in front of its first event, tracked
+ * through @sys_printed.
+ *
+ * Returns 0 on success, -1 on allocation or read failure.
+ */
+static int read_event_file(struct tracecmd_input *handle,
+			   char *system, unsigned long long size,
+			   int print, int *sys_printed,
+			   regex_t *epreg)
+{
+	char *data = malloc(size);
+
+	if (!data)
+		return -1;
+
+	if (do_read_check(handle, data, size)) {
+		free(data);
+		return -1;
+	}
+
+	if (!epreg) {
+		if (tep_parse_event(handle->pevent, data, size, system))
+			handle->parsing_failures++;
+	} else if (print || regex_event_buf(data, size, epreg)) {
+		if (!*sys_printed) {
+			printf("\nsystem: %s\n", system);
+			*sys_printed = 1;
+		}
+		printf("%.*s\n", (int)size, data);
+	}
+
+	free(data);
+	return 0;
+}
+
+/*
+ * Compile the regular expressions for a "system:event" filter.
+ * @regex: filter as given by the user, either "system:event" or one
+ *	   expression that is used for both system and event
+ * @system: receives the compiled system expression
+ * @event: receives the compiled event expression
+ * @unique: set to 1 when @regex contained both parts, 0 otherwise
+ *
+ * Returns 0 on success; the caller must then regfree() both @system and
+ * @event. On failure a non-zero value is returned and nothing is left
+ * for the caller to free.
+ */
+static int make_preg_files(const char *regex, regex_t *system,
+			   regex_t *event, int *unique)
+{
+	char *buf;
+	char *sstr;
+	char *estr;
+	int ret;
+
+	/* unique is set if a colon is found */
+	*unique = 0;
+
+	/* split "system:event" into "system" and "event" */
+
+	buf = strdup(regex);
+	if (!buf)
+		return -ENOMEM;
+
+	sstr = strtok(buf, ":");
+	if (!sstr) {
+		/* empty or colon-only input: nothing to hand to regcomp() */
+		tracecmd_warning("Bad regular expression '%s'", regex);
+		ret = -EINVAL;
+		goto out;
+	}
+	estr = strtok(NULL, ":");
+
+	/* If no colon is found, set event == system */
+	if (!estr)
+		estr = sstr;
+	else
+		*unique = 1;
+
+	ret = regcomp(system, sstr, REG_ICASE|REG_NOSUB);
+	if (ret) {
+		tracecmd_warning("Bad regular expression '%s'", sstr);
+		goto out;
+	}
+
+	ret = regcomp(event, estr, REG_ICASE|REG_NOSUB);
+	if (ret) {
+		tracecmd_warning("Bad regular expression '%s'", estr);
+		/* callers bail out on failure, so release the first one here */
+		regfree(system);
+		goto out;
+	}
+
+ out:
+	free(buf);
+	return ret;
+}
+
+/*
+ * Read all ftrace event formats: a 4 byte count followed by that many
+ * (8 byte size, content) pairs.
+ *
+ * With @regex set the formats are printed instead of parsed, subject to
+ * the filter: a filter without colon that matches "ftrace" prints every
+ * event, and a "system:event" filter whose system part does not match
+ * "ftrace" disables the filter so the formats are parsed as usual.
+ *
+ * Returns 0 on success (or when this stage was read already), negative
+ * on error.
+ */
+static int read_ftrace_files(struct tracecmd_input *handle, const char *regex)
+{
+	unsigned long long size;
+	regex_t sys_re;
+	regex_t ev_re;
+	regex_t *filter = NULL;
+	bool compiled = false;
+	unsigned int count, i;
+	int print_all = 0;
+	int unique;
+	int ret;
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_FTRACE_EVENTS))
+		return 0;
+
+	if (!HAS_SECTIONS(handle))
+		section_add_or_update(handle, TRACECMD_OPTION_FTRACE_EVENTS, 0, 0,
+				      lseek64(handle->fd, 0, SEEK_CUR));
+
+	if (regex) {
+		bool matched;
+
+		if (make_preg_files(regex, &sys_re, &ev_re, &unique))
+			return -1;
+		compiled = true;
+		filter = &ev_re;
+
+		matched = regexec(&sys_re, "ftrace", 0, NULL, 0) == 0;
+		if (matched) {
+			/*
+			 * If the system matches a regex that did
+			 * not contain a colon, then print all events.
+			 */
+			if (!unique)
+				print_all = 1;
+		} else if (unique) {
+			/*
+			 * The user specified a unique event that did
+			 * not match the ftrace system. Don't print any
+			 * events here.
+			 */
+			regfree(&sys_re);
+			regfree(&ev_re);
+			compiled = false;
+			filter = NULL;
+		}
+	}
+
+	ret = read4(handle, &count);
+	if (ret < 0)
+		goto out;
+
+	i = 0;
+	while (i < count) {
+		ret = read8(handle, &size);
+		if (ret < 0)
+			goto out;
+		ret = read_ftrace_file(handle, size, print_all, filter);
+		if (ret < 0)
+			goto out;
+		i++;
+	}
+
+	handle->file_state = TRACECMD_FILE_FTRACE_EVENTS;
+	ret = 0;
+out:
+	if (compiled) {
+		regfree(&sys_re);
+		regfree(&ev_re);
+	}
+
+	return ret;
+}
+
+/*
+ * Read the event formats of all systems: a 4 byte system count, then per
+ * system its name, a 4 byte event count and that many (8 byte size,
+ * content) pairs.
+ *
+ * With @regex set the formats are printed instead of parsed, subject to
+ * the filter: a filter without colon that matches the system name prints
+ * every event of that system, and a "system:event" filter whose system
+ * part does not match switches the filter off for that system.
+ *
+ * Returns 0 on success (or when this stage was read already), negative
+ * on error.
+ */
+static int read_event_files(struct tracecmd_input *handle, const char *regex)
+{
+	unsigned long long size;
+	char *system = NULL;
+	regex_t spreg;
+	regex_t epreg;
+	regex_t *sreg = NULL;
+	regex_t *ereg = NULL;
+	unsigned int systems;
+	unsigned int count;
+	unsigned int s, e;
+	int unique;
+	int ret;
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_ALL_EVENTS))
+		return 0;
+
+	if (!HAS_SECTIONS(handle))
+		section_add_or_update(handle, TRACECMD_OPTION_EVENT_FORMATS, 0, 0,
+				      lseek64(handle->fd, 0, SEEK_CUR));
+
+	if (regex) {
+		sreg = &spreg;
+		ereg = &epreg;
+		if (make_preg_files(regex, sreg, ereg, &unique))
+			return -1;
+	}
+
+	ret = read4(handle, &systems);
+	if (ret < 0)
+		goto out;
+
+	for (s = 0; s < systems; s++) {
+		regex_t *filter = ereg;
+		int sys_printed = 0;
+		int print_all = 0;
+
+		system = read_string(handle);
+		if (!system) {
+			ret = -1;
+			goto out;
+		}
+
+		if (sreg) {
+			bool matched = regexec(sreg, system, 0, NULL, 0) == 0;
+
+			/*
+			 * A regex without a colon that matches the system
+			 * prints all the events of this system.
+			 */
+			if (matched && !unique)
+				print_all = 1;
+
+			/*
+			 * A unique "system:event" regex whose system does
+			 * not match: no events are printed for this system.
+			 */
+			if (!matched && unique)
+				filter = NULL;
+		}
+
+		ret = read4(handle, &count);
+		if (ret < 0)
+			goto out;
+
+		for (e = 0; e < count; e++) {
+			ret = read8(handle, &size);
+			if (ret < 0)
+				goto out;
+
+			ret = read_event_file(handle, system, size,
+					      print_all, &sys_printed,
+					      filter);
+			if (ret < 0)
+				goto out;
+		}
+
+		free(system);
+		system = NULL;
+	}
+
+	handle->file_state = TRACECMD_FILE_ALL_EVENTS;
+	ret = 0;
+ out:
+	if (sreg) {
+		regfree(sreg);
+		regfree(ereg);
+	}
+
+	free(system);
+	return ret;
+}
+
+/*
+ * Read the kallsyms part of the file: a 4 byte size followed by that
+ * many bytes of text, which is handed to tep_parse_kallsyms().
+ *
+ * Sets file_state to TRACECMD_FILE_KALLSYMS on success, also when the
+ * stored size is zero.
+ *
+ * Returns 0 on success (or when this stage was read already), -1 on error.
+ */
+static int read_proc_kallsyms(struct tracecmd_input *handle)
+{
+	struct tep_handle *tep = handle->pevent;
+	unsigned int size;
+	size_t len;
+	char *buf;
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_KALLSYMS))
+		return 0;
+	if (!HAS_SECTIONS(handle))
+		section_add_or_update(handle, TRACECMD_OPTION_KALLSYMS, 0, 0,
+				      lseek64(handle->fd, 0, SEEK_CUR));
+
+	if (read4(handle, &size) < 0)
+		return -1;
+	if (!size) {
+		handle->file_state = TRACECMD_FILE_KALLSYMS;
+		return 0; /* OK? */
+	}
+
+	/*
+	 * One extra byte for the terminator. Computed in size_t and
+	 * checked, so that a size of UINT_MAX from a damaged file cannot
+	 * wrap the allocation down to zero bytes.
+	 */
+	len = (size_t)size + 1;
+	if (len <= size)
+		return -1;
+
+	buf = malloc(len);
+	if (!buf)
+		return -1;
+	if (do_read_check(handle, buf, size)){
+		free(buf);
+		return -1;
+	}
+	buf[size] = 0;
+
+	tep_parse_kallsyms(tep, buf);
+
+	free(buf);
+
+	handle->file_state = TRACECMD_FILE_KALLSYMS;
+
+	return 0;
+}
+
+/*
+ * Read the printk formats part of the file: a 4 byte size followed by
+ * that many bytes of text, which is handed to tep_parse_printk_formats().
+ *
+ * Sets file_state to TRACECMD_FILE_PRINTK on success, also when the
+ * stored size is zero.
+ *
+ * Returns 0 on success (or when this stage was read already), -1 on error.
+ */
+static int read_ftrace_printk(struct tracecmd_input *handle)
+{
+	unsigned int size;
+	size_t len;
+	char *buf;
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_PRINTK))
+		return 0;
+
+	if (!HAS_SECTIONS(handle))
+		section_add_or_update(handle, TRACECMD_OPTION_PRINTK, 0, 0,
+				      lseek64(handle->fd, 0, SEEK_CUR));
+
+	if (read4(handle, &size) < 0)
+		return -1;
+	if (!size) {
+		handle->file_state = TRACECMD_FILE_PRINTK;
+		return 0; /* OK? */
+	}
+
+	/*
+	 * One extra byte for the terminator. Computed in size_t and
+	 * checked, so that a size of UINT_MAX from a damaged file cannot
+	 * wrap the allocation down to zero bytes.
+	 */
+	len = (size_t)size + 1;
+	if (len <= size)
+		return -1;
+
+	buf = malloc(len);
+	if (!buf)
+		return -1;
+	if (do_read_check(handle, buf, size)) {
+		free(buf);
+		return -1;
+	}
+
+	buf[size] = 0;
+
+	tep_parse_printk_formats(handle->pevent, buf);
+
+	free(buf);
+
+	handle->file_state = TRACECMD_FILE_PRINTK;
+
+	return 0;
+}
+
+static int read_and_parse_cmdlines(struct tracecmd_input *handle);
+
+/**
+ * tracecmd_get_parsing_failures - get the count of parsing failures
+ * @handle: input handle for the trace.dat file, may be NULL
+ *
+ * Returns the number of event files that failed to parse, or zero when
+ * @handle is NULL.
+ */
+int tracecmd_get_parsing_failures(struct tracecmd_input *handle)
+{
+	return handle ? handle->parsing_failures : 0;
+}
+
+/*
+ * Read the 4 byte CPU count and store it in the handle (cpus and
+ * max_cpu) and in the event parser; file_state becomes
+ * TRACECMD_FILE_CPU_COUNT.
+ *
+ * Returns 0 on success (or when this stage was read already), -1 on error.
+ */
+static int read_cpus(struct tracecmd_input *handle)
+{
+	unsigned int nr;
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_CPU_COUNT))
+		return 0;
+
+	if (read4(handle, &nr) < 0)
+		return -1;
+
+	handle->max_cpu = handle->cpus = nr;
+	tep_set_cpus(handle->pevent, handle->cpus);
+	handle->file_state = TRACECMD_FILE_CPU_COUNT;
+
+	return 0;
+}
+
+/*
+ * Read the headers of a file without sections, stage by stage, and stop
+ * as soon as handle->file_state has reached @state.
+ * @state: stage to read up to, zero means up to the options
+ * @regex: passed on to read_event_files() only
+ *
+ * The parsing failure counter is reset before anything is read.
+ *
+ * Returns 0 on success, -1 as soon as one stage fails.
+ */
+static int read_headers_v6(struct tracecmd_input *handle, enum tracecmd_file_states state,
+			   const char *regex)
+{
+	/* Set to read all if state is zero */
+	if (!state)
+		state = TRACECMD_FILE_OPTIONS;
+
+	if (handle->file_state >= state)
+		return 0;
+
+	handle->parsing_failures = 0;
+
+	if (read_header_files(handle) < 0)
+		return -1;
+	if (handle->file_state >= state)
+		return 0;
+
+	if (read_ftrace_files(handle, NULL) < 0)
+		return -1;
+	if (handle->file_state >= state)
+		return 0;
+
+	if (read_event_files(handle, regex) < 0)
+		return -1;
+	if (handle->file_state >= state)
+		return 0;
+
+	if (read_proc_kallsyms(handle) < 0)
+		return -1;
+	if (handle->file_state >= state)
+		return 0;
+
+	if (read_ftrace_printk(handle) < 0)
+		return -1;
+	if (handle->file_state >= state)
+		return 0;
+
+	if (read_and_parse_cmdlines(handle) < 0)
+		return -1;
+	if (handle->file_state >= state)
+		return 0;
+
+	if (read_cpus(handle) < 0)
+		return -1;
+	if (handle->file_state >= state)
+		return 0;
+
+	if (read_options_type(handle) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int handle_options(struct tracecmd_input *handle);
+
+/*
+ * Return the metadata string that starts @offset bytes into
+ * handle->strings.
+ *
+ * Returns NULL when @handle is NULL, no strings are loaded, or @offset
+ * lies outside of the strings_size bytes that are available.
+ */
+static const char *get_metadata_string(struct tracecmd_input *handle, int offset)
+{
+	if (!handle || !handle->strings || offset < 0)
+		return NULL;
+
+	/* valid offsets are 0 .. strings_size - 1 */
+	if ((unsigned int)offset >= handle->strings_size)
+		return NULL;
+
+	return handle->strings + offset;
+}
+
+/*
+ * Read a section header: 2 byte id, 2 byte flags, 4 byte description
+ * offset and 8 byte size, in that order.
+ *
+ * Every output pointer may be NULL if the caller does not need the
+ * value. @description receives the result of get_metadata_string() for
+ * the description offset.
+ *
+ * Returns 0 on success, -1 when any of the reads fails; the outputs are
+ * not touched in that case.
+ */
+static int read_section_header(struct tracecmd_input *handle, unsigned short *id,
+			       unsigned short *flags, unsigned long long *size, const char **description)
+{
+	unsigned short hdr_id;
+	unsigned short hdr_flags;
+	unsigned long long hdr_size;
+	int hdr_desc;
+
+	if (read2(handle, &hdr_id) ||
+	    read2(handle, &hdr_flags) ||
+	    read4(handle, (unsigned int *)&hdr_desc) ||
+	    read8(handle, &hdr_size))
+		return -1;
+
+	if (id)
+		*id = hdr_id;
+	if (flags)
+		*flags = hdr_flags;
+	if (size)
+		*size = hdr_size;
+	if (description)
+		*description = get_metadata_string(handle, hdr_desc);
+
+	return 0;
+}
+
+/*
+ * Read the content of one section.
+ *
+ * Seeks to the section header, verifies that the stored id matches
+ * section->id, records the flags and the payload offset in @section and
+ * then calls the reader that belongs to the section id. Compressed
+ * sections are uncompressed before and reset after reading. Sections
+ * with an id that has no reader here are accepted without reading.
+ * @regex is passed on to read_event_files() only.
+ *
+ * Returns the result of the reader, or -1 on seek, header or
+ * uncompress failure.
+ */
+static int handle_section(struct tracecmd_input *handle, struct file_section *section,
+			  const char *regex)
+{
+	unsigned short hdr_id, hdr_flags;
+	unsigned long long hdr_size;
+	int ret = 0;
+
+	if (lseek64(handle->fd, section->section_offset, SEEK_SET) == (off_t)-1)
+		return -1;
+	if (read_section_header(handle, &hdr_id, &hdr_flags, &hdr_size, NULL))
+		return -1;
+
+	section->flags = hdr_flags;
+	if (hdr_id != section->id)
+		return -1;
+
+	section->data_offset = lseek64(handle->fd, 0, SEEK_CUR);
+	if ((section->flags & TRACECMD_SEC_FL_COMPRESS) && in_uncompress_block(handle))
+		return -1;
+
+	if (section->id == TRACECMD_OPTION_HEADER_INFO)
+		ret = read_header_files(handle);
+	else if (section->id == TRACECMD_OPTION_FTRACE_EVENTS)
+		ret = read_ftrace_files(handle, NULL);
+	else if (section->id == TRACECMD_OPTION_EVENT_FORMATS)
+		ret = read_event_files(handle, regex);
+	else if (section->id == TRACECMD_OPTION_KALLSYMS)
+		ret = read_proc_kallsyms(handle);
+	else if (section->id == TRACECMD_OPTION_PRINTK)
+		ret = read_ftrace_printk(handle);
+	else if (section->id == TRACECMD_OPTION_CMDLINES)
+		ret = read_and_parse_cmdlines(handle);
+
+	/* the flags are looked at again, the readers may have updated them */
+	if (section->flags & TRACECMD_SEC_FL_COMPRESS)
+		in_uncompress_reset(handle);
+
+	return ret;
+}
+
+/*
+ * Read the headers of a sectioned file: process the options found at
+ * handle->options_start and then the content of every known section.
+ * @regex: event filter that is handed to handle_section(), may be NULL
+ *
+ * The work is done only once per handle; later calls return 0
+ * immediately because options_init is set.
+ *
+ * Returns 0 on success, -1 when the options offset is unknown or a seek,
+ * the options or one of the sections fails.
+ */
+static int read_headers(struct tracecmd_input *handle, const char *regex)
+{
+	struct file_section *section;
+
+	if (handle->options_init)
+		return 0;
+
+	if (!handle->options_start)
+		return -1;
+
+	if (lseek64(handle->fd, handle->options_start, SEEK_SET) == (off64_t)-1) {
+		tracecmd_warning("Failed to goto options offset %llu", handle->options_start);
+		return -1;
+	}
+
+	if (handle_options(handle))
+		return -1;
+
+	for (section = handle->sections; section; section = section->next) {
+		/* pass the filter on, it is what the parameter exists for */
+		if (handle_section(handle, section, regex))
+			return -1;
+	}
+
+	handle->options_init = true;
+	return 0;
+}
+
+/**
+ * tracecmd_read_headers - read the header information from trace.dat
+ * @handle: input handle for the trace.dat file
+ * @state: The state to read up to or zero to read up to options.
+ *
+ * Reads the information stored in front of the trace data, such as the
+ * format of the ring buffer, event formats, ftrace formats, kallsyms
+ * and printk. For files without sections this may be called several
+ * times with different @state values to read the data piece by piece;
+ * reading continues where the previous call stopped. For files with
+ * sections @state is not used and everything is read by the first call.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int tracecmd_read_headers(struct tracecmd_input *handle,
+			  enum tracecmd_file_states state)
+{
+	if (HAS_SECTIONS(handle))
+		return read_headers(handle, NULL);
+
+	return read_headers_v6(handle, state, NULL);
+}
+
+/*
+ * Round @offset down to a multiple of handle->page_size. The mask based
+ * rounding is only meaningful for a page size that is a power of two.
+ */
+static unsigned long long calc_page_offset(struct tracecmd_input *handle,
+					   unsigned long long offset)
+{
+	unsigned long long mask = ~(handle->page_size - 1);
+
+	return offset & mask;
+}
+
+/*
+ * Read one page of trace data into @map.
+ * @offset: file offset of the page (not used in pipe mode)
+ * @cpu: CPU whose pipe is read in pipe mode
+ * @map: destination buffer of at least handle->page_size bytes
+ *
+ * In pipe mode the page comes from the CPU's pipe_fd; errno is set to
+ * EAGAIN when the read fails and to EINVAL when the pipe was closed.
+ * Otherwise the page is read from the trace file and the file position
+ * is put back to where it was, on success as well as on failure.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int read_page(struct tracecmd_input *handle, off64_t offset,
+		     int cpu, void *map)
+{
+	off64_t save_seek;
+	off64_t ret;
+	int saved_errno;
+
+	if (handle->use_pipe) {
+		ret = read(handle->cpu_data[cpu].pipe_fd, map, handle->page_size);
+		/* Set EAGAIN if the pipe is empty */
+		if (ret < 0) {
+			errno = EAGAIN;
+			return -1;
+
+		} else if (ret == 0) {
+			/* Set EINVAL when the pipe has closed */
+			errno = EINVAL;
+			return -1;
+		}
+		return 0;
+	}
+
+	/* other parts of the code may expect the pointer to not move */
+	save_seek = lseek64(handle->fd, 0, SEEK_CUR);
+
+	ret = lseek64(handle->fd, offset, SEEK_SET);
+	if (ret >= 0)
+		ret = read(handle->fd, map, handle->page_size);
+
+	/*
+	 * reset the file pointer back, also when the seek or the read
+	 * failed, and keep the errno of the failing call for the caller
+	 */
+	saved_errno = errno;
+	if (save_seek >= 0)
+		lseek64(handle->fd, save_seek, SEEK_SET);
+	errno = saved_errno;
+
+	return ret < 0 ? -1 : 0;
+}
+
+/*
+ * page_map_size must be a power of two.
+ *
+ * Returns the largest power of two that is not greater than @size, or
+ * zero when @size is zero.
+ */
+static unsigned long long normalize_size(unsigned long long size)
+{
+	unsigned int shift;
+
+	/* From Hacker's Delight: or bits after first set bit to all 1s */
+	for (shift = 1; shift <= 32; shift <<= 1)
+		size |= (size >> shift);
+
+	/* Clear all bits except first one for previous power of two */
+	return size - (size >> 1);
+}
+
+/*
+ * Drop one reference to @page_map. When it was the last one, the window
+ * is unmapped, removed from its list and freed.
+ */
+static void free_page_map(struct page_map *page_map)
+{
+	if (--page_map->ref_count != 0)
+		return;
+
+	munmap(page_map->map, page_map->size);
+	list_del(&page_map->list);
+	free(page_map);
+}
+
+/* True when offset O lies inside chunk C, i.e. in [offset, offset + size). */
+#define CHUNK_CHECK_OFFSET(C, O) ((O) >= (C)->offset && (O) < ((C)->offset + (C)->size))
+
+/*
+ * bsearch() comparison function for looking up the chunk that contains
+ * an offset.
+ * @A: the search key; only its offset member is used
+ * @B: the array element the key is compared with
+ *
+ * Returns 0 when the key offset lies inside the element, a negative
+ * value when the key lies before the element and a positive value when
+ * it lies behind it, which is the order bsearch() requires for an array
+ * sorted by ascending offset.
+ */
+static int chunk_cmp(const void *A, const void *B)
+{
+	const struct tracecmd_compress_chunk *a = A;
+	const struct tracecmd_compress_chunk *b = B;
+
+	if (CHUNK_CHECK_OFFSET(b, a->offset))
+		return 0;
+
+	if (a->offset < b->offset)
+		return -1;
+
+	return 1;
+}
+
+/*
+ * Find the compressed chunk of @cpu that contains @offset.
+ *
+ * The chunk returned by the previous call and its successor are tried
+ * first, since pages are usually requested in order; everything else is
+ * looked up with a binary search. last_chunk is updated only when a
+ * chunk was found, so it always stays a valid index.
+ *
+ * Returns the chunk, or NULL when there are no chunks or @offset is not
+ * covered by any of them.
+ */
+static struct tracecmd_compress_chunk *get_zchunk(struct cpu_data *cpu, off64_t offset)
+{
+	struct cpu_zdata *cpuz = &cpu->compress;
+	struct tracecmd_compress_chunk *chunk;
+	struct tracecmd_compress_chunk key;
+	unsigned int next;
+
+	if (!cpuz->chunks || !cpuz->count)
+		return NULL;
+
+	/* nothing to find at or behind the end of the last chunk */
+	chunk = cpuz->chunks + (cpuz->count - 1);
+	if (offset >= (chunk->offset + chunk->size))
+		return NULL;
+
+	/* check if the requested offset is in the last requested chunk or in the next chunk */
+	if (cpuz->last_chunk < cpuz->count &&
+	    CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset))
+		return cpuz->chunks + cpuz->last_chunk;
+
+	next = cpuz->last_chunk + 1;
+	if (next < cpuz->count &&
+	    CHUNK_CHECK_OFFSET(cpuz->chunks + next, offset)) {
+		cpuz->last_chunk = next;
+		return cpuz->chunks + next;
+	}
+
+	key.offset = offset;
+	chunk = bsearch(&key, cpuz->chunks, cpuz->count, sizeof(*chunk), chunk_cmp);
+
+	if (!chunk) /* should never happen */
+		return NULL;
+
+	cpuz->last_chunk = chunk - cpuz->chunks;
+	return chunk;
+}
+
+/*
+ * Release a page that was handed out by read_zpage().
+ * @cpu_data: CPU the page belongs to
+ * @map: page address as returned by read_zpage()
+ *
+ * Finds the cached chunk whose buffer contains @map and drops one
+ * reference to it; the buffer and the cache entry are freed together
+ * with the last reference. Addresses that do not belong to any cached
+ * chunk are ignored.
+ */
+static void free_zpage(struct cpu_data *cpu_data, void *map)
+{
+	struct zchunk_cache *cache;
+
+	list_for_each_entry(cache, &cpu_data->compress.cache, list) {
+		/* the page lies in [cache->map, cache->map + chunk size) */
+		if (map >= cache->map && map < (cache->map + cache->chunk->size))
+			goto found;
+	}
+	return;
+
+found:
+	cache->ref--;
+	if (cache->ref)
+		return;
+	list_del(&cache->list);
+	free(cache->map);
+	free(cache);
+}
+
+/*
+ * Get the page at file offset @offset of @cpu from compressed data.
+ *
+ * The chunk that contains the page is uncompressed into memory and kept
+ * in the CPU's cache list; if it is cached already, only the reference
+ * count of the cache entry is raised.
+ *
+ * Returns the address of the page inside the uncompressed chunk, or NULL
+ * when no chunk covers the offset, memory runs out or uncompressing
+ * fails.
+ */
+static void *read_zpage(struct tracecmd_input *handle, int cpu, off64_t offset)
+{
+	struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+	struct tracecmd_compress_chunk *chunk;
+	struct zchunk_cache *entry = NULL;
+	struct zchunk_cache *iter;
+	void *data;
+	int pindex;
+	int size;
+
+	/* chunk offsets are relative to the start of the CPU data */
+	offset -= cpu_data->file_offset;
+
+	/* Look in the cache of already loaded chunks */
+	list_for_each_entry(iter, &cpu_data->compress.cache, list) {
+		if (CHUNK_CHECK_OFFSET(iter->chunk, offset)) {
+			entry = iter;
+			break;
+		}
+	}
+
+	if (entry) {
+		entry->ref++;
+	} else {
+		chunk = get_zchunk(cpu_data, offset);
+		if (!chunk)
+			return NULL;
+
+		size = handle->page_size > chunk->size ? handle->page_size : chunk->size;
+		data = malloc(size);
+		if (!data)
+			return NULL;
+
+		if (tracecmd_uncompress_chunk(handle->compress, chunk, data) < 0) {
+			free(data);
+			return NULL;
+		}
+
+		entry = calloc(1, sizeof(struct zchunk_cache));
+		if (!entry) {
+			free(data);
+			return NULL;
+		}
+
+		entry->ref = 1;
+		entry->chunk = chunk;
+		entry->map = data;
+		list_add(&entry->list, &cpu_data->compress.cache);
+	}
+
+	/* a chunk can hold multiple pages, get the requested one */
+	pindex = (offset - entry->chunk->offset) / handle->page_size;
+	return entry->map + (pindex * handle->page_size);
+}
+
+/*
+ * Make the page at file offset @offset of @cpu available in memory and
+ * return its address, or NULL on failure.
+ *
+ * Three ways are used, in this order:
+ *  - compressed CPU data with read_zpage set: delegated to read_zpage()
+ *  - read_page set: the page is read into a malloc()ed buffer
+ *  - otherwise: the page is served from an mmap()ed window (struct
+ *    page_map) of handle->page_map_size bytes that is shared by all
+ *    pages falling into it
+ *
+ * On the mmap path the window gets one reference for @page and one for
+ * being the CPU's current window (cpu_data->page_map).
+ */
+static void *allocate_page_map(struct tracecmd_input *handle,
+ struct page *page, int cpu, off64_t offset)
+{
+ struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+ struct page_map *page_map;
+ off64_t map_size;
+ off64_t map_offset;
+ void *map;
+ int ret;
+ int fd;
+
+ if (handle->cpu_compressed && handle->read_zpage)
+ return read_zpage(handle, cpu, offset);
+
+ if (handle->read_page) {
+ map = malloc(handle->page_size);
+ if (!map)
+ return NULL;
+ ret = read_page(handle, offset, cpu, map);
+ if (ret < 0) {
+ free(map);
+ return NULL;
+ }
+ return map;
+ }
+
+ /* start of the window that contains offset; relies on map_size being a power of two */
+ map_size = handle->page_map_size;
+ map_offset = offset & ~(map_size - 1);
+
+ /* do not map anything in front of this CPU's data */
+ if (map_offset < cpu_data->file_offset) {
+ map_size -= cpu_data->file_offset - map_offset;
+ map_offset = cpu_data->file_offset;
+ }
+
+ page_map = cpu_data->page_map;
+
+ /* reuse the current window or any other existing one that starts at the same offset */
+ if (page_map && page_map->offset == map_offset)
+ goto out;
+
+ list_for_each_entry(page_map, &cpu_data->page_maps, list) {
+ if (page_map->offset == map_offset)
+ goto out;
+ }
+
+ page_map = calloc(1, sizeof(*page_map));
+ if (!page_map)
+ return NULL;
+
+ /* do not map anything behind this CPU's data either */
+ if (map_offset + map_size > cpu_data->file_offset + cpu_data->file_size)
+ map_size -= map_offset + map_size -
+ (cpu_data->file_offset + cpu_data->file_size);
+
+ /*
+ * NOTE(review): with compress.fd in use the stored page_map->offset is
+ * relative to file_offset, while the lookups above and the address
+ * computed at the end use the unadjusted offset - confirm that
+ * file_offset is zero in that configuration.
+ */
+ if (cpu_data->compress.fd >= 0) {
+ map_offset -= cpu_data->file_offset;
+ fd = cpu_data->compress.fd;
+ } else
+ fd = handle->fd;
+ again:
+ page_map->size = map_size;
+ page_map->offset = map_offset;
+
+ page_map->map = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, fd, map_offset);
+
+ if (page_map->map == MAP_FAILED) {
+ /* Try a smaller map */
+ map_size >>= 1;
+ if (map_size < handle->page_size) {
+ free(page_map);
+ return NULL;
+ }
+ /* the smaller size is kept for all later windows of this handle */
+ handle->page_map_size = map_size;
+ map_offset = offset & ~(map_size - 1);
+ /*
+ * Note, it is now possible to get duplicate memory
+ * maps. But that's fine, the previous maps with
+ * larger sizes will eventually be unmapped.
+ */
+ goto again;
+ }
+
+ list_add(&page_map->list, &cpu_data->page_maps);
+ out:
+ /* make this window the current one; the current window holds a reference of its own */
+ if (cpu_data->page_map != page_map) {
+ struct page_map *old_map = cpu_data->page_map;
+ cpu_data->page_map = page_map;
+ page_map->ref_count++;
+ if (old_map)
+ free_page_map(old_map);
+ }
+ /* reference held by the page itself */
+ page->page_map = page_map;
+ page_map->ref_count++;
+ return page_map->map + offset - page_map->offset;
+}
+
+ /*
+  * allocate_page - return the page descriptor covering @offset for @cpu
+  *
+  * The slot in the per-CPU page table is computed from @offset relative to
+  * the CPU's file_offset. The table is grown (and the new slots zeroed)
+  * when the slot lies past its current end. If a descriptor already sits
+  * in the slot it only gains a reference; otherwise a new descriptor is
+  * created and backed through allocate_page_map().
+  *
+  * Returns the page (with a reference held) or NULL on allocation or
+  * mapping failure.
+  */
+ static struct page *allocate_page(struct tracecmd_input *handle,
+ 				  int cpu, off64_t offset)
+ {
+ 	struct cpu_data *cd = &handle->cpu_data[cpu];
+ 	struct page *pg;
+ 	int slot;
+
+ 	slot = (offset - cd->file_offset) / handle->page_size;
+
+ 	/* Grow the page table so that @slot becomes a valid index */
+ 	if (slot >= cd->nr_pages) {
+ 		struct page **table;
+
+ 		table = realloc(cd->pages, (slot + 1) * sizeof(*cd->pages));
+ 		if (!table)
+ 			return NULL;
+ 		memset(table + cd->nr_pages, 0,
+ 		       (slot + 1 - cd->nr_pages) * sizeof(*cd->pages));
+ 		cd->pages = table;
+ 		cd->nr_pages = slot + 1;
+ 	}
+
+ 	/* Already loaded: just take another reference */
+ 	pg = cd->pages[slot];
+ 	if (pg) {
+ 		pg->ref_count++;
+ 		return pg;
+ 	}
+
+ 	pg = calloc(1, sizeof(*pg));
+ 	if (!pg)
+ 		return NULL;
+
+ 	pg->offset = offset;
+ 	pg->handle = handle;
+ 	pg->cpu = cpu;
+
+ 	pg->map = allocate_page_map(handle, pg, cpu, offset);
+ 	if (!pg->map) {
+ 		free(pg);
+ 		return NULL;
+ 	}
+
+ 	cd->pages[slot] = pg;
+ 	cd->page_cnt++;
+ 	pg->ref_count = 1;
+
+ 	return pg;
+ }
+
+ /*
+  * __free_page - drop one reference on @page, releasing it at zero
+  *
+  * When the last reference goes away the page data is released according
+  * to how it was obtained (plain read buffer, compressed page, or page
+  * map), its slot in the per-CPU page table is cleared and the descriptor
+  * is freed. When reading from a pipe, trailing empty slots of the page
+  * table are trimmed afterwards.
+  */
+ static void __free_page(struct tracecmd_input *handle, struct page *page)
+ {
+ 	struct cpu_data *cd = &handle->cpu_data[page->cpu];
+ 	struct page **shrunk;
+ 	int slot;
+ 	int last;
+
+ 	if (!page->ref_count) {
+ 		tracecmd_critical("Page ref count is zero!");
+ 		return;
+ 	}
+
+ 	if (--page->ref_count)
+ 		return;
+
+ 	if (handle->read_page)
+ 		free(page->map);
+ 	else if (handle->read_zpage)
+ 		free_zpage(cd, page->map);
+ 	else
+ 		free_page_map(page->page_map);
+
+ 	slot = (page->offset - cd->file_offset) / handle->page_size;
+ 	cd->pages[slot] = NULL;
+ 	cd->page_cnt--;
+
+ 	free(page);
+
+ 	if (!handle->use_pipe)
+ 		return;
+
+ 	/* Find the highest slot still in use (slot 0 is always kept) */
+ 	last = cd->nr_pages - 1;
+ 	while (last > 0 && !cd->pages[last])
+ 		last--;
+
+ 	if (last >= cd->nr_pages - 1)
+ 		return;
+
+ 	/* A failed shrink is harmless: keep the larger table */
+ 	shrunk = realloc(cd->pages, (last + 1) * sizeof(*cd->pages));
+ 	if (!shrunk)
+ 		return;
+ 	cd->pages = shrunk;
+ 	cd->nr_pages = last + 1;
+ }
+
+ /*
+  * free_page - release the page currently attached to @cpu's iterator
+  *
+  * Does nothing when there is no per-CPU data, @cpu is past the last CPU,
+  * or no page is attached.
+  */
+ static void free_page(struct tracecmd_input *handle, int cpu)
+ {
+ 	struct cpu_data *cd;
+
+ 	if (!handle->cpu_data)
+ 		return;
+ 	if (cpu >= handle->cpus)
+ 		return;
+
+ 	cd = &handle->cpu_data[cpu];
+ 	if (cd->page) {
+ 		__free_page(handle, cd->page);
+ 		cd->page = NULL;
+ 	}
+ }
+
+ /*
+  * __free_record - unconditionally release @record
+  *
+  * If the record is attached to a page (record->priv), it is removed from
+  * that page and the page reference it held is dropped.
+  */
+ static void __free_record(struct tep_record *record)
+ {
+ 	struct page *owner = record->priv;
+
+ 	if (owner) {
+ 		remove_record(owner, record);
+ 		__free_page(owner->handle, owner);
+ 	}
+
+ 	free(record);
+ }
+
+ /**
+  * tracecmd_free_record - drop a reference on a record
+  * @record: the record to release (may be NULL)
+  *
+  * Decrements the reference count and frees the record when it reaches
+  * zero. A record whose count is already zero, or that is still locked
+  * when its last reference is dropped, is reported and left alone.
+  */
+ void tracecmd_free_record(struct tep_record *record)
+ {
+ 	if (!record)
+ 		return;
+
+ 	if (!record->ref_count) {
+ 		tracecmd_critical("record ref count is zero!");
+ 		return;
+ 	}
+
+ 	if (--record->ref_count)
+ 		return;
+
+ 	if (record->locked) {
+ 		tracecmd_critical("freeing record when it is locked!");
+ 		return;
+ 	}
+
+ 	/* Clear the data pointer before the record memory goes away */
+ 	record->data = NULL;
+ 	__free_record(record);
+ }
+
+ /**
+  * tracecmd_record_ref - take an additional reference on a record
+  * @record: the record to reference
+  *
+  * The reference is dropped again with tracecmd_free_record().
+  */
+ void tracecmd_record_ref(struct tep_record *record)
+ {
+ 	++record->ref_count;
+#if DEBUG_RECORD
+ 	/* Remember the caller that took the most recent reference */
+ 	record->alloc_addr = (unsigned long)__builtin_return_address(0);
+#endif
+ }
+
+ /*
+  * free_next - discard the record cached by a previous peek on @cpu
+  *
+  * The cached record is detached from the CPU, unlocked and then released
+  * through tracecmd_free_record().
+  */
+ static void free_next(struct tracecmd_input *handle, int cpu)
+ {
+ 	struct tep_record *cached;
+
+ 	if (!handle->cpu_data)
+ 		return;
+ 	if (cpu >= handle->cpus)
+ 		return;
+
+ 	cached = handle->cpu_data[cpu].next;
+ 	if (cached) {
+ 		handle->cpu_data[cpu].next = NULL;
+ 		cached->locked = 0;
+ 		tracecmd_free_record(cached);
+ 	}
+ }
+
+ /*
+  * Compute (a * mul) >> shift without losing the upper bits of the
+  * product: the low and high 32-bit halves of @a are multiplied
+  * separately and recombined. This function was taken from the Linux
+  * kernel.
+  */
+ static unsigned long long mul_u64_u32_shr(unsigned long long a,
+ 					  unsigned long long mul, unsigned int shift)
+ {
+ 	unsigned int lo = a;
+ 	unsigned int hi = a >> 32;
+ 	unsigned long long result = (lo * mul) >> shift;
+
+ 	/* The high half contributes only when it is non-zero */
+ 	if (hi)
+ 		result += (hi * mul) << (32 - shift);
+
+ 	return result;
+ }
+
+ /*
+  * timestamp_correction_calc - apply a time sync correction to @ts
+  * @ts: the timestamp to correct
+  * @flags: time sync flags (TRACECMD_TSYNC_FLAG_INTERPOLATE selects
+  *	   linear interpolation between the two samples)
+  * @min: the sample at or before @ts
+  * @max: the sample after @ts
+  *
+  * Without interpolation the offset of @min is used as is. With
+  * interpolation the offset is interpolated (rounded to nearest) between
+  * @min and @max. If both samples carry the same time there is nothing to
+  * interpolate over, so the offset of @min is used instead of dividing by
+  * zero.
+  *
+  * @ts is scaled with the scaling/fraction of @min before the offset is
+  * applied.
+  */
+ static inline unsigned long long
+ timestamp_correction_calc(unsigned long long ts, unsigned int flags,
+ 			  struct ts_offset_sample *min,
+ 			  struct ts_offset_sample *max)
+ {
+ 	long long tscor = min->offset;
+
+ 	if (flags & TRACECMD_TSYNC_FLAG_INTERPOLATE) {
+ 		long long delta = max->time - min->time;
+
+ 		/* Identical sample times: avoid a division by zero */
+ 		if (delta) {
+ 			long long offset = ((long long)ts - min->time) *
+ 					   (max->offset - min->offset);
+
+ 			tscor = min->offset + (offset + delta / 2) / delta;
+ 		}
+ 	}
+
+ 	ts = (ts * min->scaling) >> min->fraction;
+ 	if (tscor < 0)
+ 		return ts - llabs(tscor);
+
+ 	return ts + tscor;
+ }
+
+ /*
+  * timestamp_host_sync - convert a guest timestamp to host time
+  * @ts: the guest timestamp
+  * @cpu: the CPU the timestamp belongs to
+  * @handle: input handle holding the time sync samples
+  *
+  * Picks the pair of time sync samples surrounding @ts (the samples are
+  * binary searched by time) and applies timestamp_correction_calc() with
+  * them. Timestamps outside of the sampled range use the first or last
+  * pair of samples.
+  *
+  * @ts is returned unmodified when @cpu has no time sync data, which
+  * includes a CPU with an empty sample array.
+  */
+ static unsigned long long timestamp_host_sync(unsigned long long ts, int cpu,
+ 					      struct tracecmd_input *handle)
+ {
+ 	struct timesync_offsets *tsync;
+ 	int min, mid, max;
+ 	int count;
+
+ 	if (cpu < 0 || cpu >= handle->host.cpu_count)
+ 		return ts;
+ 	tsync = &handle->host.ts_offsets[cpu];
+ 	count = tsync->ts_samples_count;
+
+ 	/* No samples: indexing ts_samples[] below would be out of bounds */
+ 	if (count < 1)
+ 		return ts;
+
+ 	/* We have one sample, nothing to calc here */
+ 	if (count == 1)
+ 		return ts + tsync->ts_samples[0].offset;
+
+ 	/* We have two samples, nothing to search here */
+ 	if (count == 2)
+ 		return timestamp_correction_calc(ts, handle->host.flags,
+ 						 &tsync->ts_samples[0],
+ 						 &tsync->ts_samples[1]);
+
+ 	/* We have more than two samples */
+ 	if (ts <= tsync->ts_samples[0].time)
+ 		return timestamp_correction_calc(ts, handle->host.flags,
+ 						 &tsync->ts_samples[0],
+ 						 &tsync->ts_samples[1]);
+ 	else if (ts >= tsync->ts_samples[count - 1].time)
+ 		return timestamp_correction_calc(ts, handle->host.flags,
+ 						 &tsync->ts_samples[count - 2],
+ 						 &tsync->ts_samples[count - 1]);
+
+ 	/*
+ 	 * @ts is strictly inside the sampled range. When the search ends
+ 	 * without an exact match, mid is the last sample before @ts, so
+ 	 * mid + 1 is always a valid index.
+ 	 */
+ 	min = 0;
+ 	max = count - 1;
+ 	mid = (min + max) / 2;
+ 	while (min <= max) {
+ 		if (ts < tsync->ts_samples[mid].time)
+ 			max = mid - 1;
+ 		else if (ts > tsync->ts_samples[mid].time)
+ 			min = mid + 1;
+ 		else
+ 			break;
+ 		mid = (min + max) / 2;
+ 	}
+
+ 	return timestamp_correction_calc(ts, handle->host.flags,
+ 					 &tsync->ts_samples[mid],
+ 					 &tsync->ts_samples[mid + 1]);
+ }
+
+ /*
+  * timestamp_calc - turn a raw buffer timestamp into the reported one
+  *
+  * Unless raw timestamps were requested, the steps are applied in this
+  * order: host/guest synchronization, clock conversion (user supplied
+  * ts2secs wins over the auto calculated TSC multiplier), and finally
+  * the handle's timestamp offset.
+  */
+ static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
+ 					 struct tracecmd_input *handle)
+ {
+ 	/* Raw timestamps are passed through untouched */
+ 	if (handle->flags & TRACECMD_FL_RAW_TS)
+ 		return ts;
+
+ 	/* Guest trace file: bring the timestamp into host time */
+ 	if (handle->host.sync_enable)
+ 		ts = timestamp_host_sync(ts, cpu, handle);
+
+ 	if (handle->ts2secs)
+ 		/* user specified clock frequency */
+ 		ts *= handle->ts2secs;
+ 	else if (handle->tsc_calc.mult)
+ 		/* auto calculated TSC clock frequency */
+ 		ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult,
+ 				     handle->tsc_calc.shift);
+
+ 	/* Offset given by the user with --ts-offset or --date */
+ 	return ts + handle->ts_offset;
+ }
+
+/*
+ * Page is mapped, now read in the page header info.
+ */
+static int update_page_info(struct tracecmd_input *handle, int cpu)
+{
+ struct tep_handle *pevent = handle->pevent;
+ void *ptr = handle->cpu_data[cpu].page->map;
+ struct kbuffer *kbuf = handle->cpu_data[cpu].kbuf;
+
+ /* FIXME: handle header page */
+ if (tep_get_header_timestamp_size(pevent) != 8) {
+ tracecmd_warning("expected a long long type for timestamp");
+ return -1;
+ }
+
+ kbuffer_load_subbuffer(kbuf, ptr);
+ if (kbuffer_subbuffer_size(kbuf) > handle->page_size) {
+ tracecmd_warning("bad page read, with size of %d", kbuffer_subbuffer_size(kbuf));
+ return -1;
+ }
+ handle->cpu_data[cpu].timestamp = timestamp_calc(kbuffer_timestamp(kbuf),
+ cpu, handle);
+
+ return 0;
+}
+
+/*
+ * get_page maps a page for a given cpu.
+ *
+ * Returns 1 if the page was already mapped,
+ * 0 if it mapped successfully
+ * -1 on error
+ */
+static int get_page(struct tracecmd_input *handle, int cpu,
+ off64_t offset)
+{
+ /* Don't map if the page is already where we want */
+ if (handle->cpu_data[cpu].offset == offset &&
+ handle->cpu_data[cpu].page)
+ return 1;
+
+ /* Do not map no data for CPU */
+ if (!handle->cpu_data[cpu].size)
+ return -1;
+
+ if (offset & (handle->page_size - 1)) {
+ errno = -EINVAL;
+ tracecmd_critical("bad page offset %llx", offset);
+ return -1;
+ }
+
+ if (offset < handle->cpu_data[cpu].file_offset ||
+ offset > handle->cpu_data[cpu].file_offset +
+ handle->cpu_data[cpu].file_size) {
+ errno = -EINVAL;
+ tracecmd_critical("bad page offset %llx", offset);
+ return -1;
+ }
+
+ handle->cpu_data[cpu].offset = offset;
+ handle->cpu_data[cpu].size = (handle->cpu_data[cpu].file_offset +
+ handle->cpu_data[cpu].file_size) -
+ offset;
+
+ free_page(handle, cpu);
+
+ handle->cpu_data[cpu].page = allocate_page(handle, cpu, offset);
+ if (!handle->cpu_data[cpu].page)
+ return -1;
+
+ if (update_page_info(handle, cpu))
+ return -1;
+
+ return 0;
+}
+
+ /*
+  * get_next_page - move the iterator of @cpu to the following page
+  *
+  * Returns 0 when there is nothing to advance to (no current page and not
+  * reading from a pipe, or at most one page of data left; in the latter
+  * case the current page is released and the offset reset to 0).
+  * Otherwise returns the result of get_page() for the next page.
+  */
+ static int get_next_page(struct tracecmd_input *handle, int cpu)
+ {
+ 	struct cpu_data *cd = &handle->cpu_data[cpu];
+
+ 	if (!cd->page && !handle->use_pipe)
+ 		return 0;
+
+ 	free_page(handle, cpu);
+
+ 	/* The current page was the last one */
+ 	if (cd->size <= handle->page_size) {
+ 		cd->offset = 0;
+ 		return 0;
+ 	}
+
+ 	return get_page(handle, cpu, cd->offset + handle->page_size);
+ }
+
+ /*
+  * peek_event - peek at the record that covers @offset on @cpu
+  *
+  * Timestamps are accumulated from the start of a page through each
+  * event, so the page is first reset to its beginning and records are
+  * then skipped until one ends past @offset. This is just used by
+  * tracecmd_read_at.
+  *
+  * Returns the peeked record, or NULL when the records ran out.
+  */
+ static struct tep_record *
+ peek_event(struct tracecmd_input *handle, unsigned long long offset,
+ 	   int cpu)
+ {
+ 	struct tep_record *rec;
+
+ 	update_page_info(handle, cpu);
+
+ 	for (;;) {
+ 		free_next(handle, cpu);
+ 		rec = tracecmd_peek_data(handle, cpu);
+ 		if (!rec)
+ 			return NULL;
+ 		if (rec->offset + rec->record_size > offset)
+ 			return rec;
+ 	}
+ }
+
+ /*
+  * read_event - like peek_event(), but consume the record that was found
+  */
+ static struct tep_record *
+ read_event(struct tracecmd_input *handle, unsigned long long offset,
+ 	   int cpu)
+ {
+ 	if (!peek_event(handle, offset, cpu))
+ 		return NULL;
+
+ 	return tracecmd_read_data(handle, cpu);
+ }
+
+ /*
+  * find_and_peek_event - locate the CPU owning @offset and peek there
+  *
+  * The CPU whose data range contains @offset is looked up, its iterator
+  * is moved to the page holding @offset and the record covering @offset
+  * is peeked. @pcpu (if not NULL) receives the CPU once the page was
+  * mapped.
+  *
+  * Returns NULL if no CPU owns @offset, the page can not be mapped, or
+  * no record was found.
+  */
+ static struct tep_record *
+ find_and_peek_event(struct tracecmd_input *handle, unsigned long long offset,
+ 		    int *pcpu)
+ {
+ 	struct cpu_data *cd;
+ 	int cpu = 0;
+
+ 	/* find the cpu that this offset exists in */
+ 	while (cpu < handle->cpus) {
+ 		cd = &handle->cpu_data[cpu];
+ 		if (offset >= cd->file_offset &&
+ 		    offset < cd->file_offset + cd->file_size)
+ 			break;
+ 		cpu++;
+ 	}
+
+ 	/* Not found? */
+ 	if (cpu == handle->cpus)
+ 		return NULL;
+
+ 	/* Move this cpu index to point to this offset */
+ 	if (get_page(handle, cpu, calc_page_offset(handle, offset)) < 0)
+ 		return NULL;
+
+ 	if (pcpu)
+ 		*pcpu = cpu;
+
+ 	return peek_event(handle, offset, cpu);
+ }
+
+
+ /*
+  * find_and_read_event - like find_and_peek_event(), but consume the record
+  *
+  * @pcpu (if not NULL) is only written when a record was found.
+  */
+ static struct tep_record *
+ find_and_read_event(struct tracecmd_input *handle, unsigned long long offset,
+ 		    int *pcpu)
+ {
+ 	int cpu;
+
+ 	if (!find_and_peek_event(handle, offset, &cpu))
+ 		return NULL;
+
+ 	if (pcpu)
+ 		*pcpu = cpu;
+
+ 	return tracecmd_read_data(handle, cpu);
+ }
+
+/**
+ * tracecmd_read_at - read a record from a specific offset
+ * @handle: input handle for the trace.dat file
+ * @offset: the offset into the file to find the record
+ * @pcpu: pointer to a variable to store the CPU id the record was found in
+ *
+ * This function is useful when looking for a previous record.
+ * You can store the offset of the record "record->offset" and use that
+ * offset to retreive the record again without needing to store any
+ * other information about the record.
+ *
+ * The record returned must be freed.
+ */
+struct tep_record *
+tracecmd_read_at(struct tracecmd_input *handle, unsigned long long offset,
+ int *pcpu)
+{
+ unsigned long long page_offset;
+ int cpu;
+
+ page_offset = calc_page_offset(handle, offset);
+
+ /* check to see if we have this page already */
+ for (cpu = 0; cpu < handle->cpus; cpu++) {
+ if (handle->cpu_data[cpu].offset == page_offset &&
+ handle->cpu_data[cpu].file_size)
+ break;
+ }
+
+ if (cpu < handle->cpus && handle->cpu_data[cpu].page) {
+ if (pcpu)
+ *pcpu = cpu;
+ return read_event(handle, offset, cpu);
+ } else
+ return find_and_read_event(handle, offset, pcpu);
+}
+
+/**
+ * tracecmd_refresh_record - remaps the records data
+ * @handle: input handle for the trace.dat file
+ * @record: the record to be refreshed
+ *
+ * A record data points to a mmap section of memory.
+ * by reading new records the mmap section may be unmapped.
+ * This will refresh the record's data mapping.
+ *
+ * ===== OBSOLETED BY PAGE REFERENCES =====
+ *
+ * Returns 1 if page is still mapped (does not modify CPU iterator)
+ * 0 on successful mapping (was not mapped before,
+ * This will update CPU iterator to point to
+ * the next record)
+ * -1 on error.
+ */
+int tracecmd_refresh_record(struct tracecmd_input *handle,
+ struct tep_record *record)
+{
+ unsigned long long page_offset;
+ int cpu = record->cpu;
+ struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+ int index;
+ int ret;
+
+ page_offset = calc_page_offset(handle, record->offset);
+ index = record->offset & (handle->page_size - 1);
+
+ ret = get_page(handle, record->cpu, page_offset);
+ if (ret < 0)
+ return -1;
+
+ /* If the page is still mapped, there's nothing to do */
+ if (ret)
+ return 1;
+
+ record->data = kbuffer_read_at_offset(cpu_data->kbuf, index, &record->ts);
+ cpu_data->timestamp = record->ts;
+
+ return 0;
+}
+
+/**
+ * tracecmd_read_cpu_first - get the first record in a CPU
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU to search
+ *
+ * This returns the first (by time) record entry in a given CPU.
+ *
+ * The record returned must be freed.
+ */
+struct tep_record *
+tracecmd_read_cpu_first(struct tracecmd_input *handle, int cpu)
+{
+ unsigned long long page_offset;
+ int ret;
+
+ if (cpu >= handle->cpus)
+ return NULL;
+
+ page_offset = calc_page_offset(handle, handle->cpu_data[cpu].file_offset);
+
+ ret = get_page(handle, cpu, page_offset);
+ if (ret < 0)
+ return NULL;
+
+ /* If the page was already mapped, we need to reset it */
+ if (ret)
+ update_page_info(handle, cpu);
+
+ free_next(handle, cpu);
+
+ return tracecmd_read_data(handle, cpu);
+}
+
+/**
+ * tracecmd_read_cpu_last - get the last record in a CPU
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU to search
+ *
+ * This returns the last (by time) record entry in a given CPU.
+ *
+ * The record returned must be freed.
+ */
+struct tep_record *
+tracecmd_read_cpu_last(struct tracecmd_input *handle, int cpu)
+{
+ struct tep_record *record = NULL;
+ off64_t offset, page_offset;
+
+ offset = handle->cpu_data[cpu].file_offset +
+ handle->cpu_data[cpu].file_size;
+
+ if (offset & (handle->page_size - 1))
+ offset &= ~(handle->page_size - 1);
+ else
+ offset -= handle->page_size;
+
+ page_offset = offset;
+
+ again:
+ if (get_page(handle, cpu, page_offset) < 0)
+ return NULL;
+
+ offset = page_offset;
+
+ do {
+ tracecmd_free_record(record);
+ record = tracecmd_read_data(handle, cpu);
+ if (record)
+ offset = record->offset;
+ } while (record);
+
+ record = tracecmd_read_at(handle, offset, NULL);
+
+ /*
+ * It is possible that a page has just a timestamp
+ * or just padding on it.
+ */
+ if (!record) {
+ if (page_offset == handle->cpu_data[cpu].file_offset)
+ return NULL;
+ page_offset -= handle->page_size;
+ goto again;
+ }
+
+ return record;
+}
+
+/**
+ * tracecmd_set_cpu_to_timestamp - set the CPU iterator to a given time
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU pointer to set
+ * @ts: the timestamp to set the CPU at.
+ *
+ * This sets the CPU iterator used by tracecmd_read_data and
+ * tracecmd_peek_data to a location in the CPU storage near
+ * a given timestamp. It will try to set the iterator to a time before
+ * the time stamp and not actually at a given time.
+ *
+ * To use this to find a record in a time field, call this function
+ * first, than iterate with tracecmd_read_data to find the records
+ * you need.
+ */
+int
+tracecmd_set_cpu_to_timestamp(struct tracecmd_input *handle, int cpu,
+ unsigned long long ts)
+{
+ struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+ off64_t start, end, next;
+
+ if (cpu < 0 || cpu >= handle->cpus) {
+ errno = -EINVAL;
+ return -1;
+ }
+
+ if (!cpu_data->size)
+ return -1;
+
+ if (!cpu_data->page) {
+ if (init_cpu(handle, cpu))
+ return -1;
+ }
+
+ if (cpu_data->timestamp == ts) {
+ /*
+ * If a record is cached, then that record is most
+ * likely the matching timestamp. Otherwise we need
+ * to start from the beginning of the index;
+ */
+ if (!cpu_data->next ||
+ cpu_data->next->ts != ts)
+ update_page_info(handle, cpu);
+ return 0;
+ }
+
+ /* Set to the first record on current page */
+ update_page_info(handle, cpu);
+
+ if (cpu_data->timestamp < ts) {
+ start = cpu_data->offset;
+ end = cpu_data->file_offset + cpu_data->file_size;
+ if (end & (handle->page_size - 1))
+ end &= ~(handle->page_size - 1);
+ else
+ end -= handle->page_size;
+ next = end;
+ } else {
+ end = cpu_data->offset;
+ start = cpu_data->file_offset;
+ next = start;
+ }
+
+ while (start < end) {
+ if (get_page(handle, cpu, next) < 0)
+ return -1;
+
+ if (cpu_data->timestamp == ts)
+ break;
+
+ if (cpu_data->timestamp < ts)
+ start = next;
+ else
+ end = next;
+
+ next = start + (end - start) / 2;
+ next = calc_page_offset(handle, next);
+
+ /* Prevent an infinite loop if start and end are a page off */
+ if (next == start)
+ start = next += handle->page_size;
+ }
+
+ /*
+ * We need to end up on a page before the time stamp.
+ * We go back even if the timestamp is the same. This is because
+ * we want the event with the timestamp, not the page. The page
+ * can start with the timestamp we are looking for, but the event
+ * may be on the previous page.
+ */
+ if (cpu_data->timestamp >= ts &&
+ cpu_data->offset > cpu_data->file_offset)
+ get_page(handle, cpu, cpu_data->offset - handle->page_size);
+
+ return 0;
+}
+
+/**
+ * tracecmd_set_all_cpus_to_timestamp - set all CPUs iterator to a given time
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU pointer to set
+ * @ts: the timestamp to set the CPU at.
+ *
+ * This sets the CPU iterator used by tracecmd_read_data and
+ * tracecmd_peek_data to a location in the CPU storage near
+ * a given timestamp. It will try to set the iterator to a time before
+ * the time stamp and not actually at a given time.
+ *
+ * To use this to find a record in a time field, call this function
+ * first, than iterate with tracecmd_read_next_data to find the records
+ * you need.
+ */
+void
+tracecmd_set_all_cpus_to_timestamp(struct tracecmd_input *handle,
+ unsigned long long time)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < handle->cpus; cpu++)
+ tracecmd_set_cpu_to_timestamp(handle, cpu, time);
+}
+
+/**
+ * tracecmd_set_cursor - set the offset for the next tracecmd_read_data
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU pointer to set
+ * @offset: the offset to place the cursor
+ *
+ * Set the pointer to the next read or peek. This is useful when
+ * needing to read sequentially and then look at another record
+ * out of sequence without breaking the iteration. This is done with:
+ *
+ * record = tracecmd_peek_data()
+ * offset = record->offset;
+ * record = tracecmd_read_at();
+ * - do what ever with record -
+ * tracecmd_set_cursor(handle, cpu, offset);
+ *
+ * Now the next tracecmd_peek_data or tracecmd_read_data will return
+ * the original record.
+ */
+int tracecmd_set_cursor(struct tracecmd_input *handle,
+ int cpu, unsigned long long offset)
+{
+ struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+ unsigned long long page_offset;
+
+ if (cpu < 0 || cpu >= handle->cpus)
+ return -1;
+
+ if (offset < cpu_data->file_offset ||
+ offset > cpu_data->file_offset + cpu_data->file_size)
+ return -1; /* cpu does not have this offset. */
+
+ /* Move this cpu index to point to this offest */
+ page_offset = calc_page_offset(handle, offset);
+
+ if (get_page(handle, cpu, page_offset) < 0)
+ return -1;
+
+ peek_event(handle, offset, cpu);
+
+ return 0;
+}
+
+/**
+ * tracecmd_get_cursor - get the offset for the next tracecmd_read_data
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU pointer to get the cursor from
+ *
+ * Returns the offset of the next record that would be read.
+ */
+unsigned long long
+tracecmd_get_cursor(struct tracecmd_input *handle, int cpu)
+{
+ struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+ struct kbuffer *kbuf = cpu_data->kbuf;
+
+ if (cpu < 0 || cpu >= handle->cpus)
+ return 0;
+
+ /*
+ * Use the next pointer if it exists and matches the
+ * current timestamp.
+ */
+ if (cpu_data->next &&
+ cpu_data->next->ts == cpu_data->timestamp)
+ return cpu_data->next->offset;
+
+ /*
+ * Either the next point does not exist, or it does
+ * not match the timestamp. The next read will use the
+ * current page.
+ *
+ * If the offset is at the end, then return that.
+ */
+ if (cpu_data->offset >= cpu_data->file_offset +
+ cpu_data->file_size)
+ return cpu_data->offset;
+
+ return cpu_data->offset + kbuffer_curr_offset(kbuf);
+}
+
+/**
+ * tracecmd_translate_data - create a record from raw data
+ * @handle: input handle for the trace.dat file
+ * @ptr: raw data to read
+ * @size: the size of the data
+ *
+ * This function tries to create a record from some given
+ * raw data. The data does not need to be from the trace.dat file.
+ * It can be stored from another location.
+ *
+ * Note, since the timestamp is calculated from within the trace
+ * buffer, the timestamp for the record will be zero, since it
+ * can't calculate it.
+ *
+ * The record returned must be freed.
+ */
+struct tep_record *
+tracecmd_translate_data(struct tracecmd_input *handle,
+ void *ptr, int size)
+{
+ struct tep_handle *pevent = handle->pevent;
+ struct tep_record *record;
+ unsigned int length;
+ int swap = 1;
+
+ /* minimum record read is 8, (warn?) (TODO: make 8 into macro) */
+ if (size < 8)
+ return NULL;
+
+ record = malloc(sizeof(*record));
+ if (!record)
+ return NULL;
+ memset(record, 0, sizeof(*record));
+
+ record->ref_count = 1;
+ if (tep_is_local_bigendian(pevent) == tep_is_file_bigendian(pevent))
+ swap = 0;
+ record->data = kbuffer_translate_data(swap, ptr, &length);
+ record->size = length;
+ if (record->data)
+ record->record_size = record->size + (record->data - ptr);
+
+ return record;
+}
+
+
+/**
+ * tracecmd_peek_data - return the record at the current location.
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU to pull from
+ *
+ * This returns the record at the current location of the CPU
+ * iterator. It does not increment the CPU iterator.
+ */
+struct tep_record *
+tracecmd_peek_data(struct tracecmd_input *handle, int cpu)
+{
+ struct tep_record *record;
+ unsigned long long ts;
+ struct kbuffer *kbuf;
+ struct page *page;
+ int index;
+ void *data;
+
+ if (cpu >= handle->cpus)
+ return NULL;
+
+ page = handle->cpu_data[cpu].page;
+ kbuf = handle->cpu_data[cpu].kbuf;
+
+ /* Hack to work around function graph read ahead */
+ tracecmd_curr_thread_handle = handle;
+
+ if (handle->cpu_data[cpu].next) {
+
+ record = handle->cpu_data[cpu].next;
+ if (!record->data) {
+ tracecmd_critical("Something freed the record");
+ return NULL;
+ }
+
+ if (handle->cpu_data[cpu].timestamp == record->ts)
+ return record;
+
+ /*
+ * The timestamp changed, which means the cached
+ * record is no longer valid. Reread a new record.
+ */
+ free_next(handle, cpu);
+ }
+
+read_again:
+ if (!page) {
+ if (handle->use_pipe) {
+ get_next_page(handle, cpu);
+ page = handle->cpu_data[cpu].page;
+ }
+ if (!page)
+ return NULL;
+ }
+
+ data = kbuffer_read_event(kbuf, &ts);
+ if (!data) {
+ if (get_next_page(handle, cpu))
+ return NULL;
+ page = handle->cpu_data[cpu].page;
+ goto read_again;
+ }
+
+ handle->cpu_data[cpu].timestamp = timestamp_calc(ts, cpu, handle);
+
+ index = kbuffer_curr_offset(kbuf);
+
+ record = malloc(sizeof(*record));
+ if (!record)
+ return NULL;
+ memset(record, 0, sizeof(*record));
+
+ record->ts = handle->cpu_data[cpu].timestamp;
+ record->size = kbuffer_event_size(kbuf);
+ record->cpu = handle->cpu_data[cpu].cpu;
+ record->data = data;
+ record->offset = handle->cpu_data[cpu].offset + index;
+ record->missed_events = kbuffer_missed_events(kbuf);
+ record->ref_count = 1;
+ record->locked = 1;
+
+ handle->cpu_data[cpu].next = record;
+
+ record->record_size = kbuffer_curr_size(kbuf);
+ record->priv = page;
+ add_record(page, record);
+ page->ref_count++;
+
+ kbuffer_next_event(kbuf, NULL);
+
+ return record;
+}
+
+/**
+ * tracecmd_read_data - read the next record and increment
+ * @handle: input handle for the trace.dat file
+ * @cpu: the CPU to pull from
+ *
+ * This returns the record at the current location of the CPU
+ * iterator and increments the CPU iterator.
+ *
+ * The record returned must be freed.
+ */
+struct tep_record *
+tracecmd_read_data(struct tracecmd_input *handle, int cpu)
+{
+ struct tep_record *record;
+
+ if (cpu >= handle->cpus)
+ return NULL;
+
+ record = tracecmd_peek_data(handle, cpu);
+ handle->cpu_data[cpu].next = NULL;
+ if (record) {
+ record->locked = 0;
+#if DEBUG_RECORD
+ record->alloc_addr = (unsigned long)__builtin_return_address(0);
+#endif
+ }
+ return record;
+}
+
+/**
+ * tracecmd_read_next_data - read the next record
+ * @handle: input handle to the trace.dat file
+ * @rec_cpu: return pointer to the CPU that the record belongs to
+ *
+ * This returns the next record by time. This is different than
+ * tracecmd_read_data in that it looks at all CPUs. It does a peek
+ * at each CPU and the record with the earliest time stame is
+ * returned. If @rec_cpu is not NULL it gets the CPU id the record was
+ * on. The CPU cursor of the returned record is moved to the
+ * next record.
+ *
+ * Multiple reads of this function will return a serialized list
+ * of all records for all CPUs in order of time stamp.
+ *
+ * The record returned must be freed.
+ */
+struct tep_record *
+tracecmd_read_next_data(struct tracecmd_input *handle, int *rec_cpu)
+{
+ struct tep_record *record;
+ int next_cpu;
+
+ record = tracecmd_peek_next_data(handle, &next_cpu);
+ if (!record)
+ return NULL;
+
+ if (rec_cpu)
+ *rec_cpu = next_cpu;
+
+ return tracecmd_read_data(handle, next_cpu);
+}
+
+/**
+ * tracecmd_peek_next_data - return the next record
+ * @handle: input handle to the trace.dat file
+ * @rec_cpu: return pointer to the CPU that the record belongs to
+ *
+ * This returns the next record by time. This is different than
+ * tracecmd_peek_data in that it looks at all CPUs. It does a peek
+ * at each CPU and the record with the earliest time stame is
+ * returned. If @rec_cpu is not NULL it gets the CPU id the record was
+ * on. It does not increment the CPU iterator.
+ */
+struct tep_record *
+tracecmd_peek_next_data(struct tracecmd_input *handle, int *rec_cpu)
+{
+ unsigned long long ts;
+ struct tep_record *record, *next_record = NULL;
+ int next_cpu;
+ int cpu;
+
+ if (rec_cpu)
+ *rec_cpu = -1;
+
+ next_cpu = -1;
+ ts = 0;
+
+ for (cpu = 0; cpu < handle->cpus; cpu++) {
+ record = tracecmd_peek_data(handle, cpu);
+ if (record && (!next_record || record->ts < ts)) {
+ ts = record->ts;
+ next_cpu = cpu;
+ next_record = record;
+ }
+ }
+
+ if (next_record) {
+ if (rec_cpu)
+ *rec_cpu = next_cpu;
+ return next_record;
+ }
+
+ return NULL;
+}
+
+/**
+ * tracecmd_read_prev - read the record before the given record
+ * @handle: input handle to the trace.dat file
+ * @record: the record to use to find the previous record.
+ *
+ * This returns the record before the @record on its CPU. If
+ * @record is the first record, NULL is returned. The cursor is set
+ * as if the previous record was read by tracecmd_read_data().
+ *
+ * @record can not be NULL, otherwise NULL is returned; the
+ * record ownership goes to this function.
+ *
+ * Note, this is not that fast of an algorithm, since it needs
+ * to build the timestamp for the record.
+ *
+ * The record returned must be freed with tracecmd_free_record().
+ */
+struct tep_record *
+tracecmd_read_prev(struct tracecmd_input *handle, struct tep_record *record)
+{
+ unsigned long long offset, page_offset;;
+ struct cpu_data *cpu_data;
+ int index;
+ int cpu;
+
+ if (!record)
+ return NULL;
+
+ cpu = record->cpu;
+ offset = record->offset;
+ cpu_data = &handle->cpu_data[cpu];
+
+ page_offset = calc_page_offset(handle, offset);
+ index = offset - page_offset;
+
+ /* Note, the record passed in could have been a peek */
+ free_next(handle, cpu);
+
+ /* Reset the cursor */
+ /* Should not happen */
+ if (get_page(handle, cpu, page_offset) < 0)
+ return NULL;
+
+ update_page_info(handle, cpu);
+
+ /* Find the record before this record */
+ index = 0;
+ for (;;) {
+ record = tracecmd_read_data(handle, cpu);
+ /* Should not happen! */
+ if (!record)
+ return NULL;
+ if (record->offset == offset)
+ break;
+ index = record->offset - page_offset;
+ tracecmd_free_record(record);
+ }
+ tracecmd_free_record(record);
+
+ if (index)
+ /* we found our record */
+ return tracecmd_read_at(handle, page_offset + index, NULL);
+
+ /* reset the index to start at the beginning of the page */
+ update_page_info(handle, cpu);
+
+ /* The previous record is on the previous page */
+ for (;;) {
+ /* check if this is the first page */
+ if (page_offset == cpu_data->file_offset)
+ return NULL;
+ page_offset -= handle->page_size;
+
+ /* Updating page to a new page will reset index to 0 */
+ get_page(handle, cpu, page_offset);
+
+ record = NULL;
+ index = 0;
+ do {
+ if (record) {
+ index = record->offset - page_offset;
+ tracecmd_free_record(record);
+ }
+ record = tracecmd_read_data(handle, cpu);
+ /* Should not happen */
+ if (!record)
+ return NULL;
+ } while (record->offset != offset);
+ tracecmd_free_record(record);
+
+ if (index)
+ /* we found our record */
+ return tracecmd_read_at(handle, page_offset + index, NULL);
+ }
+
+ /* Not reached */
+}
+
+ /*
+  * init_cpu_zfile - uncompress the whole data of @cpu into a temp file
+  *
+  * The compressed data found at the CPU's file_offset is uncompressed
+  * into a file created with mkstemp(), and the position of the trace
+  * file is then restored. Afterwards the CPU's file_offset/offset are
+  * taken from handle->next_offset, file_size/size hold the uncompressed
+  * size, and handle->next_offset is advanced by that size rounded up to
+  * a page.
+  *
+  * Returns 0 on success, -1 on failure.
+  */
+ static int init_cpu_zfile(struct tracecmd_input *handle, int cpu)
+ {
+ 	struct cpu_data *cd = &handle->cpu_data[cpu];
+ 	unsigned long long uncompressed;
+ 	off64_t saved_pos;
+
+ 	/* Remember where the trace file was, then go to the CPU data */
+ 	saved_pos = lseek64(handle->fd, 0, SEEK_CUR);
+ 	if (lseek64(handle->fd, cd->file_offset, SEEK_SET) == (off_t)-1)
+ 		return -1;
+
+ 	strcpy(cd->compress.file, COMPR_TEMP_FILE);
+ 	cd->compress.fd = mkstemp(cd->compress.file);
+ 	if (cd->compress.fd < 0)
+ 		return -1;
+
+ 	if (tracecmd_uncompress_copy_to(handle->compress, cd->compress.fd,
+ 					NULL, &uncompressed))
+ 		return -1;
+
+ 	if (lseek64(handle->fd, saved_pos, SEEK_SET) == (off_t)-1)
+ 		return -1;
+
+ 	cd->file_offset = handle->next_offset;
+ 	cd->offset = cd->file_offset;
+ 	cd->file_size = uncompressed;
+ 	cd->size = uncompressed;
+
+ 	/* The next CPU starts on the following page boundary */
+ 	handle->next_offset = (handle->next_offset + uncompressed +
+ 			       handle->page_size - 1) &
+ 			      ~(handle->page_size - 1);
+ 	return 0;
+ }
+
+ /*
+  * init_cpu_zpage - set up @cpu for reading compressed data chunk by chunk
+  *
+  * The chunk information found at the CPU's file_offset is loaded, and
+  * the CPU's file_offset/offset are taken from handle->next_offset. The
+  * CPU's file_size/size become the sum of the chunk sizes, and
+  * handle->next_offset is advanced by that size rounded up to a page.
+  *
+  * Returns 0 on success, -1 on failure.
+  */
+ static int init_cpu_zpage(struct tracecmd_input *handle, int cpu)
+ {
+ 	struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+ 	int count;
+ 	int i;
+
+ 	if (lseek64(handle->fd, cpu_data->file_offset, SEEK_SET) == (off_t)-1)
+ 		return -1;
+
+ 	count = tracecmd_load_chunks_info(handle->compress, &cpu_data->compress.chunks);
+ 	if (count < 0)
+ 		return -1;
+
+ 	cpu_data->compress.count = count;
+ 	cpu_data->compress.last_chunk = 0;
+
+ 	cpu_data->file_offset = handle->next_offset;
+
+ 	/*
+ 	 * file_size is non-zero on entry (init_cpu() only gets here when
+ 	 * file_size > 0). Start from zero so that the result is the sum
+ 	 * of the chunk sizes only, matching init_cpu_zfile(), which
+ 	 * replaces file_size with the uncompressed size.
+ 	 */
+ 	cpu_data->file_size = 0;
+ 	for (i = 0; i < count; i++)
+ 		cpu_data->file_size += cpu_data->compress.chunks[i].size;
+
+ 	cpu_data->offset = cpu_data->file_offset;
+ 	cpu_data->size = cpu_data->file_size;
+ 	handle->next_offset = (handle->next_offset + cpu_data->size + handle->page_size - 1) &
+ 			      ~(handle->page_size - 1);
+ 	return 0;
+ }
+
+ /*
+ * init_cpu - prepare the iterator of @cpu for reading
+ *
+ * Sets up the CPU's offset/size (through init_cpu_zpage() or
+ * init_cpu_zfile() for compressed data), allocates its page table and
+ * loads the first page. If the first page can not be obtained and
+ * handle->read_page is not yet set, read_page is switched on and the
+ * allocation is retried, unless an earlier CPU that has data exists.
+ *
+ * Returns 0 on success (including a CPU without data), a non-zero value
+ * from the compressed setup, or -1 on failure.
+ */
+ static int init_cpu(struct tracecmd_input *handle, int cpu)
+ {
+ struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+ int ret;
+ int i;
+
+ if (handle->cpu_compressed && cpu_data->file_size > 0) {
+ if (handle->read_zpage)
+ ret = init_cpu_zpage(handle, cpu);
+ else
+ ret = init_cpu_zfile(handle, cpu);
+ if (ret)
+ return ret;
+ } else {
+ cpu_data->offset = cpu_data->file_offset;
+ cpu_data->size = cpu_data->file_size;
+ }
+ cpu_data->timestamp = 0;
+
+ list_head_init(&cpu_data->page_maps);
+ list_head_init(&cpu_data->compress.cache);
+
+ if (!cpu_data->size) {
+ tracecmd_info("CPU %d is empty", cpu);
+ return 0;
+ }
+
+ /* One page table slot per page of data, rounded up, at least one */
+ cpu_data->nr_pages = (cpu_data->size + handle->page_size - 1) / handle->page_size;
+ if (!cpu_data->nr_pages)
+ cpu_data->nr_pages = 1;
+ cpu_data->pages = calloc(cpu_data->nr_pages, sizeof(*cpu_data->pages));
+ if (!cpu_data->pages)
+ return -1;
+
+ if (handle->use_pipe) {
+ /* Just make a page, it will be nuked later */
+ cpu_data->page = malloc(sizeof(*cpu_data->page));
+ if (!cpu_data->page)
+ goto fail;
+
+ memset(cpu_data->page, 0, sizeof(*cpu_data->page));
+ cpu_data->pages[0] = cpu_data->page;
+ cpu_data->page_cnt = 1;
+ cpu_data->page->ref_count = 1;
+ return 0;
+ }
+
+ cpu_data->page = allocate_page(handle, cpu, cpu_data->offset);
+ if (!cpu_data->page && !handle->read_page) {
+ perror("mmap");
+ fprintf(stderr, "Can not mmap file, will read instead\n");
+
+ if (cpu) {
+ /*
+ * If the other CPUs had size and was able to mmap
+ * then bail.
+ */
+ for (i = 0; i < cpu; i++) {
+ if (handle->cpu_data[i].size)
+ goto fail;
+ }
+ }
+
+ /* try again without mmapping, just read it directly */
+ handle->read_page = true;
+ cpu_data->page = allocate_page(handle, cpu, cpu_data->offset);
+ if (!cpu_data->page)
+ /* Still no luck, bail! */
+ goto fail;
+ }
+
+ if (update_page_info(handle, cpu))
+ goto fail;
+ /* Timestamp of the first page of this CPU */
+ cpu_data->first_ts = cpu_data->timestamp;
+
+ return 0;
+ fail:
+ /*
+ * NOTE(review): when the page came from allocate_page(), free() only
+ * releases the descriptor here; whether its backing map is released
+ * elsewhere is not visible in this function - confirm.
+ */
+ free(cpu_data->pages);
+ cpu_data->pages = NULL;
+ free(cpu_data->page);
+ cpu_data->page = NULL;
+ return -1;
+ }
+
+/**
+ * tracecmd_set_ts_offset - Set the timestamp offset stored in the handle
+ * @handle: input handle to the trace.dat file
+ * @offset: new value of the offset; any previous value is overwritten
+ */
+void tracecmd_set_ts_offset(struct tracecmd_input *handle,
+ long long offset)
+{
+ handle->ts_offset = offset;
+}
+
+/**
+ * tracecmd_add_ts_offset - Add a value to the offset which will be applied
+ * to the timestamps of all events from the given trace file
+ * @handle: input handle to the trace.dat file
+ * @offset: value that will be added to the current offset (may be negative)
+ *
+ * Unlike tracecmd_set_ts_offset(), the existing offset is kept and
+ * @offset is accumulated on top of it.
+ */
+void tracecmd_add_ts_offset(struct tracecmd_input *handle,
+ long long offset)
+{
+ handle->ts_offset += offset;
+}
+
+/**
+ * tracecmd_set_ts2secs - store the timestamp conversion ratio for a clock
+ * @handle: input handle to the trace.dat file
+ * @hz: frequency used as the divisor of NSEC_PER_SEC
+ *
+ * Stores NSEC_PER_SEC / @hz in the handle and turns off the use of the
+ * trace clock recorded in the file.
+ */
+void tracecmd_set_ts2secs(struct tracecmd_input *handle,
+			  unsigned long long hz)
+{
+	handle->ts2secs = (double)NSEC_PER_SEC / (double)hz;
+	handle->use_trace_clock = false;
+}
+
+/*
+ * qsort() comparator ordering struct ts_offset_sample entries by
+ * ascending time.
+ */
+static int tsync_offset_cmp(const void *a, const void *b)
+{
+	const struct ts_offset_sample *lhs = a;
+	const struct ts_offset_sample *rhs = b;
+
+	/* Yields 1, -1 or 0 without any subtraction that could overflow */
+	return (lhs->time > rhs->time) - (lhs->time < rhs->time);
+}
+
+/*
+ * Bounds-checked readers for parsing an option payload.
+ *
+ * Both macros rely on locals of the function they expand in: "tep", "buf"
+ * and "size" (plus "ts_offsets" for safe_read_loop).
+ *
+ * safe_read(R, C): makes the enclosing function return -EFAULT when fewer
+ * than C bytes remain; otherwise stores the C-byte number in R and advances
+ * buf / shrinks size by C.
+ *
+ * safe_read_loop(type): reads one 8-byte value into the "type" member of
+ * every sample of ts_offsets, with the same early return on short input.
+ */
+#define safe_read(R, C) \
+ do { \
+ if ((C) > size) \
+ return -EFAULT; \
+ (R) = tep_read_number(tep, buf, (C)); \
+ buf += (C); \
+ size -= (C); \
+ } while (0)
+
+#define safe_read_loop(type) \
+ do { \
+ int ii; \
+ for (ii = 0; ii < ts_offsets->ts_samples_count; ii++) \
+ safe_read(ts_offsets->ts_samples[ii].type, 8); \
+ } while (0)
+
+/*
+ * tsync_cpu_offsets_load - parse per-CPU time sync samples from an option
+ * @handle: input handle; results are stored in handle->host
+ * @buf: payload, starting at the 4-byte CPU count
+ * @size: number of bytes available in @buf
+ *
+ * For each CPU: a 4-byte sample count followed by three arrays of 8-byte
+ * values (time, offset, scaling).  If bytes remain afterwards, one more
+ * array per CPU (fraction) is read.  Samples are then sorted by time and
+ * entries with a duplicate time are dropped.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EFAULT (through
+ * safe_read) when the payload is too short.  On error the arrays already
+ * allocated stay attached to handle->host.
+ */
+static int tsync_cpu_offsets_load(struct tracecmd_input *handle, char *buf, int size)
+{
+ struct tep_handle *tep = handle->pevent;
+ struct timesync_offsets *ts_offsets;
+ int i, j, k;
+
+ safe_read(handle->host.cpu_count, 4);
+ handle->host.ts_offsets = calloc(handle->host.cpu_count,
+ sizeof(struct timesync_offsets));
+ if (!handle->host.ts_offsets)
+ return -ENOMEM;
+ for (i = 0; i < handle->host.cpu_count; i++) {
+ ts_offsets = &handle->host.ts_offsets[i];
+ safe_read(ts_offsets->ts_samples_count, 4);
+ ts_offsets->ts_samples = calloc(ts_offsets->ts_samples_count,
+ sizeof(struct ts_offset_sample));
+ if (!ts_offsets->ts_samples)
+ return -ENOMEM;
+ safe_read_loop(time);
+ safe_read_loop(offset);
+ safe_read_loop(scaling);
+ }
+
+ /* Trailing data, when present, holds the fraction arrays */
+ if (size > 0) {
+ for (i = 0; i < handle->host.cpu_count; i++) {
+ ts_offsets = &handle->host.ts_offsets[i];
+ safe_read_loop(fraction);
+ }
+ }
+
+ for (i = 0; i < handle->host.cpu_count; i++) {
+ ts_offsets = &handle->host.ts_offsets[i];
+ qsort(ts_offsets->ts_samples, ts_offsets->ts_samples_count,
+ sizeof(struct ts_offset_sample), tsync_offset_cmp);
+ /* Filter possible samples with equal time */
+ for (k = 0, j = 0; k < ts_offsets->ts_samples_count; k++) {
+ if (k == 0 || ts_offsets->ts_samples[k].time != ts_offsets->ts_samples[k-1].time)
+ ts_offsets->ts_samples[j++] = ts_offsets->ts_samples[k];
+ }
+ ts_offsets->ts_samples_count = j;
+ }
+
+ return 0;
+}
+
+/*
+ * Release the per-CPU sample arrays and the offsets table of @host, then
+ * clear the table pointer.  Does nothing when no table is attached.
+ */
+static void trace_tsync_offset_free(struct host_trace_info *host)
+{
+	int cpu;
+
+	if (!host->ts_offsets)
+		return;
+
+	for (cpu = 0; cpu < host->cpu_count; cpu++)
+		free(host->ts_offsets[cpu].ts_samples);
+
+	free(host->ts_offsets);
+	host->ts_offsets = NULL;
+}
+
+/*
+ * qsort() comparator ordering struct tracecmd_proc_addr_map entries by
+ * ascending start address.
+ *
+ * Returns 1 when @a starts after @b, -1 when it starts before, 0 when
+ * both start at the same address.
+ */
+static int trace_pid_map_cmp(const void *a, const void *b)
+{
+	struct tracecmd_proc_addr_map *m_a = (struct tracecmd_proc_addr_map *)a;
+	struct tracecmd_proc_addr_map *m_b = (struct tracecmd_proc_addr_map *)b;
+
+	if (m_a->start > m_b->start)
+		return 1;
+	if (m_a->start < m_b->start)
+		return -1;
+	return 0;
+}
+
+/*
+ * Free one pid_addr_maps node: every library name, the library array,
+ * the process name and the node itself.  NULL is accepted.
+ */
+static void procmap_free(struct pid_addr_maps *maps)
+{
+	int idx;
+
+	if (!maps)
+		return;
+
+	if (maps->lib_maps) {
+		idx = 0;
+		while (idx < maps->nr_lib_maps) {
+			free(maps->lib_maps[idx].lib_name);
+			idx++;
+		}
+		free(maps->lib_maps);
+	}
+
+	free(maps->proc_name);
+	free(maps);
+}
+
+/*
+ * Free the whole guest list hanging off @handle and leave handle->guest
+ * set to NULL.
+ */
+static void trace_guests_free(struct tracecmd_input *handle)
+{
+	struct guest_trace_info *cur;
+	struct guest_trace_info *following;
+
+	for (cur = handle->guest; cur; cur = following) {
+		following = cur->next;
+		free(cur->name);
+		free(cur->cpu_pid);
+		free(cur);
+	}
+	handle->guest = NULL;
+}
+
+/*
+ * trace_guest_load - parse a guest description option and add it to @handle
+ * @handle: input handle; the new guest is pushed on handle->guest
+ * @buf: option payload
+ * @size: number of bytes in @buf
+ *
+ * Returns 0 on success, -1 on allocation failure or malformed payload
+ * (nothing is added to the handle in that case).
+ */
+static int trace_guest_load(struct tracecmd_input *handle, char *buf, int size)
+{
+	struct guest_trace_info *guest = NULL;
+	int len;
+	int cpu;
+	int i;
+
+	guest = calloc(1, sizeof(struct guest_trace_info));
+	if (!guest)
+		goto error;
+
+	/*
+	 * Guest name, null terminated string
+	 * long long (8 bytes) trace-id
+	 * int (4 bytes) number of guest CPUs
+	 * array of size number of guest CPUs:
+	 *	int (4 bytes) Guest CPU id
+	 *	int (4 bytes) Host PID, running the guest CPU
+	 */
+
+	guest->name = strndup(buf, size);
+	if (!guest->name)
+		goto error;
+	len = strlen(guest->name) + 1;
+	buf += len;
+	size -= len;
+
+	/*
+	 * The remaining size is compared as a signed value: it becomes
+	 * negative when the name was not terminated inside the payload, and
+	 * an unsigned comparison against sizeof() would let that through.
+	 */
+	if (size < (int)sizeof(long long))
+		goto error;
+	guest->trace_id = tep_read_number(handle->pevent, buf, sizeof(long long));
+	buf += sizeof(long long);
+	size -= sizeof(long long);
+
+	if (size < (int)sizeof(int))
+		goto error;
+	guest->vcpu_count = tep_read_number(handle->pevent, buf, sizeof(int));
+	buf += sizeof(int);
+	size -= sizeof(int);
+
+	guest->cpu_pid = calloc(guest->vcpu_count, sizeof(int));
+	if (!guest->cpu_pid)
+		goto error;
+
+	for (i = 0; i < guest->vcpu_count; i++) {
+		if (size < (int)(2 * sizeof(int)))
+			goto error;
+		cpu = tep_read_number(handle->pevent, buf, sizeof(int));
+		buf += sizeof(int);
+		/* The CPU id comes from the file and indexes cpu_pid[] */
+		if (cpu < 0 || cpu >= guest->vcpu_count)
+			goto error;
+		guest->cpu_pid[cpu] = tep_read_number(handle->pevent,
+						      buf, sizeof(int));
+		buf += sizeof(int);
+		size -= 2 * sizeof(int);
+	}
+
+	guest->next = handle->guest;
+	handle->guest = guest;
+	return 0;
+
+error:
+	if (guest) {
+		free(guest->cpu_pid);
+		free(guest->name);
+		free(guest);
+	}
+	return -1;
+}
+
+/* Needs to be a constant, and 4K should be good enough */
+#define STR_PROCMAP_LINE_MAX 4096
+/*
+ * trace_pid_map_load - parse a process address map option
+ * @handle: input handle; the parsed map is pushed on handle->pid_maps
+ * @buf: NUL terminated text, modified in place (newlines become NULs)
+ *
+ * The first line holds "<pid> <number of maps> <process name>" with the
+ * two numbers in hex; each following line holds "<start> <end> <name>"
+ * with hex addresses.  Lines longer than STR_PROCMAP_LINE_MAX are rejected.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when the
+ * text does not match the expected layout.
+ */
+static int trace_pid_map_load(struct tracecmd_input *handle, char *buf)
+{
+ struct pid_addr_maps *maps = NULL;
+ char mapname[STR_PROCMAP_LINE_MAX+1];
+ char *line;
+ int res;
+ int ret;
+ int i;
+
+ maps = calloc(1, sizeof(*maps));
+ if (!maps)
+ return -ENOMEM;
+
+ /* ret is staged before each group of checks that share an error code */
+ ret = -EINVAL;
+ line = strchr(buf, '\n');
+ if (!line)
+ goto out_fail;
+
+ *line = '\0';
+ if (strlen(buf) > STR_PROCMAP_LINE_MAX)
+ goto out_fail;
+
+ res = sscanf(buf, "%x %x %"STRINGIFY(STR_PROCMAP_LINE_MAX)"s", &maps->pid, &maps->nr_lib_maps, mapname);
+ if (res != 3)
+ goto out_fail;
+
+ ret = -ENOMEM;
+ maps->proc_name = strdup(mapname);
+ if (!maps->proc_name)
+ goto out_fail;
+
+ maps->lib_maps = calloc(maps->nr_lib_maps, sizeof(struct tracecmd_proc_addr_map));
+ if (!maps->lib_maps)
+ goto out_fail;
+
+ buf = line + 1;
+ line = strchr(buf, '\n');
+ for (i = 0; i < maps->nr_lib_maps; i++) {
+ if (!line)
+ break;
+ *line = '\0';
+ /* The length check keeps the unbounded %s below inside mapname */
+ if (strlen(buf) > STR_PROCMAP_LINE_MAX)
+ break;
+ res = sscanf(buf, "%llx %llx %s", &maps->lib_maps[i].start,
+ &maps->lib_maps[i].end, mapname);
+ if (res != 3)
+ break;
+ maps->lib_maps[i].lib_name = strdup(mapname);
+ if (!maps->lib_maps[i].lib_name)
+ goto out_fail;
+ buf = line + 1;
+ line = strchr(buf, '\n');
+ }
+
+ /* An early break above means fewer lines than announced */
+ ret = -EINVAL;
+ if (i != maps->nr_lib_maps)
+ goto out_fail;
+
+ /* Sorted by start address so the array can be searched with bsearch() */
+ qsort(maps->lib_maps, maps->nr_lib_maps,
+ sizeof(*maps->lib_maps), trace_pid_map_cmp);
+
+ maps->next = handle->pid_maps;
+ handle->pid_maps = maps;
+
+ return 0;
+
+out_fail:
+ procmap_free(maps);
+ return ret;
+}
+
+/* Free every node of the list starting at @maps. */
+static void trace_pid_map_free(struct pid_addr_maps *maps)
+{
+	struct pid_addr_maps *following;
+
+	for (; maps; maps = following) {
+		following = maps->next;
+		procmap_free(maps);
+	}
+}
+
+/*
+ * bsearch() comparator: @a is a key whose start member holds the address
+ * looked up, @b is a map entry.  Returns 0 when the address lies in
+ * [start, end) of the entry, 1 when it is at or past the end, -1 when it
+ * is below the start.
+ */
+static int trace_pid_map_search(const void *a, const void *b)
+{
+	const struct tracecmd_proc_addr_map *wanted = a;
+	const struct tracecmd_proc_addr_map *entry = b;
+
+	return wanted->start >= entry->end ? 1 :
+	       wanted->start < entry->start ? -1 : 0;
+}
+
+/**
+ * tracecmd_search_task_map - Search task memory address map
+ * @handle: input handle to the trace.dat file
+ * @pid: pid of the task
+ * @addr: address from the task memory space.
+ *
+ * The memory map of a task can be saved in the trace.dat file with the
+ * option "--proc-map". When that information is present, this API looks up
+ * which library is loaded at the given @addr of the task.
+ *
+ * Returns a pointer to the struct tracecmd_proc_addr_map holding the name
+ * of the library and its start and end addresses, or NULL when @handle is
+ * NULL, no map is stored for @pid, or @addr is in none of its libraries.
+ */
+struct tracecmd_proc_addr_map *
+tracecmd_search_task_map(struct tracecmd_input *handle,
+			 int pid, unsigned long long addr)
+{
+	struct tracecmd_proc_addr_map key;
+	struct pid_addr_maps *maps;
+
+	if (!handle || !handle->pid_maps)
+		return NULL;
+
+	/* Find the map list entry that belongs to the task */
+	for (maps = handle->pid_maps; maps; maps = maps->next) {
+		if (maps->pid == pid)
+			break;
+	}
+	if (!maps || !maps->nr_lib_maps || !maps->lib_maps)
+		return NULL;
+
+	key.start = addr;
+	return bsearch(&key, maps->lib_maps, maps->nr_lib_maps,
+		       sizeof(*maps->lib_maps), trace_pid_map_search);
+}
+
+/* Accessor: returns the strings_size value stored in @handle. */
+__hidden unsigned int get_meta_strings_size(struct tracecmd_input *handle)
+{
+ return handle->strings_size;
+}
+
+/*
+ * Accessor: returns options_last_offset of @handle, the value recorded
+ * when the terminating TRACECMD_OPTION_DONE option was processed.
+ */
+__hidden unsigned long long get_last_option_offset(struct tracecmd_input *handle)
+{
+ return handle->options_last_offset;
+}
+
+/*
+ * handle_option_done - process the payload of a TRACECMD_OPTION_DONE option
+ * @handle: input handle
+ * @buf: option payload, at least 8 bytes
+ * @size: payload size
+ *
+ * Records where this payload starts in the file, then reads the 8-byte
+ * offset it carries.  A zero offset ends the options; otherwise the file
+ * is repositioned there and option parsing continues.
+ *
+ * Returns 0 at the end of the options, -1 on a short payload or seek
+ * failure, or the result of the chained handle_options() call.
+ */
+static int handle_option_done(struct tracecmd_input *handle, char *buf, int size)
+{
+	unsigned long long here;
+	unsigned long long next_section;
+
+	if (size < 8)
+		return -1;
+
+	here = lseek64(handle->fd, 0, SEEK_CUR);
+	if (here >= size)
+		handle->options_last_offset = here - size;
+
+	next_section = tep_read_number(handle->pevent, buf, 8);
+	if (next_section) {
+		if (lseek64(handle->fd, next_section, SEEK_SET) == (off_t)-1)
+			return -1;
+		return handle_options(handle);
+	}
+
+	return 0;
+}
+
+/*
+ * Bounds-checked number read from an option payload.
+ * @data_size holds the bytes still unread and @read_pos the current
+ * position inside @data; both are updated on success.
+ *
+ * Returns 0 and stores the value in @num, or -1 (nothing modified) when
+ * fewer than @bytes bytes remain.
+ */
+static inline int save_read_number(struct tep_handle *tep, char *data, int *data_size,
+				   int *read_pos, int bytes, unsigned long long *num)
+{
+	if (*data_size < bytes)
+		return -1;
+
+	*num = tep_read_number(tep, &data[*read_pos], bytes);
+	*data_size -= bytes;
+	*read_pos += bytes;
+
+	return 0;
+}
+
+/*
+ * Duplicate the NUL terminated string found at @data + *@read_pos.
+ *
+ * On success *@data_size is reduced and *@read_pos advanced by the string
+ * length plus its terminator, and the caller owns the returned copy.
+ * Returns NULL when no data is left, on allocation failure, or when the
+ * string does not fit in the remaining size (in that last case
+ * *@data_size has already been decremented).
+ */
+static inline char *save_read_string(char *data, int *data_size, int *read_pos)
+{
+	size_t consumed;
+	char *copy;
+
+	if (*data_size < 1)
+		return NULL;
+
+	copy = strdup(data + *read_pos);
+	if (!copy)
+		return NULL;
+
+	consumed = strlen(copy) + 1;
+	*data_size -= consumed;
+	if (*data_size < 0) {
+		free(copy);
+		return NULL;
+	}
+	*read_pos += consumed;
+
+	return copy;
+}
+
+/*
+ * handle_buffer_option - parse a buffer description option
+ * @handle: input handle
+ * @id: option id (TRACECMD_OPTION_BUFFER or TRACECMD_OPTION_BUFFER_TEXT)
+ * @data: option payload
+ * @size: payload size
+ *
+ * The payload starts with an 8-byte offset and the buffer name.  An empty
+ * name selects handle->top_buffer; any other name appends a new entry to
+ * handle->buffers.  For files with sections the payload continues with
+ * the clock name and, for TRACECMD_OPTION_BUFFER, the page size, the
+ * number of CPUs and a (cpu, offset, size) record per CPU.
+ *
+ * Returns 0 on success, -1 on malformed payload or allocation failure.
+ */
+static int handle_buffer_option(struct tracecmd_input *handle,
+ unsigned short id, char *data, int size)
+{
+ struct input_buffer_instance *buff;
+ struct cpu_file_data *cpu_data;
+ unsigned long long tmp;
+ long long max_cpu = -1;
+ int rsize = 0;
+ char *name;
+ int i;
+
+ if (save_read_number(handle->pevent, data, &size, &rsize, 8, &tmp))
+ return -1;
+
+ name = save_read_string(data, &size, &rsize);
+ if (!name)
+ return -1;
+
+ if (*name == '\0') {
+ /* top buffer */
+ buff = &handle->top_buffer;
+ } else {
+ buff = realloc(handle->buffers, sizeof(*handle->buffers) * (handle->nr_buffers + 1));
+ if (!buff) {
+ free(name);
+ return -1;
+ }
+ handle->buffers = buff;
+ handle->nr_buffers++;
+
+ buff = &handle->buffers[handle->nr_buffers - 1];
+ }
+ /* From here on the instance owns name */
+ memset(buff, 0, sizeof(struct input_buffer_instance));
+ buff->name = name;
+ buff->offset = tmp;
+
+ if (!HAS_SECTIONS(handle))
+ return 0;
+
+ /* file sections specific data */
+ buff->clock = save_read_string(data, &size, &rsize);
+ if (!buff->clock)
+ return -1;
+
+ /* The top buffer's clock becomes the handle's clock if none is set yet */
+ if (*name == '\0' && !handle->trace_clock)
+ handle->trace_clock = strdup(buff->clock);
+
+ if (id == TRACECMD_OPTION_BUFFER) {
+ if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp))
+ return -1;
+ buff->page_size = tmp;
+
+ if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp))
+ return -1;
+ buff->cpus = tmp;
+ if (!buff->cpus)
+ return 0;
+ cpu_data = calloc(buff->cpus, sizeof(*cpu_data));
+ if (!cpu_data)
+ return -1;
+ for (i = 0; i < buff->cpus; i++) {
+ if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp))
+ goto fail;
+ if ((long long)tmp > max_cpu)
+ max_cpu = tmp;
+ cpu_data[i].cpu = tmp;
+ if (save_read_number(handle->pevent, data,
+ &size, &rsize, 8, &cpu_data[i].offset))
+ goto fail;
+ if (save_read_number(handle->pevent, data,
+ &size, &rsize, 8, &cpu_data[i].size))
+ goto fail;
+ }
+ /* Dense case: record i describes CPU i, the array is used as is */
+ if (buff->cpus == max_cpu + 1) {
+ /* Check to make sure cpus match the index */
+ for (i = 0; i < buff->cpus; i++) {
+ if (cpu_data[i].cpu != i)
+ goto copy_buffer;
+ }
+ buff->cpu_data = cpu_data;
+ } else {
+ /* Sparse or unordered case: build an array indexed by CPU number */
+ copy_buffer:
+ buff->cpu_data = calloc(max_cpu + 1, sizeof(*cpu_data));
+ if (!buff->cpu_data)
+ goto fail;
+ for (i = 0; i < buff->cpus; i++) {
+ if (buff->cpu_data[cpu_data[i].cpu].size) {
+ tracecmd_warning("More than one buffer defined for CPU %d (buffer %d)\n",
+ cpu_data[i].cpu, i);
+ /*
+ * NOTE(review): only the scratch array is freed at fail;
+ * buff->cpu_data allocated above stays attached to buff.
+ * Confirm the handle teardown releases it.
+ */
+ goto fail;
+ }
+ buff->cpu_data[cpu_data[i].cpu] = cpu_data[i];
+ }
+ buff->cpus = max_cpu + 1;
+ free(cpu_data);
+ }
+ } else {
+ buff->latency = true;
+ }
+ return 0;
+fail:
+ free(cpu_data);
+ return -1;
+}
+
+/*
+ * handle_options - parse the option records at the current file position
+ * @handle: input handle
+ *
+ * Each record is a 2-byte option id, a 4-byte payload size and the
+ * payload.  For files with sections a section header is read first and
+ * the records may be compressed; parsing ends at TRACECMD_OPTION_DONE,
+ * whose payload may chain to another options section.
+ *
+ * Returns 0 on success, non-zero on read, allocation or parse failure.
+ * The payload buffer is released on every path.
+ */
+static int handle_options(struct tracecmd_input *handle)
+{
+	long long offset;
+	unsigned short option;
+	unsigned int size;
+	unsigned short id, flags;
+	char *cpustats = NULL;
+	struct hook_list *hook;
+	bool compress = false;
+	char *buf = NULL;
+	int cpus;
+	int ret;
+
+	if (!HAS_SECTIONS(handle)) {
+		handle->options_start = lseek64(handle->fd, 0, SEEK_CUR);
+	} else {
+		if (read_section_header(handle, &id, &flags, NULL, NULL))
+			return -1;
+		if (id != TRACECMD_OPTION_DONE)
+			return -1;
+		if (flags & TRACECMD_SEC_FL_COMPRESS)
+			compress = true;
+	}
+
+	if (compress && in_uncompress_block(handle))
+		return -1;
+
+	for (;;) {
+		ret = read2(handle, &option);
+		if (ret)
+			goto out;
+
+		if (!HAS_SECTIONS(handle) && option == TRACECMD_OPTION_DONE)
+			break;
+
+		/* next 4 bytes is the size of the option */
+		ret = read4(handle, &size);
+		if (ret)
+			goto out;
+		buf = malloc(size);
+		if (!buf) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		ret = do_read_check(handle, buf, size);
+		if (ret)
+			goto out;
+
+		switch (option) {
+		case TRACECMD_OPTION_DATE:
+			/*
+			 * A time has been mapped that is the
+			 * difference between the timestamps and
+			 * gtod. It is stored as ASCII with a '0x'
+			 * prefix.
+			 */
+			if (handle->flags &
+			    (TRACECMD_FL_IGNORE_DATE | TRACECMD_FL_RAW_TS))
+				break;
+			offset = strtoll(buf, NULL, 0);
+			/* Convert from micro to nano */
+			offset *= 1000;
+			handle->ts_offset += offset;
+			break;
+		case TRACECMD_OPTION_OFFSET:
+			/*
+			 * Similar to date option, but just adds an
+			 * offset to the timestamp.
+			 */
+			if (handle->flags & TRACECMD_FL_RAW_TS)
+				break;
+			offset = strtoll(buf, NULL, 0);
+			handle->ts_offset += offset;
+			break;
+		case TRACECMD_OPTION_TIME_SHIFT:
+			/*
+			 * long long int (8 bytes) trace session ID
+			 * int (4 bytes) protocol flags.
+			 * int (4 bytes) CPU count.
+			 * array of size [CPU count]:
+			 * [
+			 *  int (4 bytes) count of timestamp offsets.
+			 *  long long array of size [count] of times,
+			 *      when the offsets were calculated.
+			 *  long long array of size [count] of timestamp offsets.
+			 *  long long array of size [count] of timestamp scaling ratios.*
+			 * ]
+			 * array of size [CPU count]:
+			 * [
+			 *  long long array of size [count] of timestamp scaling fraction bits.*
+			 * ]*
+			 */
+			if (size < 16 || (handle->flags & TRACECMD_FL_RAW_TS))
+				break;
+			handle->host.peer_trace_id = tep_read_number(handle->pevent,
+								     buf, 8);
+			handle->host.flags = tep_read_number(handle->pevent,
+							     buf + 8, 4);
+			ret = tsync_cpu_offsets_load(handle, buf + 12, size - 12);
+			if (ret < 0)
+				goto out;
+			tracecmd_enable_tsync(handle, true);
+			break;
+		case TRACECMD_OPTION_CPUSTAT:
+			/* buf[size-1] does not exist for an empty payload */
+			if (!size)
+				break;
+			buf[size-1] = '\n';
+			cpustats = realloc(handle->cpustats,
+					   handle->cpustats_size + size + 1);
+			if (!cpustats) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			memcpy(cpustats + handle->cpustats_size, buf, size);
+			handle->cpustats_size += size;
+			cpustats[handle->cpustats_size] = 0;
+			handle->cpustats = cpustats;
+			break;
+		case TRACECMD_OPTION_BUFFER:
+		case TRACECMD_OPTION_BUFFER_TEXT:
+			ret = handle_buffer_option(handle, option, buf, size);
+			if (ret < 0)
+				goto out;
+			break;
+		case TRACECMD_OPTION_TRACECLOCK:
+			tracecmd_parse_trace_clock(handle, buf, size);
+			if (!handle->ts2secs)
+				handle->use_trace_clock = true;
+			break;
+		case TRACECMD_OPTION_UNAME:
+			handle->uname = strdup(buf);
+			break;
+		case TRACECMD_OPTION_VERSION:
+			handle->version = strdup(buf);
+			break;
+		case TRACECMD_OPTION_HOOK:
+			hook = tracecmd_create_event_hook(buf);
+			hook->next = handle->hooks;
+			handle->hooks = hook;
+			break;
+		case TRACECMD_OPTION_CPUCOUNT:
+			/* Four bytes are read from the payload below */
+			if (size < 4)
+				break;
+			cpus = *(int *)buf;
+			handle->cpus = tep_read_number(handle->pevent, &cpus, 4);
+			if (handle->cpus > handle->max_cpu)
+				handle->max_cpu = handle->cpus;
+			tep_set_cpus(handle->pevent, handle->cpus);
+			break;
+		case TRACECMD_OPTION_PROCMAPS:
+			/* Only parsed when the text is NUL terminated */
+			if (size && buf[size-1] == '\0')
+				trace_pid_map_load(handle, buf);
+			break;
+		case TRACECMD_OPTION_TRACEID:
+			if (size < 8)
+				break;
+			handle->trace_id = tep_read_number(handle->pevent,
+							   buf, 8);
+			break;
+		case TRACECMD_OPTION_GUEST:
+			trace_guest_load(handle, buf, size);
+			break;
+		case TRACECMD_OPTION_TSC2NSEC:
+			if (size < 16 || (handle->flags & TRACECMD_FL_RAW_TS))
+				break;
+			handle->tsc_calc.mult = tep_read_number(handle->pevent,
+								buf, 4);
+			handle->tsc_calc.shift = tep_read_number(handle->pevent,
+								 buf + 4, 4);
+			handle->tsc_calc.offset = tep_read_number(handle->pevent,
+								  buf + 8, 8);
+			break;
+		case TRACECMD_OPTION_HEADER_INFO:
+		case TRACECMD_OPTION_FTRACE_EVENTS:
+		case TRACECMD_OPTION_EVENT_FORMATS:
+		case TRACECMD_OPTION_KALLSYMS:
+		case TRACECMD_OPTION_PRINTK:
+		case TRACECMD_OPTION_CMDLINES:
+			if (size < 8)
+				break;
+			section_add_or_update(handle, option, -1,
+					      tep_read_number(handle->pevent, buf, 8), 0);
+			break;
+		case TRACECMD_OPTION_DONE:
+			if (compress)
+				in_uncompress_reset(handle);
+			ret = handle_option_done(handle, buf, size);
+			free(buf);
+			return ret;
+
+		default:
+			tracecmd_warning("unknown option %d", option);
+			break;
+		}
+
+		free(buf);
+		/* Cleared so the error exit below never frees it twice */
+		buf = NULL;
+	}
+
+	ret = 0;
+
+out:
+	/* Non-NULL only when an error left the loop with a live payload */
+	free(buf);
+	if (compress)
+		in_uncompress_reset(handle);
+	return ret;
+}
+
+/*
+ * read_options_type - read the 10-byte tag(s) that follow the headers
+ * @handle: input handle
+ *
+ * An "options" tag is followed by the option records and then by a second
+ * tag.  The final tag selects the data layout: "latency" or "flyrecord".
+ *
+ * Returns 0 on success (or when this state was already read), -1 on read
+ * failure, option parsing failure or an unknown tag.
+ */
+static int read_options_type(struct tracecmd_input *handle)
+{
+	char tag[10];
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_CPU_LATENCY))
+		return 0;
+
+	if (do_read_check(handle, tag, 10))
+		return -1;
+
+	/* check if this handles options */
+	if (!strncmp(tag, "options", 7)) {
+		if (handle_options(handle) < 0)
+			return -1;
+		handle->file_state = TRACECMD_FILE_OPTIONS;
+		if (do_read_check(handle, tag, 10))
+			return -1;
+	}
+
+	/* The remaining tag says whether this is a latency report or flyrecord */
+	if (!strncmp(tag, "latency", 7)) {
+		handle->file_state = TRACECMD_FILE_CPU_LATENCY;
+		return 0;
+	}
+	if (!strncmp(tag, "flyrecord", 9)) {
+		handle->file_state = TRACECMD_FILE_CPU_FLYRECORD;
+		return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * tracecmd_latency_data_read - read the next piece of latency trace data
+ * @handle: input handle, must be in the TRACECMD_FILE_CPU_LATENCY state
+ * @buf: in/out buffer pointer; allocated or grown here when needed
+ * @size: in/out size of *@buf
+ *
+ * Uncompressed files, and compressed files that were unpacked to a
+ * temporary file, are read through a file descriptor.  Otherwise the next
+ * compressed chunk is uncompressed into *@buf.
+ *
+ * Returns -1 on error.  For the in-memory path the return value is the
+ * size of the chunk that was uncompressed, or 0 when no chunks are left;
+ * for the file path it is the result of do_read_fd().
+ */
+int tracecmd_latency_data_read(struct tracecmd_input *handle, char **buf, size_t *size)
+{
+	struct cpu_zdata *zdata;
+	void *data;
+	int rsize;
+	int fd = -1;
+	int id;
+
+	if (!handle || !buf || !size)
+		return -1;
+	if (handle->file_state != TRACECMD_FILE_CPU_LATENCY)
+		return -1;
+
+	/* Taken only after handle is known to be valid */
+	zdata = &handle->latz;
+
+	if (!handle->cpu_compressed) {
+		fd = handle->fd;
+	} else if (!handle->read_zpage) {
+		if (zdata->fd < 0)
+			return -1;
+		fd = zdata->fd;
+	}
+
+	/* Read data from a file */
+	if (fd >= 0) {
+		if (!(*buf)) {
+			*size = BUFSIZ;
+			*buf = malloc(*size);
+			if (!(*buf))
+				return -1;
+		}
+		return do_read_fd(fd, *buf, *size);
+	}
+
+	/* Uncompress data in memory */
+	if (zdata->last_chunk >= zdata->count)
+		return 0;
+
+	id = zdata->last_chunk;
+	if (!*buf || *size < zdata->chunks[id].size) {
+		/* *buf is only replaced once the reallocation succeeded */
+		data = realloc(*buf, zdata->chunks[id].size);
+		if (!data)
+			return -1;
+		*buf = data;
+		*size = zdata->chunks[id].size;
+	}
+
+	if (tracecmd_uncompress_chunk(handle->compress, &zdata->chunks[id], *buf))
+		return -1;
+
+	rsize = zdata->chunks[id].size;
+	zdata->last_chunk++;
+	return rsize;
+}
+
+/*
+ * init_cpu_data - set up the reader state of every CPU of a flyrecord file
+ * @handle: input handle whose cpu_data[] already holds file offsets/sizes
+ *
+ * Allocates a kbuffer per CPU, derives handle->page_map_size from the
+ * largest per-CPU size, then runs init_cpu() on each CPU.
+ *
+ * Returns 0 on success, -1 when the file is not in the flyrecord state or
+ * any allocation/initialisation fails.
+ */
+static int init_cpu_data(struct tracecmd_input *handle)
+{
+ enum kbuffer_long_size long_size;
+ enum kbuffer_endian endian;
+ unsigned long long max_size = 0;
+ unsigned long long pages;
+ int cpu;
+
+ /* We expect this to be flyrecord */
+ if (handle->file_state != TRACECMD_FILE_CPU_FLYRECORD)
+ return -1;
+
+ if (force_read)
+ handle->read_page = true;
+
+ if (handle->long_size == 8)
+ long_size = KBUFFER_LSIZE_8;
+ else
+ long_size = KBUFFER_LSIZE_4;
+
+ if (tep_is_file_bigendian(handle->pevent))
+ endian = KBUFFER_ENDIAN_BIG;
+ else
+ endian = KBUFFER_ENDIAN_LITTLE;
+
+ for (cpu = 0; cpu < handle->cpus; cpu++) {
+ handle->cpu_data[cpu].compress.fd = -1;
+ handle->cpu_data[cpu].kbuf = kbuffer_alloc(long_size, endian);
+ if (!handle->cpu_data[cpu].kbuf)
+ goto out_free;
+ if (tep_is_old_format(handle->pevent))
+ kbuffer_set_old_format(handle->cpu_data[cpu].kbuf);
+
+ if (handle->cpu_data[cpu].file_size > max_size)
+ max_size = handle->cpu_data[cpu].file_size;
+ }
+
+ /* Calculate about a meg of pages for buffering */
+ pages = handle->page_size ? max_size / handle->page_size : 0;
+ if (!pages)
+ pages = 1;
+ pages = normalize_size(pages);
+ handle->page_map_size = handle->page_size * pages;
+ /* Never map less than one page */
+ if (handle->page_map_size < handle->page_size)
+ handle->page_map_size = handle->page_size;
+
+
+ for (cpu = 0; cpu < handle->cpus; cpu++) {
+ if (init_cpu(handle, cpu))
+ goto out_free;
+ }
+
+ return 0;
+
+ out_free:
+ /* Unwinds from the CPU that failed down to CPU 0 */
+ for ( ; cpu >= 0; cpu--) {
+ free_page(handle, cpu);
+ kbuffer_free(handle->cpu_data[cpu].kbuf);
+ handle->cpu_data[cpu].kbuf = NULL;
+ }
+ return -1;
+}
+
+/*
+ * init_latency_data - prepare reading of compressed latency trace data
+ * @handle: input handle positioned at the compressed data
+ *
+ * Nothing is done for uncompressed files.  Otherwise either the chunk
+ * table is loaded (handle->read_zpage set) or the data is uncompressed
+ * into a temporary file that is rewound for reading.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int init_latency_data(struct tracecmd_input *handle)
+{
+	unsigned long long wsize;
+
+	if (!handle->cpu_compressed)
+		return 0;
+
+	if (!handle->read_zpage) {
+		strcpy(handle->latz.file, COMPR_TEMP_FILE);
+		handle->latz.fd = mkstemp(handle->latz.file);
+		if (handle->latz.fd < 0)
+			return -1;
+
+		if (tracecmd_uncompress_copy_to(handle->compress, handle->latz.fd, NULL, &wsize))
+			return -1;
+
+		lseek64(handle->latz.fd, 0, SEEK_SET);
+		return 0;
+	}
+
+	handle->latz.count = tracecmd_load_chunks_info(handle->compress, &handle->latz.chunks);
+	return handle->latz.count < 0 ? -1 : 0;
+}
+
+/*
+ * init_buffer_cpu_data - set up @handle to read the data of one buffer
+ * @handle: input handle; must not have cpu_data set yet
+ * @buffer: buffer description parsed from the options
+ *
+ * Seeks to the buffer's section, reads its header and either prepares
+ * latency data or copies the per-CPU offsets/sizes of @buffer into a
+ * freshly allocated handle->cpu_data before calling init_cpu_data().
+ *
+ * Returns 1 for a latency buffer that was set up, the result of
+ * init_cpu_data() for a flyrecord buffer, -1 on any failure.
+ */
+static int init_buffer_cpu_data(struct tracecmd_input *handle, struct input_buffer_instance *buffer)
+{
+ unsigned long long offset;
+ unsigned long long size;
+ unsigned short id, flags;
+ int cpu;
+
+ if (handle->cpu_data)
+ return -1;
+
+ if (lseek64(handle->fd, buffer->offset, SEEK_SET) == (off_t)-1)
+ return -1;
+ if (read_section_header(handle, &id, &flags, NULL, NULL))
+ return -1;
+ if (flags & TRACECMD_SEC_FL_COMPRESS)
+ handle->cpu_compressed = true;
+ if (buffer->latency) {
+ handle->file_state = TRACECMD_FILE_CPU_LATENCY;
+ return init_latency_data(handle) == 0 ? 1 : -1;
+ }
+ handle->file_state = TRACECMD_FILE_CPU_FLYRECORD;
+ handle->cpus = buffer->cpus;
+ if (handle->max_cpu < handle->cpus)
+ handle->max_cpu = handle->cpus;
+
+ handle->cpu_data = calloc(handle->cpus, sizeof(*handle->cpu_data));
+ if (!handle->cpu_data)
+ return -1;
+
+ for (cpu = 0; cpu < handle->cpus; cpu++) {
+ handle->cpu_data[cpu].cpu = buffer->cpu_data[cpu].cpu;
+ offset = buffer->cpu_data[cpu].offset;
+ size = buffer->cpu_data[cpu].size;
+ handle->cpu_data[cpu].file_offset = offset;
+ handle->cpu_data[cpu].file_size = size;
+ if (size && (offset + size > handle->total_file_size)) {
+ /* this happens if the file got truncated */
+ printf("File possibly truncated. "
+ "Need at least %llu, but file size is %zu.\n",
+ offset + size, handle->total_file_size);
+ errno = EINVAL;
+ /* handle->cpu_data stays allocated on this path */
+ return -1;
+ }
+ }
+
+ return init_cpu_data(handle);
+}
+
+/*
+ * read_cpu_data - read the per-CPU offset/size table and set up the CPUs
+ * @handle: input handle positioned at the table
+ *
+ * Returns 1 when the file holds a latency report (nothing to set up),
+ * the result of init_cpu_data() for flyrecord data, and -1 on a wrong
+ * file state, allocation failure, short read or truncated file (errno is
+ * set to EINVAL for the latter).
+ */
+static int read_cpu_data(struct tracecmd_input *handle)
+{
+	unsigned long long offset;
+	unsigned long long size;
+	int cpus;
+	int cpu;
+
+	/*
+	 * Check if this is a latency report or not.
+	 */
+	if (handle->file_state == TRACECMD_FILE_CPU_LATENCY)
+		return 1;
+
+	/* We expect this to be flyrecord */
+	if (handle->file_state != TRACECMD_FILE_CPU_FLYRECORD)
+		return -1;
+
+	cpus = handle->cpus;
+
+	handle->cpu_data = calloc(handle->cpus, sizeof(*handle->cpu_data));
+	if (!handle->cpu_data)
+		return -1;
+
+	for (cpu = 0; cpu < handle->cpus; cpu++) {
+		handle->cpu_data[cpu].cpu = cpu;
+		/* A failed read would leave offset/size uninitialised */
+		if (read8(handle, &offset) < 0 || read8(handle, &size) < 0)
+			return -1;
+		handle->cpu_data[cpu].file_offset = offset;
+		handle->cpu_data[cpu].file_size = size;
+		if (size && (offset + size > handle->total_file_size)) {
+			/* this happens if the file got truncated */
+			printf("File possibly truncated. "
+				"Need at least %llu, but file size is %zu.\n",
+				offset + size, handle->total_file_size);
+			errno = EINVAL;
+			return -1;
+		}
+	}
+
+	/*
+	 * It is possible that an option changed the number of CPUs.
+	 * If that happened, then there's "empty" cpu data saved for
+	 * backward compatibility.
+	 */
+	if (cpus < handle->cpus) {
+		unsigned long long ignore;
+
+		if (read8(handle, &ignore) < 0 ||	/* offset */
+		    read8(handle, &ignore) < 0)		/* size */
+			return -1;
+		if (ignore != 0)
+			tracecmd_warning("ignored CPU data not zero size");
+	}
+
+	return init_cpu_data(handle);
+}
+
+/*
+ * read_data_and_size - read an 8-byte length followed by that many bytes
+ * @handle: input handle
+ * @data: receives a malloc'ed buffer of *@size + 1 bytes; the extra byte
+ *	  is left for the caller to place a terminator
+ * @size: receives the length read from the file
+ *
+ * Returns 0 on success, -1 on read or allocation failure (the buffer is
+ * freed in that case).
+ */
+static int read_data_and_size(struct tracecmd_input *handle,
+			      char **data, unsigned long long *size)
+{
+	char *blob;
+
+	if (read8(handle, size) < 0)
+		return -1;
+
+	blob = malloc(*size + 1);
+	*data = blob;
+	if (!blob)
+		return -1;
+
+	if (do_read_check(handle, blob, *size)) {
+		free(blob);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * read_and_parse_cmdlines - load the saved command lines into the tep handle
+ * @handle: input handle
+ *
+ * For files without sections the current file position is recorded as
+ * the start of the command lines section before reading.
+ *
+ * Returns 0 on success or when this state was already read, -1 when the
+ * data cannot be read.
+ */
+static int read_and_parse_cmdlines(struct tracecmd_input *handle)
+{
+	unsigned long long len;
+	char *text;
+
+	if (CHECK_READ_STATE(handle, TRACECMD_FILE_CMD_LINES))
+		return 0;
+
+	if (!HAS_SECTIONS(handle))
+		section_add_or_update(handle, TRACECMD_OPTION_CMDLINES, 0, 0,
+				      lseek64(handle->fd, 0, SEEK_CUR));
+
+	if (read_data_and_size(handle, &text, &len) < 0)
+		return -1;
+
+	/* read_data_and_size() reserves the byte for this terminator */
+	text[len] = 0;
+	tep_parse_saved_cmdlines(handle->pevent, text);
+	free(text);
+
+	handle->file_state = TRACECMD_FILE_CMD_LINES;
+
+	return 0;
+}
+
+/*
+ * extract_trace_clock - store the clock name found in a "[name]" token
+ * @handle: input handle; trace_clock and flags are updated
+ * @line: token starting with '[', modified in place
+ *
+ * handle->trace_clock receives a newly allocated copy of the name, or NULL
+ * when no name could be extracted.  TRACECMD_FL_IN_USECS is cleared when
+ * raw timestamps are requested or the clock is not one of the clocks
+ * listed below.
+ */
+static void extract_trace_clock(struct tracecmd_input *handle, char *line)
+{
+	char *clock = NULL;
+	char *next = NULL;
+	char *data;
+
+	data = strtok_r(line, "[]", &next);
+	/* A token made only of brackets yields no data to scan */
+	if (data)
+		sscanf(data, "%ms", &clock);
+	/* TODO: report if it fails to allocate */
+	handle->trace_clock = clock;
+
+	if (!clock)
+		return;
+
+	/* Clear usecs if raw timestamps are requested */
+	if (handle->flags & TRACECMD_FL_RAW_TS)
+		handle->flags &= ~TRACECMD_FL_IN_USECS;
+
+	/* Clear usecs if not one of the specified clocks */
+	if (strcmp(clock, "local") && strcmp(clock, "global") &&
+	    strcmp(clock, "uptime") && strcmp(clock, "perf") &&
+	    strncmp(clock, "mono", 4) && strcmp(clock, TSCNSEC_CLOCK) &&
+	    strcmp(clock, "tai"))
+		handle->flags &= ~TRACECMD_FL_IN_USECS;
+}
+
+/**
+ * tracecmd_parse_trace_clock - find the selected clock in trace_clock text
+ * @handle: input handle that receives the clock
+ * @file: space separated list of clock names, modified in place
+ * @size: unused
+ *
+ * The selected clock is the token enclosed in brackets, e.g. "[local]".
+ * Nothing is changed in @handle when no such token is found.
+ */
+void tracecmd_parse_trace_clock(struct tracecmd_input *handle,
+				char *file, int size __maybe_unused)
+{
+	char *line;
+	char *next = NULL;
+
+	line = strtok_r(file, " ", &next);
+	while (line) {
+		/* current trace_clock is shown as "[local]". */
+		if (*line == '[') {
+			extract_trace_clock(handle, line);
+			return;
+		}
+		line = strtok_r(NULL, " ", &next);
+	}
+}
+
+/*
+ * read_and_parse_trace_clock - read the trace_clock text and parse it
+ * @handle: input handle
+ * @pevent: not used by this function
+ *
+ * Returns 0 on success, -1 when the text cannot be read.
+ */
+static int read_and_parse_trace_clock(struct tracecmd_input *handle,
+				      struct tep_handle *pevent)
+{
+	unsigned long long len;
+	char *text;
+
+	if (read_data_and_size(handle, &text, &len) < 0)
+		return -1;
+
+	text[len] = 0;
+	tracecmd_parse_trace_clock(handle, text, len);
+	free(text);
+
+	return 0;
+}
+
+/*
+ * init_data_v6 - prepare reading the data of a file without sections
+ * @handle: input handle
+ *
+ * Returns the result of read_cpu_data(); a failure to read the trace
+ * clock is not an error, the "local" clock is used instead.
+ */
+static int init_data_v6(struct tracecmd_input *handle)
+{
+	int ret = read_cpu_data(handle);
+
+	if (ret < 0)
+		return ret;
+
+	if (!handle->use_trace_clock)
+		return ret;
+
+	/*
+	 * There was a bug in the original setting of the trace_clock file
+	 * which let it get corrupted. If it fails to read, force local clock.
+	 */
+	if (read_and_parse_trace_clock(handle, handle->pevent) < 0) {
+		char fallback[] = "[local]";
+
+		tracecmd_warning("File has trace_clock bug, using local clock");
+		tracecmd_parse_trace_clock(handle, fallback, 8);
+	}
+
+	return ret;
+}
+
+/*
+ * init_data - prepare reading the data of a file with sections by setting
+ * up the top buffer of @handle.  Returns what init_buffer_cpu_data()
+ * returns.
+ */
+static int init_data(struct tracecmd_input *handle)
+{
+ return init_buffer_cpu_data(handle, &handle->top_buffer);
+}
+
+/**
+ * tracecmd_init_data - prepare reading the data from trace.dat
+ * @handle: input handle for the trace.dat file
+ *
+ * Must be called after tracecmd_read_headers() and before
+ * tracecmd_read_data(). The set-up routine is chosen by whether the file
+ * has sections.
+ *
+ * Returns the result of the selected set-up routine.
+ */
+int tracecmd_init_data(struct tracecmd_input *handle)
+{
+	int ret;
+
+	ret = HAS_SECTIONS(handle) ? init_data(handle) : init_data_v6(handle);
+	tracecmd_blk_hack(handle);
+
+	return ret;
+}
+
+/**
+ * tracecmd_make_pipe - Have the handle read a pipe instead of a file
+ * @handle: input handle to read from a pipe
+ * @cpu: the cpu that the pipe represents
+ * @fd: the read end of the pipe
+ * @cpus: the total number of cpus for this handle
+ *
+ * In order to stream data from the binary trace files and produce
+ * output or analyze the data, a tracecmd_input descriptor needs to
+ * be created, and then converted into a form that can act on a
+ * pipe.
+ *
+ * Note, there are limitations to what this descriptor can do.
+ * Most notably, it can not read backwards. Once a page is read
+ * it can not be read at a later time (except if a record is attached
+ * to it and is holding the page ref).
+ *
+ * It is expected that the handle has already been created and
+ * tracecmd_read_headers() has run on it.
+ *
+ * Returns 0 on success, -1 when @cpu is out of range or an allocation
+ * fails.
+ */
+int tracecmd_make_pipe(struct tracecmd_input *handle, int cpu, int fd, int cpus)
+{
+	enum kbuffer_long_size long_size;
+	enum kbuffer_endian endian;
+
+	handle->read_page = true;
+	handle->use_pipe = true;
+
+	if (!handle->cpus) {
+		handle->cpu_data = malloc(sizeof(*handle->cpu_data) * cpus);
+		if (!handle->cpu_data)
+			return -1;
+		/* Only recorded once the array exists, so a retry allocates again */
+		handle->cpus = cpus;
+	}
+
+	/* cpu indexes handle->cpu_data[] below */
+	if (cpu < 0 || cpu >= handle->cpus)
+		return -1;
+
+	if (handle->long_size == 8)
+		long_size = KBUFFER_LSIZE_8;
+	else
+		long_size = KBUFFER_LSIZE_4;
+
+	if (tep_is_file_bigendian(handle->pevent))
+		endian = KBUFFER_ENDIAN_BIG;
+	else
+		endian = KBUFFER_ENDIAN_LITTLE;
+
+	memset(&handle->cpu_data[cpu], 0, sizeof(handle->cpu_data[cpu]));
+	handle->cpu_data[cpu].pipe_fd = fd;
+	handle->cpu_data[cpu].cpu = cpu;
+
+	handle->cpu_data[cpu].kbuf = kbuffer_alloc(long_size, endian);
+	if (!handle->cpu_data[cpu].kbuf)
+		return -1;
+	if (tep_is_old_format(handle->pevent))
+		kbuffer_set_old_format(handle->cpu_data[cpu].kbuf);
+
+	handle->cpu_data[cpu].file_offset = 0;
+	handle->cpu_data[cpu].file_size = -1;
+
+	/* The result of init_cpu() is ignored here, as in the original code */
+	init_cpu(handle, cpu);
+
+	return 0;
+}
+
+/**
+ * tracecmd_print_events - print the events that are stored in trace.dat
+ * @handle: input handle for the trace.dat file
+ * @regex: regex of events to print (NULL is all events)
+ *
+ * Debugging routine that prints the events stored in the given
+ * trace.dat file.
+ */
+void tracecmd_print_events(struct tracecmd_input *handle, const char *regex)
+{
+	const char *pattern = regex ? regex : ".*";
+
+	if (!HAS_SECTIONS(handle))
+		read_headers_v6(handle, TRACECMD_FILE_ALL_EVENTS, pattern);
+
+	read_headers(handle, pattern);
+}
+
+/* Print, for every CPU of the handle, where its data lives and how big it is */
+static void show_cpu_stats(struct tracecmd_input *handle)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < handle->cpus; cpu++) {
+		struct cpu_data *entry = handle->cpu_data + cpu;
+
+		printf("CPU%d data recorded at offset=0x%llx\n",
+		       cpu, entry->file_offset);
+		printf(" %lld bytes in size\n", entry->file_size);
+	}
+}
+
+/**
+ * tracecmd_print_stats - print the stats recorded in the options
+ * @handle: input handle for the trace.dat file
+ *
+ * Prints the content of the TRACECMD_OPTION_CPUSTAT option when the file
+ * carries one, or a notice that no stats were found. The per CPU data
+ * offsets and sizes are printed afterwards in both cases.
+ */
+void tracecmd_print_stats(struct tracecmd_input *handle)
+{
+	const char *stats = handle->cpustats;
+
+	if (!stats)
+		printf(" No stats in this file\n");
+	else
+		printf("%s\n", stats);
+
+	show_cpu_stats(handle);
+}
+
+/**
+ * tracecmd_print_uname - print the uname string saved in the file
+ * @handle: input handle for the trace.dat file
+ *
+ * Prints the content of the TRACECMD_OPTION_UNAME option when the file
+ * carries one, otherwise a notice that it was not recorded.
+ */
+void tracecmd_print_uname(struct tracecmd_input *handle)
+{
+	const char *uname_str = handle->uname;
+
+	if (!uname_str) {
+		printf(" uname was not recorded in this file\n");
+		return;
+	}
+
+	printf("%s\n", uname_str);
+}
+
+/**
+ * tracecmd_print_version - print the version string saved in the file
+ * @handle: input handle for the trace.dat file
+ *
+ * Prints the content of the TRACECMD_OPTION_VERSION option when the file
+ * carries one, otherwise a notice that it was not recorded.
+ */
+void tracecmd_print_version(struct tracecmd_input *handle)
+{
+	const char *version_str = handle->version;
+
+	if (!version_str) {
+		printf(" version was not recorded in this file\n");
+		return;
+	}
+
+	printf("%s\n", version_str);
+}
+
+/**
+ * tracecmd_hooks - return the event hooks that were used in record
+ * @handle: input handle for the trace.dat file
+ *
+ * If trace-cmd record used -H to save hooks, they are parsed and
+ * presented as hooks here.
+ *
+ * Returns the hook list stored in the handle. The list stays owned by
+ * the handle: do not free it, tracecmd_close() releases it with
+ * tracecmd_free_hooks().
+ */
+struct hook_list *tracecmd_hooks(struct tracecmd_input *handle)
+{
+ return handle->hooks;
+}
+
+/*
+ * Append @size bytes, read from the current position of the input, to the
+ * strings buffer of @handle.
+ *
+ * Returns 0 on success and -1 on a bad size, allocation or read error.
+ * On a read error the buffer stays enlarged but strings_size is left
+ * untouched, so the partial data is never considered valid.
+ */
+static int init_metadata_strings(struct tracecmd_input *handle, int size)
+{
+	char *tmp;
+
+	/* The size comes from the file, never trust it blindly */
+	if (size < 0)
+		return -1;
+	/* Nothing to append; also avoids a zero sized realloc() */
+	if (!size)
+		return 0;
+
+	tmp = realloc(handle->strings, handle->strings_size + size);
+	if (!tmp)
+		return -1;
+
+	handle->strings = tmp;
+	if (do_read_check(handle, handle->strings + handle->strings_size, size))
+		return -1;
+
+	handle->strings_size += size;
+
+	return 0;
+}
+
+/*
+ * Walk the section headers starting at the current file position and load
+ * every TRACECMD_OPTION_STRINGS section into the strings buffer of @handle.
+ * The file position is restored before returning.
+ *
+ * Returns 0 if at least one strings section was loaded, -1 otherwise.
+ */
+static int read_metadata_strings(struct tracecmd_input *handle)
+{
+	unsigned short flags;
+	int found = 0;
+	unsigned short id;
+	unsigned int csize, rsize;
+	unsigned long long size;
+	off64_t offset;
+	bool compressed;
+	int ret;
+
+	offset = lseek64(handle->fd, 0, SEEK_CUR);
+	if (offset == (off64_t)-1)
+		return -1;
+
+	do {
+		if (read_section_header(handle, &id, &flags, &size, NULL))
+			break;
+
+		if (id != TRACECMD_OPTION_STRINGS) {
+			/* Not a strings section, jump over its payload */
+			if (lseek64(handle->fd, size, SEEK_CUR) == (off64_t)-1)
+				break;
+			continue;
+		}
+
+		compressed = (flags & TRACECMD_SEC_FL_COMPRESS) != 0;
+		if (compressed) {
+			/* Peek at the compressed and uncompressed sizes */
+			if (read4(handle, &csize) || read4(handle, &rsize))
+				break;
+			/* Rewind over both sizes before uncompressing */
+			do_lseek(handle, -8, SEEK_CUR);
+			if (in_uncompress_block(handle))
+				break;
+		} else {
+			rsize = size;
+		}
+
+		ret = init_metadata_strings(handle, rsize);
+		if (compressed)
+			in_uncompress_reset(handle);
+		/* The file position is unknown after a failed load */
+		if (ret)
+			break;
+		found++;
+	} while (1);
+
+	if (lseek64(handle->fd, offset, SEEK_SET) == (off64_t)-1)
+		return -1;
+
+	return found ? 0 : -1;
+}
+
+/**
+ * tracecmd_alloc_fd - create a tracecmd_input handle from a file descriptor
+ * @fd: the file descriptor for the trace.dat file
+ * @flags: bitmask of enum tracecmd_open_flags
+ *
+ * Allocate a tracecmd_input handle from a file descriptor and open the
+ * file. This tests if the file is of trace-cmd format and allocates
+ * a parse event descriptor.
+ *
+ * The returned pointer is not ready to be read yet. A tracecmd_read_headers()
+ * and tracecmd_init_data() still need to be called on the descriptor.
+ *
+ * Unless you know what you are doing with this, you want to use
+ * tracecmd_open_fd() instead.
+ *
+ * Returns the new handle, or NULL on error. On error @fd is not closed,
+ * it stays owned by the caller.
+ */
+struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags)
+{
+	struct tracecmd_input *handle;
+	char test[] = TRACECMD_MAGIC;
+	unsigned int page_size;
+	size_t offset;
+	char *version = NULL;
+	char *zver = NULL;
+	char *zname = NULL;
+	char buf[BUFSIZ];
+	unsigned long ver;
+
+	handle = calloc(1, sizeof(*handle));
+	if (!handle)
+		return NULL;
+
+	handle->fd = fd;
+	handle->ref = 1;
+	handle->latz.fd = -1;
+	/* By default, use usecs, unless told otherwise */
+	handle->flags |= TRACECMD_FL_IN_USECS;
+
+#ifdef INMEMORY_DECOMPRESS
+	handle->read_zpage = 1;
+#endif
+	/* The file starts with the magic bytes followed by "tracing" */
+	if (do_read_check(handle, buf, 3))
+		goto failed_read;
+
+	if (memcmp(buf, test, 3) != 0)
+		goto failed_read;
+
+	if (do_read_check(handle, buf, 7))
+		goto failed_read;
+	if (memcmp(buf, "tracing", 7) != 0)
+		goto failed_read;
+
+	version = read_string(handle);
+	if (!version)
+		goto failed_read;
+	tracecmd_info("version = %s", version);
+	/* errno is only meaningful if it was cleared before the call */
+	errno = 0;
+	ver = strtol(version, NULL, 10);
+	if (!ver && errno)
+		goto failed_read;
+	if (!tracecmd_is_version_supported(ver)) {
+		tracecmd_warning("Unsupported file version %lu", ver);
+		goto failed_read;
+	}
+	handle->file_version = ver;
+	free(version);
+	version = NULL;
+
+	if (handle->file_version >= FILE_VERSION_SECTIONS)
+		handle->flags |= TRACECMD_FL_SECTIONED;
+	if (handle->file_version >= FILE_VERSION_COMPRESSION)
+		handle->flags |= TRACECMD_FL_COMPRESSION;
+
+	/* One byte holding the endianness of the file */
+	if (do_read_check(handle, buf, 1))
+		goto failed_read;
+
+	handle->pevent = tep_alloc();
+	if (!handle->pevent)
+		goto failed_read;
+
+	/* register default ftrace functions first */
+	if (!(flags & TRACECMD_FL_LOAD_NO_PLUGINS) &&
+	    !(flags & TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS))
+		tracecmd_ftrace_overrides(handle, &handle->finfo);
+
+	handle->plugin_list = trace_load_plugins(handle->pevent, flags);
+
+	tep_set_file_bigendian(handle->pevent, buf[0]);
+	tep_set_local_bigendian(handle->pevent, tracecmd_host_bigendian());
+
+	/* One byte holding the size of a long in the file */
+	if (do_read_check(handle, buf, 1))
+		goto failed_read;
+	handle->long_size = buf[0];
+	tep_set_long_size(handle->pevent, handle->long_size);
+
+	if (read4(handle, &page_size))
+		goto failed_read;
+	handle->page_size = page_size;
+	handle->next_offset = page_size;
+
+	/* Find the total file size, then return to where we were */
+	offset = lseek64(handle->fd, 0, SEEK_CUR);
+	handle->total_file_size = lseek64(handle->fd, 0, SEEK_END);
+	lseek64(handle->fd, offset, SEEK_SET);
+
+	if (HAS_COMPRESSION(handle)) {
+		zname = read_string(handle);
+		if (!zname)
+			goto failed_read;
+
+		zver = read_string(handle);
+		if (!zver)
+			goto failed_read;
+
+		if (strcmp(zname, "none") == 0) {
+			handle->read_zpage = false;
+			handle->flags &= ~TRACECMD_FL_COMPRESSION;
+		} else {
+			handle->compress = tracecmd_compress_alloc(zname, zver,
+								   handle->fd,
+								   handle->pevent, NULL);
+			if (!handle->compress) {
+				tracecmd_warning("Unsupported file compression %s %s", zname, zver);
+				goto failed_read;
+			}
+		}
+
+		/* Clear the pointers, the error path below frees them too */
+		free(zname);
+		zname = NULL;
+		free(zver);
+		zver = NULL;
+	}
+
+	if (HAS_SECTIONS(handle)) {
+		if (read8(handle, &(handle->options_start))) {
+			tracecmd_warning("Failed to read the offset of the first option section");
+			goto failed_read;
+		}
+		read_metadata_strings(handle);
+	}
+
+	handle->file_state = TRACECMD_FILE_INIT;
+
+	return handle;
+
+ failed_read:
+	free(version);
+	free(zname);
+	free(zver);
+	/* Release everything that was attached to the handle so far */
+	free(handle->strings);
+	if (handle->compress)
+		tracecmd_compress_destroy(handle->compress);
+	if (handle->pevent) {
+		tep_unload_plugins(handle->plugin_list, handle->pevent);
+		tep_free(handle->pevent);
+	}
+	free(handle);
+
+	return NULL;
+}
+
+/**
+ * tracecmd_alloc - create a tracecmd_input handle from a file name
+ * @file: the file name of the file that is of tracecmd data type.
+ * @flags: bitmask of enum tracecmd_open_flags
+ *
+ * Allocate a tracecmd_input handle from a given file name and open the
+ * file. This tests if the file is of trace-cmd format and allocates
+ * a parse event descriptor.
+ *
+ * The returned pointer is not ready to be read yet. A tracecmd_read_headers()
+ * and tracecmd_init_data() still need to be called on the descriptor.
+ *
+ * Unless you know what you are doing with this, you want to use
+ * tracecmd_open() instead.
+ *
+ * Returns the new handle, or NULL if the file can not be opened or is
+ * not a valid trace-cmd file.
+ */
+struct tracecmd_input *tracecmd_alloc(const char *file, int flags)
+{
+	struct tracecmd_input *handle;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	handle = tracecmd_alloc_fd(fd, flags);
+	/* tracecmd_alloc_fd() does not close the descriptor when it fails */
+	if (!handle)
+		close(fd);
+
+	return handle;
+}
+
+/**
+ * tracecmd_open_fd - create a tracecmd_handle from the trace.dat file descriptor
+ * @fd: the file descriptor for the trace.dat file
+ * @flags: bitmask of enum tracecmd_open_flags
+ *
+ * Allocates the handle, reads all the headers and initializes the trace
+ * data. Returns the ready to use handle, or NULL on error.
+ */
+struct tracecmd_input *tracecmd_open_fd(int fd, int flags)
+{
+	struct tracecmd_input *handle = tracecmd_alloc_fd(fd, flags);
+
+	if (!handle)
+		return NULL;
+
+	if (tracecmd_read_headers(handle, 0) >= 0 &&
+	    tracecmd_init_data(handle) >= 0)
+		return handle;
+
+	/* Closing the handle also closes the descriptor */
+	tracecmd_close(handle);
+	return NULL;
+}
+
+/**
+ * tracecmd_open - create a tracecmd_handle from a given file
+ * @file: the file name of the file that is of tracecmd data type.
+ * @flags: bitmask of enum tracecmd_open_flags
+ *
+ * Opens @file, reads all the headers and initializes the trace data.
+ * Returns the ready to use handle, or NULL on error. The file is closed
+ * again on every error path.
+ */
+struct tracecmd_input *tracecmd_open(const char *file, int flags)
+{
+	struct tracecmd_input *handle;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	handle = tracecmd_alloc_fd(fd, flags);
+	if (!handle) {
+		/* No handle took ownership of the descriptor yet */
+		close(fd);
+		return NULL;
+	}
+
+	if (tracecmd_read_headers(handle, 0) < 0 ||
+	    tracecmd_init_data(handle) < 0) {
+		/* Closing the handle also closes the descriptor */
+		tracecmd_close(handle);
+		return NULL;
+	}
+
+	return handle;
+}
+
+/**
+ * tracecmd_open_head - create a tracecmd_handle from a given file, read
+ *			and parse only the trace headers from the file
+ * @file: the file name of the file that is of tracecmd data type.
+ * @flags: bitmask of enum tracecmd_open_flags
+ *
+ * Returns the handle with its headers parsed, or NULL on error. The
+ * file is closed again on every error path.
+ */
+struct tracecmd_input *tracecmd_open_head(const char *file, int flags)
+{
+	struct tracecmd_input *handle;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	handle = tracecmd_alloc_fd(fd, flags);
+	if (!handle) {
+		/* No handle took ownership of the descriptor yet */
+		close(fd);
+		return NULL;
+	}
+
+	if (tracecmd_read_headers(handle, 0) < 0)
+		goto fail;
+
+	return handle;
+
+fail:
+	/* Closing the handle also closes the descriptor */
+	tracecmd_close(handle);
+	return NULL;
+}
+
+/**
+ * tracecmd_ref - take an additional reference on the handle
+ * @handle: input handle for the trace.dat file
+ *
+ * A handle may be shared between several parts of an application.
+ * Each part takes its own reference, and the handle is only freed
+ * when the last of them closes it. A NULL @handle is ignored.
+ */
+void tracecmd_ref(struct tracecmd_input *handle)
+{
+	if (handle)
+		handle->ref++;
+}
+
+/*
+ * Release the name, clock and cpu_data members of a buffer instance.
+ * The instance structure itself is not freed here.
+ */
+static inline void free_buffer(struct input_buffer_instance *buf)
+{
+ free(buf->name);
+ free(buf->clock);
+ free(buf->cpu_data);
+}
+
+/**
+ * tracecmd_close - close and free the trace.dat handle
+ * @handle: input handle for the trace.dat file
+ *
+ * Drops one reference of the handle. When the last reference is dropped,
+ * closes the file descriptor of the handle and frees the resources
+ * allocated by the handle. A NULL @handle is ignored.
+ */
+void tracecmd_close(struct tracecmd_input *handle)
+{
+ struct zchunk_cache *cache;
+ struct file_section *del_sec;
+ struct cpu_data *cpu_data;
+ struct page_map *page_map, *n;
+ int cpu;
+ int i;
+
+ if (!handle)
+ return;
+
+ if (handle->ref <= 0) {
+ tracecmd_warning("tracecmd: bad ref count on handle");
+ return;
+ }
+
+ /* Other users still hold a reference, nothing to tear down yet */
+ if (--handle->ref)
+ return;
+
+ for (cpu = 0; cpu < handle->cpus; cpu++) {
+ /* The tracecmd_peek_data may have cached a record */
+ /* NOTE(review): both helpers run before the cpu_data NULL check below - confirm they cope with that */
+ free_next(handle, cpu);
+ free_page(handle, cpu);
+ if (handle->cpu_data) {
+ cpu_data = &handle->cpu_data[cpu];
+ /* The page bookkeeping is only released for CPUs that own a kbuffer */
+ if (cpu_data->kbuf) {
+ kbuffer_free(cpu_data->kbuf);
+ if (cpu_data->page_map)
+ free_page_map(cpu_data->page_map);
+
+ if (cpu_data->page_cnt)
+ tracecmd_warning("%d pages still allocated on cpu %d%s",
+ cpu_data->page_cnt, cpu,
+ show_records(cpu_data->pages,
+ cpu_data->nr_pages));
+ free(cpu_data->pages);
+ }
+ /* Close the per CPU compress file and remove it from the file system */
+ if (cpu_data->compress.fd >= 0) {
+ close(cpu_data->compress.fd);
+ unlink(cpu_data->compress.file);
+ }
+ /* Drop every cached chunk together with its mapping */
+ while (!list_empty(&cpu_data->compress.cache)) {
+ cache = container_of(cpu_data->compress.cache.next,
+ struct zchunk_cache, list);
+ list_del(&cache->list);
+ free(cache->map);
+ free(cache);
+ }
+ free(cpu_data->compress.chunks);
+ list_for_each_entry_safe(page_map, n, &cpu_data->page_maps, list) {
+ list_del(&page_map->list);
+ free(page_map);
+ }
+ }
+ }
+
+ free(handle->cpustats);
+ free(handle->cpu_data);
+ free(handle->uname);
+ free(handle->trace_clock);
+ free(handle->strings);
+ free(handle->version);
+ close(handle->fd);
+ free(handle->latz.chunks);
+ /* Same treatment for the latency compress file, if there is one */
+ if (handle->latz.fd >= 0) {
+ close(handle->latz.fd);
+ unlink(handle->latz.file);
+ }
+ /* Free the singly linked list of file sections */
+ while (handle->sections) {
+ del_sec = handle->sections;
+ handle->sections = handle->sections->next;
+ free(del_sec);
+ }
+
+ free_buffer(&handle->top_buffer);
+ for (i = 0; i < handle->nr_buffers; i++)
+ free_buffer(&handle->buffers[i]);
+ free(handle->buffers);
+
+ tracecmd_free_hooks(handle->hooks);
+ handle->hooks = NULL;
+
+ trace_pid_map_free(handle->pid_maps);
+ handle->pid_maps = NULL;
+
+ trace_tsync_offset_free(&handle->host);
+ trace_guests_free(handle);
+
+ /* A buffer instance only drops its reference on the parent handle */
+ if (handle->flags & TRACECMD_FL_BUFFER_INSTANCE)
+ tracecmd_close(handle->parent);
+ else {
+ /* Only main handle frees plugins, pevent and compression context */
+ tracecmd_compress_destroy(handle->compress);
+ tep_unload_plugins(handle->plugin_list, handle->pevent);
+ tep_free(handle->pevent);
+ }
+ free(handle);
+}
+
+/*
+ * Read an 8 byte size from @in_handle, write the very same bytes to
+ * @out_handle, and return the value converted with tep_read_number()
+ * in @size. Returns 0 on success, -1 if the read or the write fails.
+ */
+static int read_copy_size8(struct tracecmd_input *in_handle,
+			   struct tracecmd_output *out_handle, unsigned long long *size)
+{
+	/* The raw bytes are copied before the value is converted */
+	if (do_read_check(in_handle, size, 8) ||
+	    do_write_check(out_handle, size, 8))
+		return -1;
+
+	*size = tep_read_number(in_handle->pevent, size, 8);
+	return 0;
+}
+
+/*
+ * Read a 4 byte size from @in_handle, write the very same bytes to
+ * @out_handle, and return the value converted with tep_read_number()
+ * in @size. Returns 0 on success, -1 if the read or the write fails.
+ */
+static int read_copy_size4(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle,
+			   unsigned int *size)
+{
+	/* The raw bytes are copied before the value is converted */
+	if (do_read_check(in_handle, size, 4) ||
+	    do_write_check(out_handle, size, 4))
+		return -1;
+
+	*size = tep_read_number(in_handle->pevent, size, 4);
+	return 0;
+}
+
+/*
+ * Copy @size bytes from the current position of @in_handle to
+ * @out_handle through a temporary buffer.
+ *
+ * Returns 0 on success, -1 on an allocation, read or write error.
+ */
+static int read_copy_data(struct tracecmd_input *in_handle,
+			  unsigned long long size,
+			  struct tracecmd_output *out_handle)
+{
+	char *buf;
+	int ret = -1;
+
+	/*
+	 * malloc(0) is allowed to return NULL, which must not be
+	 * reported as an error for an empty item.
+	 */
+	if (!size)
+		return 0;
+
+	buf = malloc(size);
+	if (!buf)
+		return -1;
+
+	if (!do_read_check(in_handle, buf, size) &&
+	    !do_write_check(out_handle, buf, size))
+		ret = 0;
+
+	free(buf);
+	return ret;
+}
+
+
+/*
+ * Ask check_file_state() whether the input handle, given its file version
+ * and its current state, may move on to @new_state.
+ */
+static bool check_in_state(struct tracecmd_input *handle, int new_state)
+{
+ return check_file_state(handle->file_version, handle->file_state, new_state);
+}
+
+/*
+ * Copy the "header_page" and "header_event" descriptions from the
+ * header info section of @in_handle into a new section of @out_handle.
+ * Both handles end in the TRACECMD_FILE_HEADERS state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_header_files(struct tracecmd_input *in_handle,
+			     struct tracecmd_output *out_handle)
+{
+	bool compress = out_check_compression(out_handle);
+	struct file_section *sec;
+	unsigned long long offset;
+	unsigned long long size;
+
+	if (!check_in_state(in_handle, TRACECMD_FILE_HEADERS) ||
+	    !check_out_state(out_handle, TRACECMD_FILE_HEADERS))
+		return -1;
+
+	sec = section_open(in_handle, TRACECMD_OPTION_HEADER_INFO);
+	if (!sec)
+		return -1;
+
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_HEADER_INFO,
+					  "headers", TRACECMD_SEC_FL_COMPRESS, true);
+	out_compression_start(out_handle, compress);
+
+	/* "header_page" */
+	if (read_copy_data(in_handle, 12, out_handle) < 0)
+		goto error;
+
+	if (read_copy_size8(in_handle, out_handle, &size) < 0)
+		goto error;
+
+	if (read_copy_data(in_handle, size, out_handle) < 0)
+		goto error;
+
+	/* "header_event" */
+	if (read_copy_data(in_handle, 13, out_handle) < 0)
+		goto error;
+
+	if (read_copy_size8(in_handle, out_handle, &size) < 0)
+		goto error;
+
+	if (read_copy_data(in_handle, size, out_handle) < 0)
+		goto error;
+
+	in_handle->file_state = TRACECMD_FILE_HEADERS;
+	if (out_compression_end(out_handle, compress))
+		goto error;
+
+	out_set_file_state(out_handle, in_handle->file_state);
+	section_close(in_handle, sec);
+
+	if (out_update_section_header(out_handle, offset)) {
+		/* The input section is closed already, do not close it twice */
+		out_compression_reset(out_handle, compress);
+		return -1;
+	}
+
+	return 0;
+error:
+	out_compression_reset(out_handle, compress);
+	section_close(in_handle, sec);
+	return -1;
+}
+
+/*
+ * Copy the ftrace events section of @in_handle into a new section of
+ * @out_handle: a 4 byte count followed by that many size prefixed items.
+ * Both handles end in the TRACECMD_FILE_FTRACE_EVENTS state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_ftrace_files(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle)
+{
+	bool compress = out_check_compression(out_handle);
+	struct file_section *sec;
+	unsigned long long offset;
+	unsigned long long size;
+	unsigned int count;
+	unsigned int i;
+
+	if (!check_in_state(in_handle, TRACECMD_FILE_FTRACE_EVENTS) ||
+	    !check_out_state(out_handle, TRACECMD_FILE_FTRACE_EVENTS))
+		return -1;
+
+	sec = section_open(in_handle, TRACECMD_OPTION_FTRACE_EVENTS);
+	if (!sec)
+		return -1;
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_FTRACE_EVENTS,
+					  "ftrace events", TRACECMD_SEC_FL_COMPRESS, true);
+
+	out_compression_start(out_handle, compress);
+
+	if (read_copy_size4(in_handle, out_handle, &count) < 0)
+		goto error;
+
+	for (i = 0; i < count; i++) {
+
+		if (read_copy_size8(in_handle, out_handle, &size) < 0)
+			goto error;
+
+		if (read_copy_data(in_handle, size, out_handle) < 0)
+			goto error;
+	}
+
+	in_handle->file_state = TRACECMD_FILE_FTRACE_EVENTS;
+	if (out_compression_end(out_handle, compress))
+		goto error;
+
+	out_set_file_state(out_handle, in_handle->file_state);
+
+	section_close(in_handle, sec);
+
+	if (out_update_section_header(out_handle, offset)) {
+		/* The input section is closed already, do not close it twice */
+		out_compression_reset(out_handle, compress);
+		return -1;
+	}
+
+	return 0;
+error:
+	out_compression_reset(out_handle, compress);
+	section_close(in_handle, sec);
+	return -1;
+}
+
+/*
+ * Copy the event formats section of @in_handle into a new section of
+ * @out_handle: a 4 byte count of systems, and for each system its name,
+ * a 4 byte count of events and that many size prefixed items.
+ * Both handles end in the TRACECMD_FILE_ALL_EVENTS state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_event_files(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle)
+{
+	bool compress = out_check_compression(out_handle);
+	struct file_section *sec;
+	unsigned long long offset;
+	unsigned long long size;
+	char *system;
+	unsigned int systems;
+	unsigned int count;
+	unsigned int i, x;
+
+	if (!check_in_state(in_handle, TRACECMD_FILE_ALL_EVENTS) ||
+	    !check_out_state(out_handle, TRACECMD_FILE_ALL_EVENTS))
+		return -1;
+
+	sec = section_open(in_handle, TRACECMD_OPTION_EVENT_FORMATS);
+	if (!sec)
+		return -1;
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_EVENT_FORMATS,
+					  "events format", TRACECMD_SEC_FL_COMPRESS, true);
+
+	out_compression_start(out_handle, compress);
+
+	if (read_copy_size4(in_handle, out_handle, &systems) < 0)
+		goto error;
+
+	for (i = 0; i < systems; i++) {
+		/* The system name is copied including its terminating nul */
+		system = read_string(in_handle);
+		if (!system)
+			goto error;
+		if (do_write_check(out_handle, system, strlen(system) + 1)) {
+			free(system);
+			goto error;
+		}
+		free(system);
+
+		if (read_copy_size4(in_handle, out_handle, &count) < 0)
+			goto error;
+
+		for (x = 0; x < count; x++) {
+			if (read_copy_size8(in_handle, out_handle, &size) < 0)
+				goto error;
+
+			if (read_copy_data(in_handle, size, out_handle) < 0)
+				goto error;
+		}
+	}
+
+	in_handle->file_state = TRACECMD_FILE_ALL_EVENTS;
+	if (out_compression_end(out_handle, compress))
+		goto error;
+
+	out_set_file_state(out_handle, in_handle->file_state);
+
+	section_close(in_handle, sec);
+
+	if (out_update_section_header(out_handle, offset)) {
+		/* The input section is closed already, do not close it twice */
+		out_compression_reset(out_handle, compress);
+		return -1;
+	}
+
+	return 0;
+error:
+	out_compression_reset(out_handle, compress);
+	section_close(in_handle, sec);
+	return -1;
+}
+
+/*
+ * Copy the kallsyms section of @in_handle into a new section of
+ * @out_handle: a 4 byte size followed by that many bytes of data.
+ * Both handles end in the TRACECMD_FILE_KALLSYMS state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_proc_kallsyms(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle)
+{
+	bool compress = out_check_compression(out_handle);
+	struct file_section *sec;
+	unsigned long long offset;
+	unsigned int size;
+
+	if (!check_in_state(in_handle, TRACECMD_FILE_KALLSYMS) ||
+	    !check_out_state(out_handle, TRACECMD_FILE_KALLSYMS))
+		return -1;
+
+	sec = section_open(in_handle, TRACECMD_OPTION_KALLSYMS);
+	if (!sec)
+		return -1;
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_KALLSYMS,
+					  "kallsyms", TRACECMD_SEC_FL_COMPRESS, true);
+
+	out_compression_start(out_handle, compress);
+	if (read_copy_size4(in_handle, out_handle, &size) < 0)
+		goto error;
+
+	if (!size)
+		goto out; /* OK? */
+
+	if (read_copy_data(in_handle, size, out_handle) < 0)
+		goto error;
+out:
+	in_handle->file_state = TRACECMD_FILE_KALLSYMS;
+	if (out_compression_end(out_handle, compress))
+		goto error;
+
+	out_set_file_state(out_handle, in_handle->file_state);
+
+	section_close(in_handle, sec);
+
+	if (out_update_section_header(out_handle, offset)) {
+		/* The input section is closed already, do not close it twice */
+		out_compression_reset(out_handle, compress);
+		return -1;
+	}
+
+	return 0;
+error:
+	out_compression_reset(out_handle, compress);
+	section_close(in_handle, sec);
+	return -1;
+}
+
+/*
+ * Copy the printk section of @in_handle into a new section of
+ * @out_handle: a 4 byte size followed by that many bytes of data.
+ * Both handles end in the TRACECMD_FILE_PRINTK state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_ftrace_printk(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle)
+{
+	bool compress = out_check_compression(out_handle);
+	struct file_section *sec;
+	unsigned long long offset;
+	unsigned int size;
+
+	if (!check_in_state(in_handle, TRACECMD_FILE_PRINTK) ||
+	    !check_out_state(out_handle, TRACECMD_FILE_PRINTK))
+		return -1;
+
+	sec = section_open(in_handle, TRACECMD_OPTION_PRINTK);
+	if (!sec)
+		return -1;
+
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_PRINTK,
+					  "printk", TRACECMD_SEC_FL_COMPRESS, true);
+
+	out_compression_start(out_handle, compress);
+
+	if (read_copy_size4(in_handle, out_handle, &size) < 0)
+		goto error;
+
+	if (!size)
+		goto out; /* OK? */
+
+	if (read_copy_data(in_handle, size, out_handle) < 0)
+		goto error;
+
+out:
+	in_handle->file_state = TRACECMD_FILE_PRINTK;
+	if (out_compression_end(out_handle, compress))
+		goto error;
+
+	out_set_file_state(out_handle, in_handle->file_state);
+
+	section_close(in_handle, sec);
+
+	if (out_update_section_header(out_handle, offset)) {
+		/* The input section is closed already, do not close it twice */
+		out_compression_reset(out_handle, compress);
+		return -1;
+	}
+
+	return 0;
+error:
+	out_compression_reset(out_handle, compress);
+	section_close(in_handle, sec);
+	return -1;
+}
+
+/*
+ * Copy the command lines section of @in_handle into a new section of
+ * @out_handle: an 8 byte size followed by that many bytes of data.
+ * Both handles end in the TRACECMD_FILE_CMD_LINES state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_command_lines(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle)
+{
+	bool compress = out_check_compression(out_handle);
+	struct file_section *sec;
+	unsigned long long offset;
+	unsigned long long size;
+
+	if (!check_in_state(in_handle, TRACECMD_FILE_CMD_LINES) ||
+	    !check_out_state(out_handle, TRACECMD_FILE_CMD_LINES))
+		return -1;
+
+	sec = section_open(in_handle, TRACECMD_OPTION_CMDLINES);
+	if (!sec)
+		return -1;
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_CMDLINES,
+					  "command lines", TRACECMD_SEC_FL_COMPRESS, true);
+
+	out_compression_start(out_handle, compress);
+
+	if (read_copy_size8(in_handle, out_handle, &size) < 0)
+		goto error;
+
+	if (!size)
+		goto out; /* OK? */
+
+	if (read_copy_data(in_handle, size, out_handle) < 0)
+		goto error;
+
+out:
+	in_handle->file_state = TRACECMD_FILE_CMD_LINES;
+	if (out_compression_end(out_handle, compress))
+		goto error;
+
+	out_set_file_state(out_handle, in_handle->file_state);
+
+	section_close(in_handle, sec);
+
+	if (out_update_section_header(out_handle, offset)) {
+		/* The input section is closed already, do not close it twice */
+		out_compression_reset(out_handle, compress);
+		return -1;
+	}
+
+	return 0;
+error:
+	out_compression_reset(out_handle, compress);
+	section_close(in_handle, sec);
+	return -1;
+}
+
+/*
+ * Transfer the CPU count from @in_handle to @out_handle. The count is
+ * read from the file when the input has no sections, and taken from
+ * max_cpu otherwise. It is written inline for output versions without
+ * sections and stored as a TRACECMD_OPTION_CPUCOUNT option otherwise.
+ * Both handles end in the TRACECMD_FILE_CPU_COUNT state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_cpu_count(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle)
+{
+	unsigned int cpus;
+
+	if (!check_in_state(in_handle, TRACECMD_FILE_CPU_COUNT) ||
+	    !check_out_state(out_handle, TRACECMD_FILE_CPU_COUNT))
+		return -1;
+
+	if (HAS_SECTIONS(in_handle))
+		cpus = in_handle->max_cpu;
+	else if (read4(in_handle, &cpus))
+		return -1;
+
+	if (tracecmd_get_out_file_version(out_handle) >= FILE_VERSION_SECTIONS) {
+		tracecmd_add_option(out_handle, TRACECMD_OPTION_CPUCOUNT, sizeof(int), &cpus);
+	} else {
+		cpus = tep_read_number(in_handle->pevent, &cpus, 4);
+		if (do_write_check(out_handle, &cpus, 4))
+			return -1;
+	}
+
+	in_handle->file_state = TRACECMD_FILE_CPU_COUNT;
+	out_set_file_state(out_handle, in_handle->file_state);
+
+	return 0;
+}
+
+/**
+ * tracecmd_copy_headers - Copy headers from a tracecmd_input handle to a file descriptor
+ * @in_handle: input handle for the trace.dat file to copy from.
+ * @out_handle: output handle to the trace.dat file to copy to.
+ * @start_state: The file state to start copying from (zero for the beginning)
+ * @end_state: The file state to stop at (zero for up to cmdlines)
+ *
+ * This is used to copy trace header data of a trace.dat file to a
+ * file descriptor. Using @start_state and @end_state it may be used
+ * multiple times against the input handle.
+ *
+ * NOTE: The input handle is also modified, and ends at the end
+ * state as well.
+ *
+ * Returns 0 on success, -1 on error (including @start_state being
+ * greater than @end_state).
+ */
+int tracecmd_copy_headers(struct tracecmd_input *in_handle,
+ struct tracecmd_output *out_handle,
+ enum tracecmd_file_states start_state,
+ enum tracecmd_file_states end_state)
+{
+ struct file_section *sec = NULL;
+ int ret;
+
+ /* Zero selects the defaults: from the headers up to the command lines */
+ if (!start_state)
+ start_state = TRACECMD_FILE_HEADERS;
+ if (!end_state)
+ end_state = TRACECMD_FILE_CMD_LINES;
+
+ if (start_state > end_state)
+ return -1;
+
+ /* Nothing to copy when the range ends before the first header */
+ if (end_state < TRACECMD_FILE_HEADERS)
+ return 0;
+
+ if (in_handle->file_state >= start_state) {
+ /* Set the handle to just before the start state */
+ sec = section_open(in_handle, TRACECMD_OPTION_HEADER_INFO);
+ if (!sec)
+ return -1;
+ /* Now that the file handle has moved, change its state */
+ in_handle->file_state = TRACECMD_FILE_INIT;
+ }
+
+ /* Try to bring the input up to the start state - 1 */
+ ret = tracecmd_read_headers(in_handle, start_state - 1);
+ if (sec)
+ section_close(in_handle, sec);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Enter at the requested start state and fall through the later
+ * states, leaving as soon as the input reached the end state.
+ */
+ switch (start_state) {
+ case TRACECMD_FILE_HEADERS:
+ ret = copy_header_files(in_handle, out_handle);
+ if (ret < 0)
+ goto out;
+
+ /* fallthrough */
+ case TRACECMD_FILE_FTRACE_EVENTS:
+ /* handle's state is now updating with the copies */
+ if (end_state <= in_handle->file_state)
+ return 0;
+
+ ret = copy_ftrace_files(in_handle, out_handle);
+ if (ret < 0)
+ goto out;
+
+ /* fallthrough */
+ case TRACECMD_FILE_ALL_EVENTS:
+ if (end_state <= in_handle->file_state)
+ return 0;
+
+ ret = copy_event_files(in_handle, out_handle);
+ if (ret < 0)
+ goto out;
+
+ /* fallthrough */
+ case TRACECMD_FILE_KALLSYMS:
+ if (end_state <= in_handle->file_state)
+ return 0;
+
+ ret = copy_proc_kallsyms(in_handle, out_handle);
+ if (ret < 0)
+ goto out;
+
+ /* fallthrough */
+ case TRACECMD_FILE_PRINTK:
+ if (end_state <= in_handle->file_state)
+ return 0;
+
+ ret = copy_ftrace_printk(in_handle, out_handle);
+ if (ret < 0)
+ goto out;
+
+ /* fallthrough */
+ case TRACECMD_FILE_CMD_LINES:
+ if (end_state <= in_handle->file_state)
+ return 0;
+
+ ret = copy_command_lines(in_handle, out_handle);
+ if (ret < 0)
+ goto out;
+
+ /* fallthrough */
+ case TRACECMD_FILE_CPU_COUNT:
+ if (end_state <= in_handle->file_state)
+ return 0;
+
+ ret = copy_cpu_count(in_handle, out_handle);
+ if (ret < 0)
+ goto out;
+
+ /* fallthrough */
+ default:
+ break;
+ }
+
+ out:
+ /* Any negative result is reported as -1 */
+ return ret < 0 ? -1 : 0;
+}
+
+/*
+ * Register the name of every buffer instance of @in_handle with
+ * @out_handle and write the buffer info out. Output files that use
+ * sections need nothing here, 0 is returned right away for them.
+ * Otherwise the result of tracecmd_write_buffer_info() is returned.
+ */
+int tracecmd_copy_buffer_descr(struct tracecmd_input *in_handle,
+			       struct tracecmd_output *out_handle)
+{
+	int idx = 0;
+
+	if (tracecmd_get_out_file_version(out_handle) >= FILE_VERSION_SECTIONS)
+		return 0;
+
+	while (idx < in_handle->nr_buffers) {
+		tracecmd_add_buffer_info(out_handle, in_handle->buffers[idx].name, 0);
+		idx++;
+	}
+
+	return tracecmd_write_buffer_info(out_handle);
+}
+
+/*
+ * Copy the options found at the current position of @in_handle to
+ * @out_handle, one option at a time, up to the TRACECMD_OPTION_DONE
+ * marker. In files with sections the marker carries the offset of the
+ * next options section; when it is not zero that section is opened and
+ * copied as well by calling this function again.
+ *
+ * Options that describe sections (buffers, headers, events, kallsyms,
+ * printk, cmdlines) are not copied.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_options_recursive(struct tracecmd_input *in_handle,
+				  struct tracecmd_output *out_handle)
+{
+	unsigned short id, flags = 0;
+	unsigned short option, en2;
+	unsigned long long next;
+	unsigned int size, en4;
+	bool skip;
+
+	for (;;) {
+		if (do_read_check(in_handle, &option, 2))
+			return -1;
+
+		en2 = tep_read_number(in_handle->pevent, &option, 2);
+
+		/* Without sections the done marker has no payload at all */
+		if (en2 == TRACECMD_OPTION_DONE && !HAS_SECTIONS(in_handle))
+			return 0;
+
+		/* next 4 bytes is the size of the option */
+		if (do_read_check(in_handle, &size, 4))
+			return -1;
+
+		en4 = tep_read_number(in_handle->pevent, &size, 4);
+		if (en2 == TRACECMD_OPTION_DONE) {
+			/* option done v7 */
+			if (en4 < 8)
+				return -1;
+
+			if (read8(in_handle, &next))
+				return -1;
+
+			/* A zero offset ends the chain of options sections */
+			if (!next)
+				break;
+
+			if (do_lseek(in_handle, next, SEEK_SET) == (off64_t)-1)
+				return -1;
+
+			if (read_section_header(in_handle, &id, &flags, NULL, NULL))
+				return -1;
+
+			if (id != TRACECMD_OPTION_DONE)
+				return -1;
+
+			if (flags & TRACECMD_SEC_FL_COMPRESS && in_uncompress_block(in_handle))
+				return -1;
+
+			return copy_options_recursive(in_handle, out_handle);
+		}
+		/* Do not copy these, as they have file specific offsets */
+		switch (en2) {
+		case TRACECMD_OPTION_BUFFER:
+		case TRACECMD_OPTION_BUFFER_TEXT:
+		case TRACECMD_OPTION_HEADER_INFO:
+		case TRACECMD_OPTION_FTRACE_EVENTS:
+		case TRACECMD_OPTION_EVENT_FORMATS:
+		case TRACECMD_OPTION_KALLSYMS:
+		case TRACECMD_OPTION_PRINTK:
+		case TRACECMD_OPTION_CMDLINES:
+			skip = true;
+			break;
+		default:
+			skip = false;
+			break;
+		}
+		if (skip) {
+			/*
+			 * If the payload can not be skipped, the next read
+			 * would take option data for an option header.
+			 */
+			if (do_lseek(in_handle, en4, SEEK_CUR) == (off64_t)-1)
+				return -1;
+			continue;
+		}
+		/* The header is written as the raw bytes read from the input */
+		if (do_write_check(out_handle, &option, 2))
+			return -1;
+
+		if (do_write_check(out_handle, &size, 4))
+			return -1;
+
+		if (read_copy_data(in_handle, en4, out_handle))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the options of @in_handle, starting at its current position, into
+ * a new options section of @out_handle, terminate them with a
+ * TRACECMD_OPTION_DONE entry, and append the local options of the output.
+ * Both handles end in the TRACECMD_FILE_OPTIONS state on success.
+ *
+ * Returns -1 on error, otherwise the result of tracecmd_append_options().
+ */
+static int copy_options(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle)
+{
+	unsigned long long offset, start;
+	unsigned short id, en2, flags = 0;
+	unsigned int en4;
+	int tmp;
+
+	if (HAS_SECTIONS(in_handle)) {
+		if (read_section_header(in_handle, &id, &flags, NULL, NULL))
+			return -1;
+
+		if (id != TRACECMD_OPTION_DONE)
+			return -1;
+
+		if (flags & TRACECMD_SEC_FL_COMPRESS && in_uncompress_block(in_handle))
+			return -1;
+	}
+	start = tracecmd_get_out_file_offset(out_handle);
+	if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) {
+		/* The marker is exactly 10 bytes: 7 letters, 2 spaces and the nul */
+		if (do_write_check(out_handle, "options  ", 10))
+			goto error;
+	}
+
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_DONE, "options", 0, false);
+
+	if (copy_options_recursive(in_handle, out_handle))
+		goto error;
+
+	/* tep_read_number() on a host value yields it in file byte order */
+	id = TRACECMD_OPTION_DONE;
+	en2 = tep_read_number(in_handle->pevent, &id, 2);
+	if (do_write_check(out_handle, &en2, 2))
+		goto error;
+
+	if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) {
+		out_save_options_offset(out_handle, start);
+	} else {
+		/* Size of the done option: the 8 byte offset that follows */
+		tmp = 8;
+		en4 = tep_read_number(in_handle->pevent, &tmp, 4);
+		if (do_write_check(out_handle, &en4, 4))
+			goto error;
+
+		out_save_options_offset(out_handle, start);
+		/* No further options section follows yet */
+		start = 0;
+		if (do_write_check(out_handle, &start, 8))
+			goto error;
+	}
+	if (out_update_section_header(out_handle, offset))
+		goto error;
+	if (flags & TRACECMD_SEC_FL_COMPRESS)
+		in_uncompress_reset(in_handle);
+	in_handle->file_state = TRACECMD_FILE_OPTIONS;
+	out_set_file_state(out_handle, in_handle->file_state);
+	/* Append local options */
+	return tracecmd_append_options(out_handle);
+
+error:
+	if (flags & TRACECMD_SEC_FL_COMPRESS)
+		in_uncompress_reset(in_handle);
+	/* Report the failure, the caller checks for a negative result */
+	return -1;
+}
+
+/*
+ * Copy the options of @in_handle to @out_handle. Both handles must be in
+ * a state that allows moving to TRACECMD_FILE_OPTIONS. An input that has
+ * no options offset recorded is not an error, there is simply nothing to
+ * copy.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int tracecmd_copy_options(struct tracecmd_input *in_handle,
+			  struct tracecmd_output *out_handle)
+{
+	bool in_ok = check_in_state(in_handle, TRACECMD_FILE_OPTIONS);
+
+	if (!in_ok || !check_out_state(out_handle, TRACECMD_FILE_OPTIONS))
+		return -1;
+
+	if (!in_handle->options_start)
+		return 0;
+
+	/* Position the input at the start of its options */
+	if (lseek64(in_handle->fd, in_handle->options_start, SEEK_SET) == (off64_t)-1)
+		return -1;
+
+	return copy_options(in_handle, out_handle) < 0 ? -1 : 0;
+}
+
+/*
+ * Copy the latency trace of @in_handle into @out_handle as a text buffer
+ * section named @buf_name. The data is taken from the latency compress
+ * file of the input when there is one (latz.fd >= 0), and from the input
+ * file itself otherwise. The output ends in the
+ * TRACECMD_FILE_CPU_LATENCY state on success.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_trace_latency(struct tracecmd_input *in_handle,
+			      struct tracecmd_output *out_handle, const char *buf_name)
+{
+	int page_size = getpagesize();
+	unsigned long long wsize;
+	unsigned long long offset;
+	int fd;
+
+	/* The marker is exactly 10 bytes: 7 letters, 2 spaces and the nul */
+	if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS &&
+	    do_write_check(out_handle, "latency  ", 10))
+		return -1;
+
+	offset = tracecmd_get_out_file_offset(out_handle);
+
+	if (tracecmd_get_out_file_version(out_handle) >= FILE_VERSION_SECTIONS &&
+	    !out_add_buffer_option(out_handle, buf_name, TRACECMD_OPTION_BUFFER_TEXT,
+				   offset, 0, NULL, page_size))
+		return -1;
+
+	offset = out_write_section_header(out_handle, TRACECMD_OPTION_BUFFER_TEXT,
+					  "buffer latency", TRACECMD_SEC_FL_COMPRESS, false);
+
+	if (in_handle->latz.fd >= 0)
+		fd = in_handle->latz.fd;
+	else
+		fd = in_handle->fd;
+
+	if (!out_copy_fd_compress(out_handle, fd, 0, &wsize, page_size))
+		return -1;
+
+	if (out_update_section_header(out_handle, offset))
+		return -1;
+
+	out_set_file_state(out_handle, TRACECMD_FILE_CPU_LATENCY);
+	return 0;
+}
+
+/*
+ * Describe where the data of every CPU of @in_handle is located and let
+ * out_write_cpu_data() copy it into @out_handle as buffer @buff_name.
+ * The data of a CPU is taken from its compress file when it has one
+ * (compress.fd >= 0), and from the input file otherwise.
+ *
+ * For output versions with sections, nothing is written when no CPU has
+ * any data.
+ *
+ * Returns -1 on error, otherwise the result of out_write_cpu_data(), or
+ * 0 when there was nothing to write.
+ */
+static int copy_trace_flyrecord_data(struct tracecmd_input *in_handle,
+				     struct tracecmd_output *out_handle, const char *buff_name)
+{
+	struct cpu_data_source *data;
+	/* Wide enough so that large trace files do not overflow the sum */
+	unsigned long long total_size = 0;
+	int cpus;
+	int ret;
+	int i, j;
+
+	if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS)
+		cpus = in_handle->max_cpu;
+	else
+		cpus = in_handle->cpus;
+
+	data = calloc(cpus, sizeof(struct cpu_data_source));
+	if (!data)
+		return -1;
+
+	for (i = 0; i < in_handle->cpus; i++) {
+		/* The array is indexed by CPU number, not by position */
+		j = in_handle->cpu_data[i].cpu;
+		if (j < 0 || j >= cpus) {
+			/* Would write outside of the array */
+			free(data);
+			return -1;
+		}
+		data[j].size = in_handle->cpu_data[i].file_size;
+		total_size += data[j].size;
+		if (in_handle->cpu_data[i].compress.fd >= 0) {
+			data[j].fd = in_handle->cpu_data[i].compress.fd;
+			data[j].offset = 0;
+		} else {
+			data[j].fd = in_handle->fd;
+			data[j].offset = in_handle->cpu_data[i].file_offset;
+		}
+	}
+	if (total_size || tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS)
+		ret = out_write_cpu_data(out_handle, cpus, data, buff_name);
+	else
+		ret = 0;
+	free(data);
+
+	return ret;
+}
+
+/*
+ * Copy buffer instance number @index of @in_handle into @out_handle.
+ *
+ * A handle for the instance is opened and closed again before returning.
+ * Unless the output is quiet, a non-empty instance name is announced on
+ * stderr. Latency buffers are copied from @in_handle, all others from the
+ * instance handle.
+ *
+ * Returns -1 if the name or instance handle cannot be obtained, otherwise
+ * the result of the copy.
+ */
+static int copy_flyrecord_buffer(struct tracecmd_input *in_handle,
+				 struct tracecmd_output *out_handle, int index)
+{
+	struct tracecmd_input *sub;
+	const char *buf_name = tracecmd_buffer_instance_name(in_handle, index);
+	int status;
+
+	if (!buf_name)
+		return -1;
+
+	sub = tracecmd_buffer_instance_handle(in_handle, index);
+	if (!sub)
+		return -1;
+
+	if (!tracecmd_get_quiet(out_handle) && *buf_name)
+		fprintf(stderr, "\nBuffer: %s\n\n", buf_name);
+
+	if (!in_handle->buffers[index].latency)
+		status = copy_trace_flyrecord_data(sub, out_handle, buf_name);
+	else
+		status = copy_trace_latency(in_handle, out_handle, buf_name);
+
+	tracecmd_close(sub);
+	return status;
+}
+
+/*
+ * Copy the trace data of an input file that predates file sections.
+ *
+ * A 10 byte marker is read first; "latency" or "flyrecord" selects the
+ * input file state (any other marker leaves the state untouched). The
+ * output clock is then taken from the input. Latency data is copied on
+ * its own; otherwise the top instance is copied, followed by every buffer
+ * instance. The results of the per-buffer copies are not checked.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int copy_trace_data_from_v6(struct tracecmd_input *in_handle,
+				   struct tracecmd_output *out_handle)
+{
+	char marker[10];
+	int idx = 0;
+	int ret;
+
+	if (do_read_check(in_handle, marker, 10))
+		return -1;
+
+	if (!strncmp(marker, "latency", 7))
+		in_handle->file_state = TRACECMD_FILE_CPU_LATENCY;
+	else if (!strncmp(marker, "flyrecord", 9))
+		in_handle->file_state = TRACECMD_FILE_CPU_FLYRECORD;
+
+	tracecmd_init_data(in_handle);
+	tracecmd_set_out_clock(out_handle, in_handle->trace_clock);
+
+	if (in_handle->file_state == TRACECMD_FILE_CPU_LATENCY)
+		return copy_trace_latency(in_handle, out_handle, "");
+
+	/* top instance */
+	ret = copy_trace_flyrecord_data(in_handle, out_handle, "");
+	if (ret)
+		return ret;
+
+	while (idx < in_handle->nr_buffers) {
+		copy_flyrecord_buffer(in_handle, out_handle, idx);
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the trace data of a section based input file.
+ *
+ * The top buffer is copied as latency data, as flyrecord data when it has
+ * CPUs, or - only for output files older than FILE_VERSION_SECTIONS - as
+ * empty CPU data. Every buffer instance is copied afterwards; the results
+ * of those copies are not checked.
+ *
+ * Returns 0 on success, or the non-zero result of the top buffer copy.
+ */
+static int copy_trace_data_from_v7(struct tracecmd_input *in_handle,
+				   struct tracecmd_output *out_handle)
+{
+	int status = 0;
+	int idx = 0;
+
+	/* Force using temporary files for trace data decompression */
+	in_handle->read_zpage = false;
+	tracecmd_init_data(in_handle);
+	tracecmd_set_out_clock(out_handle, in_handle->trace_clock);
+
+	/* copy top buffer */
+	if (in_handle->top_buffer.latency) {
+		status = copy_trace_latency(in_handle, out_handle,
+					    in_handle->top_buffer.name);
+	} else if (in_handle->top_buffer.cpus) {
+		status = copy_trace_flyrecord_data(in_handle, out_handle,
+						   in_handle->top_buffer.name);
+	} else if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) {
+		status = out_write_emty_cpu_data(out_handle, in_handle->max_cpu);
+	}
+	if (status)
+		return status;
+
+	while (idx < in_handle->nr_buffers) {
+		copy_flyrecord_buffer(in_handle, out_handle, idx);
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy the trace data of @in_handle into @out_handle.
+ *
+ * Both handles must pass their TRACECMD_FILE_CPU_FLYRECORD state check.
+ * The input file version selects the pre-section or the section based copy.
+ *
+ * Returns -1 on a failed state check, otherwise the result of the copy.
+ */
+__hidden int tracecmd_copy_trace_data(struct tracecmd_input *in_handle,
+				      struct tracecmd_output *out_handle)
+{
+	if (!check_in_state(in_handle, TRACECMD_FILE_CPU_FLYRECORD))
+		return -1;
+	if (!check_out_state(out_handle, TRACECMD_FILE_CPU_FLYRECORD))
+		return -1;
+
+	if (in_handle->file_version >= FILE_VERSION_SECTIONS)
+		return copy_trace_data_from_v7(in_handle, out_handle);
+
+	return copy_trace_data_from_v6(in_handle, out_handle);
+}
+
+/**
+ * tracecmd_record_at_buffer_start - test if a record starts its subbuffer
+ * @handle: input handle for the trace.dat file
+ * @record: The record to test
+ *
+ * Compares the record's offset within its page against the start of data
+ * reported by the kbuffer of the record's CPU.
+ *
+ * Returns true if the record is the first record on the page, and 0 when
+ * the record has no page or the CPU has no kbuffer.
+ */
+int tracecmd_record_at_buffer_start(struct tracecmd_input *handle,
+				    struct tep_record *record)
+{
+	struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf;
+	struct page *page = record->priv;
+	int in_page;
+
+	if (page && kbuf) {
+		in_page = record->offset - page->offset;
+		return in_page == kbuffer_start_of_data(kbuf);
+	}
+
+	return 0;
+}
+
+/**
+ * tracecmd_page_ts - return the timestamp of the page holding a record
+ * @handle: input handle for the trace.dat file
+ * @record: The record whose page is examined
+ *
+ * Returns the subbuffer timestamp of the record's page, or 0 when the
+ * record has no page or its CPU has no kbuffer.
+ */
+unsigned long long tracecmd_page_ts(struct tracecmd_input *handle,
+				    struct tep_record *record)
+{
+	struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf;
+	struct page *page = record->priv;
+
+	if (page && kbuf)
+		return kbuffer_subbuf_timestamp(kbuf, page->map);
+
+	return 0;
+}
+
+/**
+ * tracecmd_record_ts_delta - return the timestamp delta stored at a record
+ * @handle: input handle for the trace.dat file
+ * @record: The record to look up
+ *
+ * Returns what kbuffer_ptr_delta() reports for the record's position in
+ * its mapped page, or 0 when the record has no page or its CPU has no
+ * kbuffer.
+ */
+unsigned int tracecmd_record_ts_delta(struct tracecmd_input *handle,
+				      struct tep_record *record)
+{
+	struct page *page = record->priv;
+	struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf;
+	int in_page;
+
+	if (page && kbuf) {
+		in_page = record->offset - page->offset;
+		return kbuffer_ptr_delta(kbuf, page->map + in_page);
+	}
+
+	return 0;
+}
+
+/**
+ * tracecmd_record_kbuf - return the kbuffer of the CPU a record belongs to
+ * @handle: input handle for the trace.dat file
+ * @record: The record whose CPU selects the kbuffer
+ *
+ * Returns the kbuf stored in the cpu_data entry indexed by @record->cpu.
+ * No range check is done on the CPU number.
+ */
+struct kbuffer *tracecmd_record_kbuf(struct tracecmd_input *handle,
+ struct tep_record *record)
+{
+ return handle->cpu_data[record->cpu].kbuf;
+}
+
+/**
+ * tracecmd_record_page - return the mapped page holding a record
+ * @handle: input handle for the trace.dat file (not used)
+ * @record: The record to look up
+ *
+ * Returns the map of the record's page, or NULL if the record has no page.
+ */
+void *tracecmd_record_page(struct tracecmd_input *handle,
+			   struct tep_record *record)
+{
+	struct page *page = record->priv;
+
+	if (!page)
+		return NULL;
+
+	return page->map;
+}
+
+/**
+ * tracecmd_record_offset - return the address of a record inside its page
+ * @handle: input handle for the trace.dat file (not used)
+ * @record: The record to look up
+ *
+ * Returns the page map advanced by the record's offset within the page,
+ * or NULL if the record has no page.
+ */
+void *tracecmd_record_offset(struct tracecmd_input *handle,
+			     struct tep_record *record)
+{
+	struct page *page = record->priv;
+	int in_page;
+
+	if (page) {
+		in_page = record->offset - page->offset;
+		return page->map + in_page;
+	}
+
+	return NULL;
+}
+
+/**
+ * tracecmd_buffer_instances - return the number of buffer instances
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the nr_buffers value stored in @handle.
+ */
+int tracecmd_buffer_instances(struct tracecmd_input *handle)
+{
+ return handle->nr_buffers;
+}
+
+/**
+ * tracecmd_buffer_instance_name - return the name of a buffer instance
+ * @handle: input handle for the trace.dat file
+ * @indx: index of the buffer instance
+ *
+ * Returns the name stored for buffer @indx, or NULL if @indx is negative
+ * or not below the number of buffers in @handle. The string points into
+ * the handle.
+ */
+const char *tracecmd_buffer_instance_name(struct tracecmd_input *handle, int indx)
+{
+	/* A negative index would read in front of the buffers array */
+	if (indx < 0 || indx >= handle->nr_buffers)
+		return NULL;
+
+	return handle->buffers[indx].name;
+}
+
+/**
+ * tracecmd_buffer_instance_handle - create an input handle for a buffer instance
+ * @handle: input handle for the trace.dat file
+ * @indx: index of the buffer instance
+ *
+ * The new handle starts as a copy of @handle with its per-handle state
+ * (cpu data, buffers, sections, strings, guest, host, ...) cleared. It has
+ * its own reference count, its own copies of the trace clock and uname
+ * strings and a dup()ed file descriptor. @handle becomes its parent and
+ * gains one reference through tracecmd_ref().
+ *
+ * Returns the new handle, or NULL if @indx is not below nr_buffers, on
+ * allocation failure, or when the buffer data cannot be read.
+ *
+ * NOTE(review): a negative @indx is not rejected here.
+ */
+struct tracecmd_input *
+tracecmd_buffer_instance_handle(struct tracecmd_input *handle, int indx)
+{
+ struct tracecmd_input *new_handle;
+ struct input_buffer_instance *buffer = &handle->buffers[indx];
+ size_t offset;
+ ssize_t ret;
+
+ if (indx >= handle->nr_buffers)
+ return NULL;
+
+ /*
+ * We make a copy of the current handle, but we substitute
+ * the cpu data with the cpu data for this buffer.
+ */
+ new_handle = malloc(sizeof(*handle));
+ if (!new_handle)
+ return NULL;
+
+ *new_handle = *handle;
+ /* Clear everything the struct copy must not share with @handle */
+ memset(&new_handle->top_buffer, 0, sizeof(new_handle->top_buffer));
+ new_handle->cpu_data = NULL;
+ new_handle->nr_buffers = 0;
+ new_handle->buffers = NULL;
+ new_handle->version = NULL;
+ new_handle->sections = NULL;
+ new_handle->strings = NULL;
+ new_handle->guest = NULL;
+ new_handle->ref = 1;
+ if (handle->trace_clock) {
+ new_handle->trace_clock = strdup(handle->trace_clock);
+ if (!new_handle->trace_clock) {
+ /* No reference on @handle has been taken yet, a plain free is enough */
+ free(new_handle);
+ return NULL;
+ }
+ }
+ memset(&new_handle->host, 0, sizeof(new_handle->host));
+ new_handle->parent = handle;
+ new_handle->cpustats = NULL;
+ new_handle->hooks = NULL;
+ if (handle->uname)
+ /* Ignore if fails to malloc, no biggy */
+ new_handle->uname = strdup(handle->uname);
+ tracecmd_ref(handle);
+
+ /* NOTE(review): the result of dup() is not checked */
+ new_handle->fd = dup(handle->fd);
+
+ new_handle->flags |= TRACECMD_FL_BUFFER_INSTANCE;
+
+ new_handle->pid_maps = NULL;
+ if (!HAS_SECTIONS(handle)) {
+ /* Save where we currently are */
+ offset = lseek64(handle->fd, 0, SEEK_CUR);
+
+ ret = lseek64(handle->fd, buffer->offset, SEEK_SET);
+ if (ret == (off64_t)-1) {
+ tracecmd_warning("could not seek to buffer %s offset %ld",
+ buffer->name, buffer->offset);
+ goto error;
+ }
+ /*
+ * read_options_type() is called right after the CPU count so update
+ * file state accordingly.
+ */
+ new_handle->file_state = TRACECMD_FILE_CPU_COUNT;
+ ret = read_options_type(new_handle);
+ if (!ret)
+ ret = read_cpu_data(new_handle);
+
+ if (ret < 0) {
+ tracecmd_warning("failed to read sub buffer %s", buffer->name);
+ goto error;
+ }
+ /* Put the file position of @handle back where it was */
+ ret = lseek64(handle->fd, offset, SEEK_SET);
+ if (ret < 0) {
+ /* NOTE(review): offset is a size_t printed with %ld */
+ tracecmd_warning("could not seek to back to offset %ld", offset);
+ goto error;
+ }
+ } else {
+ /* Section based files carry a page size per buffer */
+ new_handle->page_size = handle->buffers[indx].page_size;
+ if (init_buffer_cpu_data(new_handle, buffer) < 0)
+ goto error;
+ }
+
+ return new_handle;
+
+error:
+ tracecmd_close(new_handle);
+ return NULL;
+}
+
+/**
+ * tracecmd_is_buffer_instance - test if a handle is a buffer instance
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns non-zero if TRACECMD_FL_BUFFER_INSTANCE is set in the flags of
+ * @handle, 0 otherwise.
+ */
+int tracecmd_is_buffer_instance(struct tracecmd_input *handle)
+{
+ return handle->flags & TRACECMD_FL_BUFFER_INSTANCE;
+}
+
+/**
+ * tracecmd_long_size - return the size of "long" for the arch
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the long_size value stored in @handle.
+ */
+int tracecmd_long_size(struct tracecmd_input *handle)
+{
+ return handle->long_size;
+}
+
+/**
+ * tracecmd_page_size - return the PAGE_SIZE for the arch
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the page_size value stored in @handle.
+ */
+int tracecmd_page_size(struct tracecmd_input *handle)
+{
+ return handle->page_size;
+}
+
+/**
+ * tracecmd_cpus - return the number of CPUs recorded
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the max_cpu value stored in @handle.
+ */
+int tracecmd_cpus(struct tracecmd_input *handle)
+{
+ return handle->max_cpu;
+}
+
+/**
+ * tracecmd_get_tep - return the tep handle
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the pevent pointer stored in @handle.
+ */
+struct tep_handle *tracecmd_get_tep(struct tracecmd_input *handle)
+{
+ return handle->pevent;
+}
+
+/**
+ * tracecmd_get_in_file_version - return the trace.dat file version
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the file_version value stored in @handle.
+ */
+unsigned long tracecmd_get_in_file_version(struct tracecmd_input *handle)
+{
+ return handle->file_version;
+}
+
+/**
+ * tracecmd_get_file_compress_proto - get name and version of compression algorithm
+ * @handle: input handle for the trace.dat file
+ * @name: return, name of the compression algorithm.
+ * @version: return, version of the compression algorithm.
+ *
+ * Get the name and the version of the compression algorithm, used to
+ * compress the file associated with @handle. The lookup is delegated to
+ * tracecmd_compress_proto_get_name() on the compress member of @handle.
+ *
+ * Returns 0 on success, or -1 in case of an error. If 0 is returned,
+ * the name and version of the algorithm are stored in @name and @version.
+ * The returned strings must *not* be freed.
+ */
+int tracecmd_get_file_compress_proto(struct tracecmd_input *handle,
+ const char **name, const char **version)
+{
+ return tracecmd_compress_proto_get_name(handle->compress, name, version);
+}
+
+/**
+ * tracecmd_get_use_trace_clock - return use_trace_clock
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the use_trace_clock value stored in @handle.
+ */
+bool tracecmd_get_use_trace_clock(struct tracecmd_input *handle)
+{
+ return handle->use_trace_clock;
+}
+
+/**
+ * tracecmd_get_options_offset - get offset of the options sections in the file
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the options_start value stored in @handle.
+ */
+size_t tracecmd_get_options_offset(struct tracecmd_input *handle)
+{
+ return handle->options_start;
+}
+
+/**
+ * tracecmd_get_trace_clock - return the saved trace clock
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns a string of the clock that was saved in the trace.dat file.
+ * The string must not be freed, as it points to the internal
+ * structure data.
+ */
+const char *tracecmd_get_trace_clock(struct tracecmd_input *handle)
+{
+ return handle->trace_clock;
+}
+
+/**
+ * tracecmd_get_cpustats - return the saved cpu stats
+ * @handle: input handle for the trace.dat file
+ *
+ * Provides a method to extract the cpu stats saved in @handle.
+ *
+ * Returns a string of the cpu stats that was saved in the trace.dat file.
+ * The string must not be freed, as it points to the internal
+ * structure data.
+ */
+const char *tracecmd_get_cpustats(struct tracecmd_input *handle)
+{
+ return handle->cpustats;
+}
+
+/**
+ * tracecmd_get_uname - return the saved name and kernel information
+ * @handle: input handle for the trace.dat file
+ *
+ * Provides a method to extract the system information saved in @handle.
+ *
+ * Returns a string of the system information that was saved in the
+ * trace.dat file.
+ * The string must not be freed, as it points to the internal
+ * structure data.
+ */
+const char *tracecmd_get_uname(struct tracecmd_input *handle)
+{
+ return handle->uname;
+}
+
+/**
+ * tracecmd_get_version - return the saved version information
+ * @handle: input handle for the trace.dat file
+ *
+ * Provides a method to extract the version string saved in @handle.
+ *
+ * Returns a string of the version that was saved in the trace.dat file.
+ * The string must not be freed, as it points to the internal
+ * structure data.
+ */
+const char *tracecmd_get_version(struct tracecmd_input *handle)
+{
+ return handle->version;
+}
+
+/**
+ * tracecmd_get_cpu_file_size - return the saved cpu file size
+ * @handle: input handle for the trace.dat file
+ * @cpu: cpu index
+ *
+ * Looks up the file size recorded in the cpu_data entry number @cpu.
+ *
+ * Returns that file size, or (off64_t)-1 if @cpu is negative or not below
+ * the number of cpus in @handle.
+ */
+off64_t tracecmd_get_cpu_file_size(struct tracecmd_input *handle, int cpu)
+{
+	if (cpu >= 0 && cpu < handle->cpus)
+		return handle->cpu_data[cpu].file_size;
+
+	return (off64_t)-1;
+}
+
+/**
+ * tracecmd_get_show_data_func - return the show data func
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the show_data_func pointer stored in @handle.
+ */
+tracecmd_show_data_func
+tracecmd_get_show_data_func(struct tracecmd_input *handle)
+{
+ return handle->show_data_func;
+}
+
+/**
+ * tracecmd_set_show_data_func - set the show data func
+ * @handle: input handle for the trace.dat file
+ * @func: the function to store in @handle
+ *
+ * Replaces the show_data_func pointer of @handle with @func.
+ */
+void tracecmd_set_show_data_func(struct tracecmd_input *handle,
+ tracecmd_show_data_func func)
+{
+ handle->show_data_func = func;
+}
+
+/**
+ * tracecmd_get_traceid - get the trace id of the session
+ * @handle: input handle for the trace.dat file
+ *
+ * Returns the trace id written in the trace file, as stored in the
+ * trace_id member of @handle.
+ */
+unsigned long long tracecmd_get_traceid(struct tracecmd_input *handle)
+{
+ return handle->trace_id;
+}
+
+/**
+ * tracecmd_get_first_ts - get the timestamp of the first recorded event
+ * @handle: input handle for the trace.dat file
+ *
+ * Scans the cpu_data entries of @handle, skipping those whose size is
+ * zero, and picks the smallest first_ts among the rest.
+ *
+ * Returns the timestamp of the first recorded event, or 0 if every
+ * entry is empty.
+ */
+unsigned long long tracecmd_get_first_ts(struct tracecmd_input *handle)
+{
+	unsigned long long earliest = 0;
+	bool found = false;
+	int cpu;
+
+	for (cpu = 0; cpu < handle->cpus; cpu++) {
+		/* Ignore empty buffers */
+		if (!handle->cpu_data[cpu].size)
+			continue;
+
+		if (!found) {
+			earliest = handle->cpu_data[cpu].first_ts;
+			found = true;
+		} else if (earliest > handle->cpu_data[cpu].first_ts) {
+			earliest = handle->cpu_data[cpu].first_ts;
+		}
+	}
+
+	return earliest;
+}
+
+/**
+ * tracecmd_get_guest_cpumap - get the mapping of guest VCPU to host process
+ * @handle: input handle for the trace.dat file
+ * @trace_id: ID of the guest tracing session
+ * @name: return, name of the guest
+ * @vcpu_count: return, number of VCPUs
+ * @cpu_pid: return, array with guest VCPU to host process mapping
+ *
+ * Searches the guest list of @handle for the entry matching @trace_id.
+ * Each of @name, @vcpu_count and @cpu_pid is optional and only filled in
+ * when it is not NULL. Array @cpu_pid has @vcpu_count entries: the index
+ * is the VCPU id, the content is the PID of the host process running
+ * this VCPU.
+ *
+ * This information is stored in host trace.dat file
+ *
+ * Returns 0 if the guest was found, -1 otherwise.
+ */
+int tracecmd_get_guest_cpumap(struct tracecmd_input *handle,
+			      unsigned long long trace_id,
+			      const char **name,
+			      int *vcpu_count, const int **cpu_pid)
+{
+	struct guest_trace_info *entry;
+
+	for (entry = handle->guest; entry; entry = entry->next) {
+		if (entry->trace_id == trace_id)
+			break;
+	}
+	if (!entry)
+		return -1;
+
+	if (name)
+		*name = entry->name;
+	if (vcpu_count)
+		*vcpu_count = entry->vcpu_count;
+	if (cpu_pid)
+		*cpu_pid = entry->cpu_pid;
+
+	return 0;
+}
+
+/**
+ * tracecmd_enable_tsync - enable / disable the timestamps correction
+ * @handle: input handle for the trace.dat file
+ * @enable: enable / disable the timestamps correction
+ *
+ * Records in @handle whether timestamps are to be corrected with the
+ * array of recorded time offsets. Enabling is refused when the handle
+ * has no time offsets or a zero host CPU count; disabling always works.
+ *
+ * Returns -1 in case of an error, or 0 otherwise
+ */
+int tracecmd_enable_tsync(struct tracecmd_input *handle, bool enable)
+{
+	if (enable) {
+		if (!handle->host.ts_offsets)
+			return -1;
+		if (!handle->host.cpu_count)
+			return -1;
+	}
+
+	handle->host.sync_enable = enable;
+	return 0;
+}
+
diff --git a/lib/trace-cmd/trace-msg.c b/lib/trace-cmd/trace-msg.c
new file mode 100644
index 00000000..39465ade
--- /dev/null
+++ b/lib/trace-cmd/trace-msg.c
@@ -0,0 +1,1404 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace-msg.c : define message protocol for communication between clients and
+ * a server
+ *
+ * Copyright (C) 2013 Hitachi, Ltd.
+ * Created by Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
+ *
+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <linux/types.h>
+
+#include "trace-write-local.h"
+#include "trace-cmd-local.h"
+#include "trace-local.h"
+#include "trace-msg.h"
+#include "trace-cmd.h"
+
+/* Short names for the 32 bit types used by the message structures */
+typedef __u32 u32;
+typedef __be32 be32;
+
+/* Debug output of this file goes through tracecmd_debug() */
+#define dprint(fmt, ...) tracecmd_debug(fmt, ##__VA_ARGS__)
+
+/* Two (4k) pages is the max transfer for now */
+#define MSG_MAX_LEN 8192
+
+/* Size of the header that starts every message */
+#define MSG_HDR_LEN sizeof(struct tracecmd_msg_header)
+
+/* What is left of a maximum sized message once the header is taken off */
+#define MSG_MAX_DATA_LEN (MSG_MAX_LEN - MSG_HDR_LEN)
+
+/* Page size put into the MSG_TINIT message by make_tinit() */
+unsigned int page_size;
+
+/*
+ * Command area of MSG_TINIT. make_tinit() stores every field through
+ * htonl(): the CPU count, the page size and the number of option strings
+ * that follow in the message payload.
+ */
+struct tracecmd_msg_tinit {
+ be32 cpus;
+ be32 page_size;
+ be32 opt_num;
+} __packed;
+
+/*
+ * Command area of MSG_RINIT. make_rinit() stores the CPU count through
+ * htonl(); the ports themselves travel in the message payload.
+ */
+struct tracecmd_msg_rinit {
+ be32 cpus;
+} __packed;
+
+/*
+ * Size of two ints - presumably the id and length members of
+ * struct tracecmd_msg_trace_req_param; TODO confirm against its users.
+ */
+#define TRACE_REQ_PARAM_SIZE (2 * sizeof(int))
+/* Identifiers of trace request parameters */
+enum trace_req_params {
+ TRACE_REQUEST_ARGS,
+ TRACE_REQUEST_TSYNC_PROTOS,
+};
+
+/*
+ * One trace request parameter: an id, a length and a value pointer.
+ * NOTE(review): presumably id is one of enum trace_req_params and length
+ * is the size of value - confirm against the code that uses it.
+ */
+struct tracecmd_msg_trace_req_param {
+ int id;
+ int length;
+ char *value;
+};
+
+/* Command area of MSG_TRACE_REQ (its size is what MSG_MAP registers) */
+struct tracecmd_msg_trace_req {
+ be32 flags;
+ be32 argc;
+ u64 trace_id;
+} __packed;
+
+/* Command area of MSG_TRACE_RESP (its size is what MSG_MAP registers) */
+struct tracecmd_msg_trace_resp {
+ be32 flags;
+ be32 cpus;
+ be32 page_size;
+ u64 trace_id;
+ char tsync_proto_name[TRACECMD_TSYNC_PNAME_LENGTH];
+ be32 tsync_port;
+} __packed;
+
+/* Command area of MSG_TIME_SYNC (its size is what MSG_MAP registers) */
+struct tracecmd_msg_tsync {
+ char sync_protocol_name[TRACECMD_TSYNC_PNAME_LENGTH];
+ be32 sync_msg_id;
+} __packed;
+
+/*
+ * Header at the start of every message; all fields are stored through
+ * htonl() and read back through ntohl().
+ *
+ * size     - length of the whole message: header, command area and payload
+ * cmd      - one of enum tracecmd_msg_cmd
+ * cmd_size - length of the command area that follows the header
+ */
+struct tracecmd_msg_header {
+ be32 size;
+ be32 cmd;
+ be32 cmd_size;
+} __packed;
+
+/*
+ * Table of all message commands as C(name, number, command area size).
+ * MSG_MAP is expanded three times below with different definitions of C()
+ * to generate the command enum, the size table and the name table, so the
+ * three always stay in step.
+ */
+#define MSG_MAP \
+ C(CLOSE, 0, 0), \
+ C(TINIT, 1, sizeof(struct tracecmd_msg_tinit)), \
+ C(RINIT, 2, sizeof(struct tracecmd_msg_rinit)), \
+ C(SEND_DATA, 3, 0), \
+ C(FIN_DATA, 4, 0), \
+ C(NOT_SUPP, 5, 0), \
+ C(TRACE_REQ, 6, sizeof(struct tracecmd_msg_trace_req)), \
+ C(TRACE_RESP, 7, sizeof(struct tracecmd_msg_trace_resp)),\
+ C(CLOSE_RESP, 8, 0), \
+ C(TIME_SYNC, 9, sizeof(struct tracecmd_msg_tsync)),
+
+/* First expansion: MSG_<name> = <number> */
+#undef C
+#define C(a,b,c) MSG_##a = b
+
+enum tracecmd_msg_cmd {
+ MSG_MAP
+ MSG_NR_COMMANDS
+};
+
+/* Second expansion: the command area size of each command */
+#undef C
+#define C(a,b,c) c
+
+/*
+ * Indexed by command number. Despite the be32 element type the values are
+ * plain sizeof() results; tracecmd_msg_init() applies htonl() to them.
+ */
+static be32 msg_cmd_sizes[] = { MSG_MAP };
+
+/* Third expansion: the command name as a string */
+#undef C
+#define C(a,b,c) #a
+
+static const char *msg_names[] = { MSG_MAP };
+
+/*
+ * Return the name of message command @cmd, or "Unknown" if @cmd is not a
+ * valid command number.
+ */
+static const char *cmd_to_name(int cmd)
+{
+	if (cmd >= 0 && cmd < MSG_NR_COMMANDS)
+		return msg_names[cmd];
+
+	return "Unknown";
+}
+
+/*
+ * In-memory form of one message: the header, the command area of whichever
+ * command the header names, and a pointer to the separately allocated
+ * payload. The header and command area are written to and read from the
+ * peer straight out of this structure; buf is released by msg_free().
+ */
+struct tracecmd_msg {
+ struct tracecmd_msg_header hdr;
+ union {
+ struct tracecmd_msg_tinit tinit;
+ struct tracecmd_msg_rinit rinit;
+ struct tracecmd_msg_trace_req trace_req;
+ struct tracecmd_msg_trace_resp trace_resp;
+ struct tracecmd_msg_tsync tsync;
+ };
+ char *buf;
+} __packed;
+
+/*
+ * Return the payload length of @msg: the total size given in the header
+ * minus the header itself and the command area.
+ */
+static inline int msg_buf_len(struct tracecmd_msg *msg)
+{
+	size_t total = ntohl(msg->hdr.size);
+	size_t fixed = MSG_HDR_LEN + ntohl(msg->hdr.cmd_size);
+
+	return total - fixed;
+}
+
+/*
+ * Write @msg to @fd.
+ *
+ * With @network set the command number is validated and the header plus
+ * command area are written before the payload. Without it only the
+ * payload is written.
+ *
+ * Returns 0 on success, -EINVAL on an invalid command or a size field
+ * smaller than header plus command area, or the negative result of the
+ * failed write.
+ */
+static int __msg_write(int fd, struct tracecmd_msg *msg, bool network)
+{
+	int fixed_len, payload_len;
+	int cmd;
+	int ret;
+
+	if (network) {
+		cmd = ntohl(msg->hdr.cmd);
+		if (cmd < 0 || cmd >= MSG_NR_COMMANDS)
+			return -EINVAL;
+		dprint("msg send: %d (%s) [%d]\n",
+		       cmd, cmd_to_name(cmd), ntohl(msg->hdr.size));
+	}
+
+	fixed_len = MSG_HDR_LEN + ntohl(msg->hdr.cmd_size);
+	payload_len = ntohl(msg->hdr.size) - fixed_len;
+	if (payload_len < 0)
+		return -EINVAL;
+
+	if (network) {
+		ret = __do_write_check(fd, msg, fixed_len);
+		if (ret < 0)
+			return ret;
+	}
+
+	return payload_len ? __do_write_check(fd, msg->buf, payload_len) : 0;
+}
+
+/*
+ * Seek in the cache file of @msg_handle.
+ *
+ * Seeking is only possible while the handle is in cache mode with an open
+ * cache file; a network socket cannot be seeked.
+ *
+ * Returns the result of lseek64(), or (off64_t)-1 when not caching.
+ */
+__hidden off64_t msg_lseek(struct tracecmd_msg_handle *msg_handle, off64_t offset, int whence)
+{
+	if (msg_handle->cache && msg_handle->cfd >= 0)
+		return lseek64(msg_handle->cfd, offset, whence);
+
+	return (off64_t)-1;
+}
+
+/*
+ * Write @msg either into the cache file (payload only) when the handle is
+ * caching, or to the peer (complete message) otherwise.
+ */
+static int msg_write(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg)
+{
+	bool cached = msg_handle->cache && msg_handle->cfd >= 0;
+	int fd = cached ? msg_handle->cfd : msg_handle->fd;
+
+	return __msg_write(fd, msg, !cached);
+}
+
+/*
+ * Flag bits of trace messages.
+ * NOTE(review): presumably carried in the flags field of the trace
+ * request / response messages - confirm against their users.
+ */
+enum msg_trace_flags {
+ MSG_TRACE_USE_FIFOS = 1 << 0,
+};
+
+/*
+ * Fill in the MSG_TINIT message @msg for @msg_handle.
+ *
+ * When the handle asks for TCP or vsock, the payload carries the single
+ * option string "tcp" or "vsock" (TCP wins if both are set) and the
+ * message size grows by the string including its terminator.
+ *
+ * Returns 0 on success, -1 if the option string cannot be allocated.
+ */
+static int make_tinit(struct tracecmd_msg_handle *msg_handle,
+		      struct tracecmd_msg *msg)
+{
+	const char *proto;
+	int nr_opts = 0;
+	int payload_len = 0;
+
+	if (msg_handle->flags & (TRACECMD_MSG_FL_USE_TCP |
+				 TRACECMD_MSG_FL_USE_VSOCK)) {
+		if (msg_handle->flags & TRACECMD_MSG_FL_USE_TCP)
+			proto = "tcp";
+		else
+			proto = "vsock";
+
+		msg->buf = strdup(proto);
+		if (!msg->buf)
+			return -1;
+		nr_opts++;
+		payload_len += strlen(msg->buf) + 1;
+	}
+
+	msg->tinit.cpus = htonl(msg_handle->cpu_count);
+	msg->tinit.page_size = htonl(page_size);
+	msg->tinit.opt_num = htonl(nr_opts);
+
+	msg->hdr.size = htonl(ntohl(msg->hdr.size) + payload_len);
+
+	return 0;
+}
+
+/*
+ * test a to u
+ *
+ * Parse the decimal number at the start of @s into @res.
+ *
+ * Returns 0 and stores the value if @s starts with a number in the range
+ * 0..UINT_MAX. Returns -1, leaving @res untouched, if @s does not start
+ * with a number, or the number is negative or too large.
+ */
+static int tatou(const char *s, unsigned int *res)
+{
+	long long val;
+	char *end;
+
+	/* Unlike atol(), strtoll() reports "no digits" and overflow */
+	errno = 0;
+	val = strtoll(s, &end, 10);
+	if (end == s || errno == ERANGE)
+		return -1;
+
+	if (val < 0 || val > UINT_MAX)
+		return -1;
+
+	*res = (unsigned int)val;
+	return 0;
+}
+
+/*
+ * Print the @arr_len numbers of @arr into @buf as decimal strings, each
+ * followed by its '\0' terminator.
+ *
+ * @buf may be NULL with @buf_len 0 to only compute the space needed.
+ * Output that does not fit in @buf_len is truncated by snprintf().
+ *
+ * Returns the number of bytes the complete output takes (terminators
+ * included), or the negative result of a failing snprintf().
+ */
+static int write_uints(char *buf, size_t buf_len,
+		       unsigned int *arr, int arr_len)
+{
+	int total = 0;
+	int len;
+	int idx;
+
+	for (idx = 0; idx < arr_len; idx++) {
+		len = snprintf(buf, buf_len, "%u", arr[idx]);
+		if (len < 0)
+			return len;
+
+		/* Count the '\0' byte */
+		len++;
+		total += len;
+
+		if (buf)
+			buf += len;
+		buf_len = (buf_len >= len) ? buf_len - len : 0;
+	}
+
+	return total;
+}
+
+/*
+ * Fill in the MSG_RINIT message @msg with @cpus and the @ports array.
+ *
+ * The ports are sent as payload: one decimal string per CPU, each with its
+ * terminator. The space needed is measured with a first write_uints() pass.
+ *
+ * Returns 0 on success, -ENOMEM if the payload cannot be allocated.
+ */
+static int make_rinit(struct tracecmd_msg *msg, int cpus, unsigned int *ports)
+{
+	int payload_len = write_uints(NULL, 0, ports, cpus);
+
+	msg->buf = malloc(payload_len);
+	if (!msg->buf)
+		return -ENOMEM;
+
+	write_uints(msg->buf, payload_len, ports, cpus);
+
+	msg->rinit.cpus = htonl(cpus);
+	msg->hdr.size = htonl(ntohl(msg->hdr.size) + payload_len);
+
+	return 0;
+}
+
+/*
+ * Reset @msg and set up its header for command @cmd: the command number,
+ * the command area size registered for it, and a total size of header
+ * plus command area (no payload yet).
+ */
+static void tracecmd_msg_init(u32 cmd, struct tracecmd_msg *msg)
+{
+	u32 cmd_len = msg_cmd_sizes[cmd];
+
+	memset(msg, 0, sizeof(*msg));
+	msg->hdr.cmd = htonl(cmd);
+	msg->hdr.cmd_size = htonl(cmd_len);
+	msg->hdr.size = htonl(MSG_HDR_LEN + cmd_len);
+}
+
+/* Release the payload of @msg and zero the whole message */
+static void msg_free(struct tracecmd_msg *msg)
+{
+	char *payload = msg->buf;
+
+	memset(msg, 0, sizeof(*msg));
+	free(payload);
+}
+
+/*
+ * Write @msg through @msg_handle and release it, whether or not the write
+ * worked.
+ *
+ * Returns the result of the write, with any failure reported as -ECOMM.
+ */
+static int tracecmd_msg_send(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg)
+{
+	int ret = msg_write(msg_handle, msg);
+
+	msg_free(msg);
+
+	return ret < 0 ? -ECOMM : ret;
+}
+
+/*
+ * Write @msg through @msg_handle but leave it allocated for the caller.
+ *
+ * Returns the result of the write, with any failure reported as -ECOMM.
+ */
+static int msg_send_nofree(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg)
+{
+	int ret = msg_write(msg_handle, msg);
+
+	return ret < 0 ? -ECOMM : ret;
+}
+
+/*
+ * Read exactly @size bytes from @fd into @buf, starting at offset *@n.
+ * *@n is advanced by every byte read; interrupted reads are retried.
+ *
+ * Returns 0 on success, -ENOTCONN on end of file, or -errno on a read error.
+ */
+static int msg_read(int fd, void *buf, u32 size, int *n)
+{
+	ssize_t got;
+
+	while (size) {
+		got = read(fd, (char *)buf + *n, size);
+		if (got > 0) {
+			size -= got;
+			*n += got;
+			continue;
+		}
+		if (!got)
+			return -ENOTCONN;
+		if (errno != EINTR)
+			return -errno;
+	}
+
+	return 0;
+}
+
+static char scratch_buf[MSG_MAX_LEN];
+
+/*
+ * Read what follows the header of @msg: the command area and the payload.
+ *
+ * @n is the number of bytes of the message read so far and @size the total
+ * message size taken from the header. At most the command area size known
+ * for the command is stored in @msg; any excess the peer sends is read
+ * into scratch_buf and dropped. A payload, if present, is read into a
+ * newly allocated msg->buf.
+ *
+ * The sizes come from the peer, so the command area is required to fit
+ * inside the message. Together with the MSG_MAX_LEN limit on @size this
+ * keeps the discarded bytes within scratch_buf.
+ *
+ * Returns 0 on success, -EINVAL on an invalid command or inconsistent
+ * sizes, -ENOMEM if the payload cannot be allocated, or the negative
+ * result of a failed read.
+ */
+static int msg_read_extra(int fd, struct tracecmd_msg *msg,
+			  int *n, int size)
+{
+	int cmd, cmd_size, rsize;
+	int ret;
+
+	cmd = ntohl(msg->hdr.cmd);
+	if (cmd < 0 || cmd >= MSG_NR_COMMANDS)
+		return -EINVAL;
+
+	if (size > MSG_MAX_LEN || *n < 0 || *n > size)
+		return -EINVAL;
+
+	cmd_size = ntohl(msg->hdr.cmd_size);
+	if (cmd_size < 0)
+		return -EINVAL;
+
+	/* The command area cannot be larger than what is left of the message */
+	if (cmd_size > size - *n)
+		return -EINVAL;
+
+	if (cmd_size > 0) {
+		rsize = cmd_size;
+		if (rsize > msg_cmd_sizes[cmd])
+			rsize = msg_cmd_sizes[cmd];
+
+		ret = msg_read(fd, msg, rsize, n);
+		if (ret < 0)
+			return ret;
+
+		/* Drop the part of the command area we do not know about */
+		ret = msg_read(fd, scratch_buf, cmd_size - rsize, n);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (size > *n) {
+		size -= *n;
+		msg->buf = malloc(size);
+		if (!msg->buf)
+			return -ENOMEM;
+
+		/* The payload is read into its own buffer, from its start */
+		*n = 0;
+		return msg_read(fd, msg->buf, size, n);
+	}
+
+	return 0;
+}
+
+/*
+ * Receive one message from @fd into @msg: the header first, then - if the
+ * header announces more than itself - the command area and payload.
+ *
+ * Returns 0 on success, -ENOMSG if the announced size is larger than
+ * MSG_MAX_LEN or smaller than the header, or the negative result of a
+ * failed read.
+ */
+static int tracecmd_msg_recv(int fd, struct tracecmd_msg *msg)
+{
+	u32 size;
+	int n = 0;
+	int ret;
+
+	ret = msg_read(fd, msg, MSG_HDR_LEN, &n);
+	if (ret < 0)
+		return ret;
+
+	dprint("msg received: %d (%s) [%d]\n",
+	       ntohl(msg->hdr.cmd), cmd_to_name(ntohl(msg->hdr.cmd)),
+	       ntohl(msg->hdr.size));
+
+	size = ntohl(msg->hdr.size);
+	if (size > MSG_MAX_LEN || size < MSG_HDR_LEN) {
+		/* too big or too small */
+		tracecmd_plog("Receive an invalid message(size=%d)\n", size);
+		return -ENOMSG;
+	}
+
+	/* Header only: nothing more to read */
+	if (size == MSG_HDR_LEN)
+		return 0;
+
+	return msg_read_extra(fd, msg, &n, size);
+}
+
+/* Default time, in milliseconds, poll() waits for a message to arrive */
+#define MSG_WAIT_MSEC 5000
+/* Timeout handed to poll() by tracecmd_msg_recv_wait() */
+static int msg_wait_to = MSG_WAIT_MSEC;
+
+/* Return the done flag of @msg_handle; it ends the receive loops of this file */
+bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle)
+{
+ return (volatile int)msg_handle->done;
+}
+
+/* Set the done flag of @msg_handle, as tested by tracecmd_msg_done() */
+void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle)
+{
+ msg_handle->done = true;
+}
+
+/* Warn with the command number and total size found in the header of @msg */
+static void error_operation(struct tracecmd_msg *msg)
+{
+ tracecmd_warning("Message: cmd=%d size=%d", ntohl(msg->hdr.cmd), ntohl(msg->hdr.size));
+}
+
+/*
+ * Wait for @fd to become readable and receive one message into @msg.
+ *
+ * The wait is limited to msg_wait_to milliseconds, except in debug mode
+ * where it is unlimited.
+ *
+ * Returns -ETIMEDOUT on time-out, -errno if poll() fails, otherwise the
+ * result of tracecmd_msg_recv().
+ */
+static int tracecmd_msg_recv_wait(int fd, struct tracecmd_msg *msg)
+{
+	struct pollfd pfd = { .fd = fd, .events = POLLIN };
+	int timeout = tracecmd_get_debug() ? -1 : msg_wait_to;
+	int ready;
+
+	ready = poll(&pfd, 1, timeout);
+	if (ready < 0)
+		return -errno;
+	if (!ready)
+		return -ETIMEDOUT;
+
+	return tracecmd_msg_recv(fd, msg);
+}
+
+/*
+ * Wait for the next message on @fd and receive it into @msg.
+ *
+ * A time-out is reported with a warning. Receiving MSG_CLOSE counts as a
+ * failure as well.
+ *
+ * Returns 0 on success, -ECONNABORTED on MSG_CLOSE, or the negative result
+ * of the receive.
+ */
+static int tracecmd_msg_wait_for_msg(int fd, struct tracecmd_msg *msg)
+{
+	int ret = tracecmd_msg_recv_wait(fd, msg);
+
+	if (ret == -ETIMEDOUT)
+		tracecmd_warning("Connection timed out");
+	if (ret < 0)
+		return ret;
+
+	return ntohl(msg->hdr.cmd) == MSG_CLOSE ? -ECONNABORTED : 0;
+}
+
+/*
+ * Send a MSG_NOT_SUPP message through @msg_handle.
+ * Returns the result of tracecmd_msg_send().
+ */
+static int tracecmd_msg_send_notsupp(struct tracecmd_msg_handle *msg_handle)
+{
+ struct tracecmd_msg msg;
+
+ tracecmd_msg_init(MSG_NOT_SUPP, &msg);
+ return tracecmd_msg_send(msg_handle, &msg);
+}
+
+/*
+ * Answer the unexpected message @msg with MSG_NOT_SUPP.
+ *
+ * Nothing is sent if @msg itself is a MSG_NOT_SUPP, so that two peers do
+ * not keep answering each other.
+ *
+ * Returns 0 if nothing was sent, otherwise the result of the send.
+ */
+static int handle_unexpected_msg(struct tracecmd_msg_handle *msg_handle,
+				 struct tracecmd_msg *msg)
+{
+	/* Don't send MSG_NOT_SUPP back if we just received one */
+	if (ntohl(msg->hdr.cmd) != MSG_NOT_SUPP)
+		return tracecmd_msg_send_notsupp(msg_handle);
+
+	return 0;
+}
+
+/**
+ * tracecmd_msg_send_init_data - send MSG_TINIT and collect the answered ports
+ * @msg_handle: message handle of the connection
+ * @client_ports: return, allocated array with one port per CPU
+ *
+ * Sends the MSG_TINIT message and waits for MSG_RINIT. The payload of the
+ * answer holds one '\0' terminated decimal string per CPU; they are parsed
+ * into a newly allocated array that the caller must free.
+ *
+ * The CPU count comes from the peer. Every port string takes at least its
+ * terminator in the payload, so a count that is negative or larger than the
+ * payload is rejected before anything is allocated.
+ *
+ * Returns 0 on success and a negative error otherwise, in which case
+ * @client_ports is NULL. An answer other than MSG_RINIT is replied to with
+ * MSG_NOT_SUPP and reported as -EOPNOTSUPP.
+ */
+int tracecmd_msg_send_init_data(struct tracecmd_msg_handle *msg_handle,
+				unsigned int **client_ports)
+{
+	struct tracecmd_msg msg;
+	unsigned int *ports;
+	int i, cpus, ret;
+	char *p, *buf_end;
+	ssize_t buf_len;
+
+	*client_ports = NULL;
+
+	tracecmd_msg_init(MSG_TINIT, &msg);
+	ret = make_tinit(msg_handle, &msg);
+	if (ret < 0)
+		goto out;
+
+	/* tracecmd_msg_send() releases and zeroes msg in every case */
+	ret = tracecmd_msg_send(msg_handle, &msg);
+	if (ret < 0)
+		goto out;
+
+	ret = tracecmd_msg_wait_for_msg(msg_handle->fd, &msg);
+	if (ret < 0)
+		goto out;
+
+	if (ntohl(msg.hdr.cmd) != MSG_RINIT) {
+		ret = -EOPNOTSUPP;
+		goto error;
+	}
+
+	buf_len = msg_buf_len(&msg);
+	if (buf_len <= 0) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	/* The string functions below rely on a terminated payload */
+	if (msg.buf[buf_len-1] != '\0') {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	cpus = ntohl(msg.rinit.cpus);
+	if (cpus < 0 || cpus > buf_len) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ports = calloc(cpus, sizeof(*ports));
+	if (!ports) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	buf_end = msg.buf + buf_len;
+	for (i = 0, p = msg.buf; i < cpus; i++, p++) {
+		if (p >= buf_end || tatou(p, &ports[i])) {
+			free(ports);
+			ret = -EINVAL;
+			goto error;
+		}
+		/* Move to the terminator; the loop step skips over it */
+		p = strchr(p, '\0');
+	}
+
+	*client_ports = ports;
+
+	msg_free(&msg);
+	return 0;
+
+error:
+	error_operation(&msg);
+	if (ret == -EOPNOTSUPP)
+		handle_unexpected_msg(msg_handle, &msg);
+out:
+	msg_free(&msg);
+	return ret;
+}
+
+/*
+ * Apply the option string @opt to @msg_handle.
+ *
+ * "tcp" and "vsock" set the matching transport flag of the handle.
+ *
+ * Returns true if the option is known, false otherwise.
+ */
+static bool process_option(struct tracecmd_msg_handle *msg_handle,
+			   const char *opt)
+{
+	unsigned long flag;
+
+	if (!strcmp(opt, "tcp"))
+		flag = TRACECMD_MSG_FL_USE_TCP;
+	else if (!strcmp(opt, "vsock"))
+		flag = TRACECMD_MSG_FL_USE_VSOCK;
+	else
+		return false;
+
+	msg_handle->flags |= flag;
+	return true;
+}
+
+/**
+ * tracecmd_msg_handle_alloc - allocate a message handle
+ * @fd: file descriptor of the connection
+ * @flags: flags stored in the handle
+ *
+ * The new handle is zeroed, has no cache file and is not in cache mode.
+ *
+ * Returns the new handle, or NULL on allocation failure.
+ */
+struct tracecmd_msg_handle *
+tracecmd_msg_handle_alloc(int fd, unsigned long flags)
+{
+	struct tracecmd_msg_handle *msg_handle;
+
+	msg_handle = calloc(1, sizeof(*msg_handle));
+	if (msg_handle) {
+		msg_handle->fd = fd;
+		msg_handle->flags = flags;
+		msg_handle->cfd = -1;
+		msg_handle->cache = false;
+	}
+
+	return msg_handle;
+}
+
+/**
+ * tracecmd_msg_handle_cache - switch a message handle to cache mode
+ * @msg_handle: message handle to switch
+ *
+ * Unless the handle already has one, a cache file is created from the
+ * MSG_CACHE_FILE template with mkstemp() and unlinked right away, so only
+ * its descriptor remains.
+ *
+ * Returns 0 on success, -1 if the cache file cannot be created.
+ */
+int tracecmd_msg_handle_cache(struct tracecmd_msg_handle *msg_handle)
+{
+	if (msg_handle->cfd >= 0) {
+		msg_handle->cache = true;
+		return 0;
+	}
+
+	strcpy(msg_handle->cfile, MSG_CACHE_FILE);
+	msg_handle->cfd = mkstemp(msg_handle->cfile);
+	if (msg_handle->cfd < 0)
+		return -1;
+	unlink(msg_handle->cfile);
+
+	msg_handle->cache = true;
+	return 0;
+}
+
+/*
+ * Send the content of the cache file of @msg_handle to the peer and close
+ * the cache file.
+ *
+ * Cache mode is switched off first, so the data goes to the connection and
+ * not back into the cache.
+ *
+ * Returns 0 when not caching or when all data was sent, -1 if the cache
+ * file cannot be rewound, or the negative result of the failed read or
+ * send.
+ */
+static int flush_cache(struct tracecmd_msg_handle *msg_handle)
+{
+	char chunk[MSG_MAX_DATA_LEN];
+	int ret;
+
+	if (!msg_handle->cache || msg_handle->cfd < 0)
+		return 0;
+
+	msg_handle->cache = false;
+	if (lseek64(msg_handle->cfd, 0, SEEK_SET) == (off64_t)-1)
+		return -1;
+
+	for (;;) {
+		ret = read(msg_handle->cfd, chunk, MSG_MAX_DATA_LEN);
+		if (ret <= 0)
+			break;
+
+		ret = tracecmd_msg_data_send(msg_handle, chunk, ret);
+		if (ret < 0)
+			break;
+	}
+
+	close(msg_handle->cfd);
+	msg_handle->cfd = -1;
+
+	return ret;
+}
+
+/**
+ * tracecmd_msg_handle_close - close and free a message handle
+ * @msg_handle: message handle to release
+ *
+ * Closes the connection and the cache file, as far as they are open, and
+ * frees the handle.
+ */
+void tracecmd_msg_handle_close(struct tracecmd_msg_handle *msg_handle)
+{
+	const int fds[] = { msg_handle->fd, msg_handle->cfd };
+	size_t i;
+
+	for (i = 0; i < sizeof(fds) / sizeof(fds[0]); i++) {
+		if (fds[i] >= 0)
+			close(fds[i]);
+	}
+
+	free(msg_handle);
+}
+
+/* NOTE(review): not referenced by the function below - check for other users */
+#define MAX_OPTION_SIZE 4096
+
+/**
+ * tracecmd_msg_initial_setting - receive and process the MSG_TINIT message
+ * @msg_handle: message handle of the connection
+ *
+ * Waits for the first message of the peer, which must be MSG_TINIT. The
+ * CPU count it carries is stored in @msg_handle, and the option strings of
+ * its payload ('\0' terminated, one after the other) are applied to the
+ * handle. Options that are not understood are only logged.
+ *
+ * Returns the page size sent by the peer on success and a negative error
+ * otherwise. A message other than MSG_TINIT is replied to with
+ * MSG_NOT_SUPP and reported as -EOPNOTSUPP.
+ */
+int tracecmd_msg_initial_setting(struct tracecmd_msg_handle *msg_handle)
+{
+ struct tracecmd_msg msg;
+ char *p, *buf_end;
+ ssize_t buf_len;
+ int pagesize;
+ int options, i;
+ int cpus;
+ int ret;
+
+ /* Start from a zeroed message so msg_free() is safe on every path */
+ memset(&msg, 0, sizeof(msg));
+ ret = tracecmd_msg_recv_wait(msg_handle->fd, &msg);
+ if (ret < 0) {
+ if (ret == -ETIMEDOUT)
+ tracecmd_warning("Connection timed out");
+ return ret;
+ }
+
+ if (ntohl(msg.hdr.cmd) != MSG_TINIT) {
+ ret = -EOPNOTSUPP;
+ goto error;
+ }
+
+ cpus = ntohl(msg.tinit.cpus);
+ tracecmd_plog("cpus=%d\n", cpus);
+ if (cpus < 0) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ msg_handle->cpu_count = cpus;
+
+ pagesize = ntohl(msg.tinit.page_size);
+ tracecmd_plog("pagesize=%d\n", pagesize);
+ if (pagesize <= 0) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ buf_len = msg_buf_len(&msg);
+ if (buf_len < 0) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ /* No payload means no options, whatever opt_num says */
+ if (buf_len == 0)
+ goto no_options;
+
+ /* The string functions below rely on a terminated payload */
+ if (msg.buf[buf_len-1] != '\0') {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ buf_end = msg.buf + buf_len;
+ options = ntohl(msg.tinit.opt_num);
+ for (i = 0, p = msg.buf; i < options; i++, p++) {
+ /* More options announced than the payload holds */
+ if (p >= buf_end) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ /* do we understand this option? */
+ if (!process_option(msg_handle, p))
+ tracecmd_plog("Cannot understand option '%s'\n", p);
+
+ /* Move to the terminator; the loop step skips over it */
+ p = strchr(p, '\0');
+ }
+
+no_options:
+ msg_free(&msg);
+ return pagesize;
+
+error:
+ error_operation(&msg);
+ if (ret == -EOPNOTSUPP)
+ handle_unexpected_msg(msg_handle, &msg);
+ msg_free(&msg);
+ return ret;
+}
+
+/**
+ * tracecmd_msg_send_port_array - send the MSG_RINIT message
+ * @msg_handle: message handle of the connection
+ * @ports: array with one port per CPU of @msg_handle
+ *
+ * Returns 0 on success, or a negative error if the message cannot be
+ * built or sent.
+ */
+int tracecmd_msg_send_port_array(struct tracecmd_msg_handle *msg_handle,
+				 unsigned int *ports)
+{
+	struct tracecmd_msg msg;
+	int ret;
+
+	tracecmd_msg_init(MSG_RINIT, &msg);
+
+	ret = make_rinit(&msg, msg_handle->cpu_count, ports);
+	if (ret < 0)
+		return ret;
+
+	ret = tracecmd_msg_send(msg_handle, &msg);
+
+	return ret < 0 ? ret : 0;
+}
+
+/**
+ * tracecmd_msg_send_close_msg - send the MSG_CLOSE message
+ * @msg_handle: message handle of the connection
+ *
+ * Returns the result of tracecmd_msg_send().
+ */
+int tracecmd_msg_send_close_msg(struct tracecmd_msg_handle *msg_handle)
+{
+ struct tracecmd_msg msg;
+
+ tracecmd_msg_init(MSG_CLOSE, &msg);
+ return tracecmd_msg_send(msg_handle, &msg);
+}
+
+/**
+ * tracecmd_msg_send_close_resp_msg - send the MSG_CLOSE_RESP message
+ * @msg_handle: message handle of the connection
+ *
+ * Returns the result of tracecmd_msg_send().
+ */
+int tracecmd_msg_send_close_resp_msg(struct tracecmd_msg_handle *msg_handle)
+{
+ struct tracecmd_msg msg;
+
+ tracecmd_msg_init(MSG_CLOSE_RESP, &msg);
+ return tracecmd_msg_send(msg_handle, &msg);
+}
+
+/**
+ * tracecmd_msg_data_send - send a block of data as MSG_SEND_DATA messages
+ * @msg_handle: message handle of the connection
+ * @buf: the data to send
+ * @size: number of bytes in @buf
+ *
+ * The data is split into messages of at most MSG_MAX_DATA_LEN bytes of
+ * payload. While the handle is in cache mode only the payload is written,
+ * into the cache file.
+ *
+ * Returns 0 on success (or if @size is 0), -EINVAL if @size is negative,
+ * -ENOMEM on allocation failure, or the negative result of a failed write.
+ */
+int tracecmd_msg_data_send(struct tracecmd_msg_handle *msg_handle,
+			   const char *buf, int size)
+{
+	struct tracecmd_msg msg;
+	int n;
+	int ret = 0;
+	int count = 0;
+
+	/* Don't bother doing anything if there's nothing to do */
+	if (!size)
+		return 0;
+
+	/* A negative size would end up as a huge length in memcpy() */
+	if (size < 0)
+		return -EINVAL;
+
+	tracecmd_msg_init(MSG_SEND_DATA, &msg);
+
+	msg.buf = malloc(MSG_MAX_DATA_LEN);
+	if (!msg.buf)
+		return -ENOMEM;
+
+	/* Full sized messages until the last, shorter, one */
+	msg.hdr.size = htonl(MSG_MAX_LEN);
+
+	n = size;
+	while (n) {
+		if (n > MSG_MAX_DATA_LEN) {
+			memcpy(msg.buf, buf + count, MSG_MAX_DATA_LEN);
+			n -= MSG_MAX_DATA_LEN;
+			count += MSG_MAX_DATA_LEN;
+		} else {
+			msg.hdr.size = htonl(MSG_HDR_LEN + n);
+			memcpy(msg.buf, buf + count, n);
+			n = 0;
+		}
+		ret = msg_write(msg_handle, &msg);
+		if (ret < 0)
+			break;
+	}
+
+	msg_free(&msg);
+	return ret;
+}
+
+/*
+ * Flush the handle's cache and send an MSG_FIN_DATA message.
+ *
+ * Returns 0 on success, or the negative value from tracecmd_msg_send().
+ */
+int tracecmd_msg_finish_sending_data(struct tracecmd_msg_handle *msg_handle)
+{
+	struct tracecmd_msg fin;
+	int err;
+
+	flush_cache(msg_handle);
+	tracecmd_msg_init(MSG_FIN_DATA, &fin);
+	err = tracecmd_msg_send(msg_handle, &fin);
+
+	return err < 0 ? err : 0;
+}
+
+/*
+ * Receive messages from @msg_handle until MSG_FIN_DATA arrives (or the
+ * handle reports it is done) and write every MSG_SEND_DATA payload to @ofd.
+ * Any other command is passed to handle_unexpected_msg().
+ *
+ * Returns 0 on success, or a negative value when receiving, handling an
+ * unexpected message, or writing to @ofd fails.
+ */
+int tracecmd_msg_read_data(struct tracecmd_msg_handle *msg_handle, int ofd)
+{
+	struct tracecmd_msg msg;
+	int left, len, cmd;
+	ssize_t s;
+	int ret;
+
+	while (!tracecmd_msg_done(msg_handle)) {
+		ret = tracecmd_msg_recv_wait(msg_handle->fd, &msg);
+		if (ret < 0) {
+			/* assumes ret is a negated errno value - TODO confirm */
+			tracecmd_warning("reading client %d (%s)", ret, strerror(-ret));
+			return ret;
+		}
+
+		cmd = ntohl(msg.hdr.cmd);
+		if (cmd == MSG_FIN_DATA) {
+			/* Finish receiving data */
+			msg_free(&msg);
+			break;
+		} else if (cmd != MSG_SEND_DATA) {
+			ret = handle_unexpected_msg(msg_handle, &msg);
+			if (ret < 0)
+				goto error;
+			goto next;
+		}
+
+		len = msg_buf_len(&msg);
+		left = len;
+		while (left > 0) {
+			/*
+			 * The offset is derived from the bytes still pending, so
+			 * a failed or interrupted write() never disturbs it.
+			 */
+			s = write(ofd, msg.buf + (len - left), left);
+			if (s < 0) {
+				if (errno == EINTR)
+					continue;
+				/* save errno before anything can overwrite it */
+				ret = -errno;
+				tracecmd_warning("writing to file");
+				goto error;
+			}
+			left -= s;
+		}
+
+next:
+		msg_free(&msg);
+	}
+
+	return 0;
+
+error:
+	error_operation(&msg);
+	msg_free(&msg);
+	return ret;
+}
+
+/*
+ * Read all data messages into @ofd, then wait for the peer's close message.
+ * Returns the first non-zero result, or that of tracecmd_msg_wait_close().
+ */
+int tracecmd_msg_collect_data(struct tracecmd_msg_handle *msg_handle, int ofd)
+{
+	int err = tracecmd_msg_read_data(msg_handle, ofd);
+
+	return err ? err : tracecmd_msg_wait_close(msg_handle);
+}
+
+/*
+ * Receive messages until one with command @cmd arrives. Every other message
+ * is reported through error_operation() and handle_unexpected_msg().
+ *
+ * Returns 0 once @cmd is seen, a negative value if receiving or handling
+ * fails, or the last intermediate result (-1 if nothing was received) when
+ * the handle reports done before @cmd shows up.
+ */
+static int tracecmd_msg_wait_for_cmd(struct tracecmd_msg_handle *msg_handle, enum tracecmd_msg_cmd cmd)
+{
+	struct tracecmd_msg msg;
+	int ret = -1;
+
+	memset(&msg, 0, sizeof(msg));
+	while (!tracecmd_msg_done(msg_handle)) {
+		ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+		if (ret < 0)
+			goto error;
+
+		if (ntohl(msg.hdr.cmd) == cmd) {
+			/* release the received message on success too */
+			msg_free(&msg);
+			return 0;
+		}
+
+		error_operation(&msg);
+		ret = handle_unexpected_msg(msg_handle, &msg);
+		if (ret < 0)
+			goto error;
+
+		msg_free(&msg);
+	}
+
+error:
+	msg_free(&msg);
+	return ret;
+}
+
+/* Wait until an MSG_CLOSE message is received on @msg_handle. */
+int tracecmd_msg_wait_close(struct tracecmd_msg_handle *msg_handle)
+{
+	int status = tracecmd_msg_wait_for_cmd(msg_handle, MSG_CLOSE);
+
+	return status;
+}
+
+/* Wait until an MSG_CLOSE_RESP message is received on @msg_handle. */
+int tracecmd_msg_wait_close_resp(struct tracecmd_msg_handle *msg_handle)
+{
+	int status = tracecmd_msg_wait_for_cmd(msg_handle, MSG_CLOSE_RESP);
+
+	return status;
+}
+
+/*
+ * Append a TRACE_REQUEST_TSYNC_PROTOS parameter to the request buffer.
+ *
+ * The parameter is: id (network order), payload length (network order),
+ * then every name from @protos->names with its NUL, followed by one extra
+ * NUL byte. *@buf is grown with realloc() and *@size updated on success.
+ *
+ * Returns 0 on success, -1 if the buffer cannot be grown (in which case
+ * *@buf and *@size are left untouched).
+ */
+static int make_trace_req_protos(char **buf, int *size,
+				 struct tracecmd_tsync_protos *protos)
+{
+	int protos_size = 1;
+	unsigned int word;
+	size_t buf_size;
+	size_t len;
+	char **names;
+	char *nbuf;
+	char *p;
+
+	for (names = protos->names; *names; names++)
+		protos_size += strlen(*names) + 1;
+
+	buf_size = TRACE_REQ_PARAM_SIZE + protos_size;
+	nbuf = realloc(*buf, *size + buf_size);
+	if (!nbuf)
+		return -1;
+
+	p = nbuf + *size;
+	/* zero fill so the trailing terminator byte is already in place */
+	memset(p, 0, buf_size);
+
+	/*
+	 * *size depends on the length of earlier parameters, so p may not be
+	 * aligned for an int store; copy the words in byte-wise.
+	 */
+	word = htonl(TRACE_REQUEST_TSYNC_PROTOS);
+	memcpy(p, &word, sizeof(word));
+	p += sizeof(word);
+	word = htonl(protos_size);
+	memcpy(p, &word, sizeof(word));
+	p += sizeof(word);
+
+	for (names = protos->names; *names; names++) {
+		len = strlen(*names) + 1;
+		memcpy(p, *names, len);
+		p += len;
+	}
+
+	*size += buf_size;
+	*buf = nbuf;
+	return 0;
+}
+
+/*
+ * Append a TRACE_REQUEST_ARGS parameter to the request buffer.
+ *
+ * The parameter is: id (network order), payload length (network order),
+ * then the payload: @argc (network order) followed by every @argv string
+ * with its NUL. *@buf is grown with realloc() and *@size updated on success.
+ *
+ * Returns 0 on success, -1 if the buffer cannot be grown (in which case
+ * *@buf and *@size are left untouched).
+ */
+static int make_trace_req_args(char **buf, int *size, int argc, char **argv)
+{
+	unsigned int word;
+	size_t args_size;
+	size_t buf_size;
+	char *nbuf;
+	char *p;
+	int i;
+
+	args_size = sizeof(int);
+	for (i = 0; i < argc; i++)
+		args_size += strlen(argv[i]) + 1;
+
+	buf_size = TRACE_REQ_PARAM_SIZE + args_size;
+	nbuf = realloc(*buf, *size + buf_size);
+	if (!nbuf)
+		return -1;
+
+	p = nbuf + *size;
+	memset(p, 0, buf_size);
+
+	/* p is not guaranteed to be int aligned; copy the words in byte-wise */
+	word = htonl(TRACE_REQUEST_ARGS);
+	memcpy(p, &word, sizeof(word));
+	p += sizeof(word);
+	word = htonl(args_size);
+	memcpy(p, &word, sizeof(word));
+	p += sizeof(word);
+
+	word = htonl(argc);
+	memcpy(p, &word, sizeof(word));
+	p += sizeof(word);
+	for (i = 0; i < argc; i++)
+		p = stpcpy(p, argv[i]) + 1;
+
+	*size += buf_size;
+	*buf = nbuf;
+	return 0;
+}
+
+/*
+ * Fill in an MSG_TRACE_REQ message: flags, trace id and the optional
+ * parameter buffer holding the arguments and the time sync protocol names.
+ *
+ * On success the parameter buffer is attached to @msg->buf and the header
+ * size grows by its length. Returns 0 on success, or the negative value of
+ * the helper that failed to grow the buffer; nothing is attached then.
+ */
+static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv,
+			  bool use_fifos, unsigned long long trace_id,
+			  struct tracecmd_tsync_protos *protos)
+{
+	int size = 0;
+	char *buf = NULL;
+	int ret;
+
+	msg->trace_req.flags = 0;
+	if (use_fifos)
+		msg->trace_req.flags |= MSG_TRACE_USE_FIFOS;
+	msg->trace_req.flags = htonl(msg->trace_req.flags);
+	msg->trace_req.trace_id = htonll(trace_id);
+
+	if (argc && argv) {
+		ret = make_trace_req_args(&buf, &size, argc, argv);
+		if (ret < 0)
+			goto error;
+	}
+	if (protos && protos->names) {
+		ret = make_trace_req_protos(&buf, &size, protos);
+		if (ret < 0)
+			goto error;
+	}
+
+	msg->buf = buf;
+	msg->hdr.size = htonl(ntohl(msg->hdr.size) + size);
+
+	return 0;
+
+error:
+	/* a failed helper leaves buf as it was, so it is still ours to free */
+	free(buf);
+	return ret;
+}
+
+/*
+ * Build an MSG_TRACE_REQ message from the given arguments, fifo flag,
+ * trace id and time sync protocols, and send it over @msg_handle.
+ *
+ * Returns the negative value from make_trace_req() if building fails,
+ * otherwise the result of tracecmd_msg_send().
+ */
+int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
+				int argc, char **argv, bool use_fifos,
+				unsigned long long trace_id,
+				struct tracecmd_tsync_protos *protos)
+{
+	struct tracecmd_msg req;
+	int err;
+
+	tracecmd_msg_init(MSG_TRACE_REQ, &req);
+
+	err = make_trace_req(&req, argc, argv, use_fifos, trace_id, protos);
+	if (err >= 0)
+		err = tracecmd_msg_send(msg_handle, &req);
+
+	return err;
+}
+
+/*
+ * Decode a TRACE_REQUEST_TSYNC_PROTOS payload: a sequence of NUL terminated
+ * names, ended by an empty string or by the end of the buffer.
+ *
+ * On success *@protos points to a newly allocated list whose ->names array
+ * is NULL terminated and holds strdup()ed copies of the names.
+ *
+ * Returns 0 on success, -1 if the payload is not NUL terminated or memory
+ * cannot be allocated.
+ */
+static int get_trace_req_protos(char *buf, int length,
+				struct tracecmd_tsync_protos **protos)
+{
+	struct tracecmd_tsync_protos *plist = NULL;
+	int count = 0;
+	char *end;
+	char *p;
+	int j;
+
+	/* The data comes from the peer: never let strlen() leave the buffer */
+	if (length > 0 && buf[length - 1] != '\0')
+		return -1;
+
+	end = buf + (length > 0 ? length : 0);
+	for (p = buf; p < end && *p; p += strlen(p) + 1)
+		count++;
+
+	plist = calloc(1, sizeof(struct tracecmd_tsync_protos));
+	if (!plist)
+		goto error;
+	plist->names = calloc(count + 1, sizeof(char *));
+	if (!plist->names)
+		goto error;
+
+	for (j = 0, p = buf; j < count; j++, p += strlen(p) + 1) {
+		plist->names[j] = strdup(p);
+		if (!plist->names[j])
+			goto error;
+	}
+
+	*protos = plist;
+	return 0;
+error:
+	if (plist) {
+		if (plist->names) {
+			/* unused slots are NULL thanks to calloc() */
+			for (j = 0; j < count; j++)
+				free(plist->names[j]);
+		}
+		free(plist->names);
+		free(plist);
+	}
+	return -1;
+}
+
+/*
+ * Decode a TRACE_REQUEST_ARGS payload: an argument count in network order
+ * followed by that many NUL terminated strings.
+ *
+ * On success *@argc is the count and *@argv a newly allocated, NULL
+ * terminated array. All strings live in one allocation that starts at
+ * argv[0], so the result is released with free(argv[0]); free(argv);
+ * (argv[0] is NULL when the count is 0).
+ *
+ * Returns 0 on success, -EINVAL for a malformed payload, -ENOMEM when
+ * memory cannot be allocated.
+ */
+static int get_trace_req_args(char *buf, int length, int *argc, char ***argv)
+{
+	unsigned int nr_args;
+	unsigned int i;
+	char *p, *buf_end;
+	char **args = NULL;
+	char *vagrs = NULL;
+	int ret;
+
+	/*
+	 * Compare as signed: against a bare sizeof() a negative @length would
+	 * be converted to a huge unsigned value and slip through.
+	 */
+	if (length <= (int)sizeof(int) || buf[length - 1] != '\0') {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* buf is not guaranteed to be int aligned; copy the count out */
+	memcpy(&nr_args, buf, sizeof(nr_args));
+	nr_args = ntohl(nr_args);
+	buf += sizeof(int);
+	length -= sizeof(int);
+
+	/* every argument occupies at least its NUL byte */
+	if (nr_args > (unsigned int)length) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* one extra slot keeps the array NULL terminated and never empty */
+	args = calloc((size_t)nr_args + 1, sizeof(*args));
+	if (!args) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	vagrs = calloc(length, sizeof(char));
+	if (!vagrs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(vagrs, buf, length);
+	buf_end = vagrs + length;
+	for (i = 0, p = vagrs; i < nr_args; i++, p++) {
+		if (p >= buf_end) {
+			ret = -EINVAL;
+			goto out;
+		}
+		args[i] = p;
+		p = strchr(p, '\0');
+	}
+
+	/* with no arguments nothing references the copy; don't leak it */
+	if (!nr_args)
+		free(vagrs);
+
+	*argc = nr_args;
+	*argv = args;
+	return 0;
+
+out:
+	free(args);
+	free(vagrs);
+	return ret;
+
+}
+
+/*
+ * Receive an MSG_TRACE_REQ message and decode it.
+ *
+ * @use_fifos and @trace_id are always filled in. @argc/@argv and @protos
+ * are only written when the message carries the matching parameter.
+ * Parameters with an unknown id are skipped; decoding stops quietly at a
+ * parameter whose length does not fit in the remaining data.
+ *
+ * Returns 0 on success or a negative value on failure, including a
+ * parameter that cannot be decoded.
+ *
+ * NOTE: On success, the returned `argv` should be freed with:
+ *     free(argv[0]);
+ *     free(argv);
+ * and `protos` by freeing every protos->names[i], then protos->names,
+ * then protos itself.
+ */
+int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
+				int *argc, char ***argv, bool *use_fifos,
+				unsigned long long *trace_id,
+				struct tracecmd_tsync_protos **protos)
+{
+	struct tracecmd_msg msg;
+	unsigned int param_id;
+	unsigned int word;
+	int param_length;
+	ssize_t buf_len;
+	char *p;
+	int ret;
+
+	ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+	if (ret < 0)
+		return ret;
+
+	if (ntohl(msg.hdr.cmd) != MSG_TRACE_REQ) {
+		ret = -ENOTSUP;
+		goto out;
+	}
+
+	buf_len = ntohl(msg.hdr.size) - MSG_HDR_LEN - ntohl(msg.hdr.cmd_size);
+	if (buf_len < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	*use_fifos = ntohl(msg.trace_req.flags) & MSG_TRACE_USE_FIFOS;
+	*trace_id = ntohll(msg.trace_req.trace_id);
+	p = msg.buf;
+	ret = 0;
+	while (buf_len > (ssize_t)(2 * sizeof(int))) {
+		/* parameters are packed back to back, so p may be unaligned */
+		memcpy(&word, p, sizeof(word));
+		param_id = ntohl(word);
+		p += sizeof(int);
+		buf_len -= sizeof(int);
+		memcpy(&word, p, sizeof(word));
+		param_length = ntohl(word);
+		p += sizeof(int);
+		buf_len -= sizeof(int);
+		/* a negative length would walk p backwards, out of the buffer */
+		if (param_length < 0 || buf_len < param_length)
+			break;
+		ret = 0;
+		switch (param_id) {
+		case TRACE_REQUEST_ARGS:
+			ret = get_trace_req_args(p, param_length, argc, argv);
+			break;
+		case TRACE_REQUEST_TSYNC_PROTOS:
+			ret = get_trace_req_protos(p, param_length, protos);
+			break;
+		default:
+			break;
+		}
+		if (ret)
+			break;
+		buf_len -= param_length;
+		p += param_length;
+	}
+
+	/* report a parameter that failed to decode instead of hiding it */
+	if (ret)
+		goto out;
+
+	msg_free(&msg);
+	return 0;
+
+out:
+	error_operation(&msg);
+	if (ret == -EOPNOTSUPP)
+		handle_unexpected_msg(msg_handle, &msg);
+	msg_free(&msg);
+	return ret;
+}
+
+/**
+ * tracecmd_msg_send_time_sync - Send a time sync packet
+ * @msg_handle: message handle, holding the communication context
+ * @sync_protocol: name of the time sync protocol, string up to
+ *		   TRACECMD_TSYNC_PNAME_LENGTH characters length.
+ * @sync_msg_id: id of the time sync message, protocol dependent
+ * @payload_size: size of the packet payload, 0 in case of no payload
+ * @payload: pointer to the packet payload, or NULL in case of no payload
+ *
+ * The payload is sent with msg_send_nofree() and stays owned by the caller.
+ *
+ * Returns 0 if packet is sent successfully, or negative error otherwise.
+ */
+int tracecmd_msg_send_time_sync(struct tracecmd_msg_handle *msg_handle,
+				char *sync_protocol, unsigned int sync_msg_id,
+				unsigned int payload_size, char *payload)
+{
+	struct tracecmd_msg sync_msg;
+	unsigned int total;
+
+	tracecmd_msg_init(MSG_TIME_SYNC, &sync_msg);
+
+	strncpy(sync_msg.tsync.sync_protocol_name, sync_protocol, TRACECMD_TSYNC_PNAME_LENGTH);
+	sync_msg.tsync.sync_msg_id = htonl(sync_msg_id);
+
+	/* the header already accounts for the fixed part of the message */
+	total = ntohl(sync_msg.hdr.size) + payload_size;
+	sync_msg.hdr.size = htonl(total);
+	sync_msg.buf = payload;
+
+	return msg_send_nofree(msg_handle, &sync_msg);
+}
+
+/**
+ * tracecmd_msg_recv_time_sync - Receive a time sync packet
+ * @msg_handle: message handle, holding the communication context
+ * @sync_protocol: return the name of the packet's time sync protocol.
+ *		   It must point to a preallocated buffer with size
+ *		   TRACECMD_TSYNC_PNAME_LENGTH. May be NULL.
+ * @sync_msg_id: return the id of the packet's time sync message. May be NULL.
+ * @payload_size: size of the packet's payload, can be:
+ *	 NULL - the payload is of no interest and is ignored
+ *	 pointer to an unsigned int with value 0 - memory is allocated, the
+ *		payload is copied into it and the value is updated with the
+ *		size of the payload
+ *	 pointer to an unsigned int with value greater than 0 - size of the
+ *		preallocated memory passed in through @payload
+ * @payload: pointer to the packet payload, can be:
+ *	 NULL - the payload is of no interest and is ignored
+ *	 pointer to char * - when *@payload_size is 0, newly allocated memory
+ *		holding the packet's payload is returned here and must be
+ *		released with free(). When *@payload_size is greater than 0,
+ *		this must point to a preallocated array of that size; the
+ *		payload is copied into it if it fits, otherwise (or if the
+ *		pointer is NULL) -ENOMEM is returned.
+ *
+ * Nothing is copied when the packet carries no payload.
+ *
+ * Returns 0 if packet is received successfully, or negative error otherwise.
+ */
+int tracecmd_msg_recv_time_sync(struct tracecmd_msg_handle *msg_handle,
+ char *sync_protocol,
+ unsigned int *sync_msg_id,
+ unsigned int *payload_size, char **payload)
+{
+ struct tracecmd_msg msg;
+ int ret = -1;
+ int buf_size;
+
+ /* zeroed up front so the msg_free() at "out" is safe on every path */
+ memset(&msg, 0, sizeof(msg));
+ ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+ if (ret < 0)
+ goto out;
+
+ if (ntohl(msg.hdr.cmd) != MSG_TIME_SYNC) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (sync_protocol)
+ strncpy(sync_protocol, msg.tsync.sync_protocol_name,
+ TRACECMD_TSYNC_PNAME_LENGTH);
+ if (sync_msg_id)
+ *sync_msg_id = ntohl(msg.tsync.sync_msg_id);
+
+ buf_size = msg_buf_len(&msg);
+ if (buf_size < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (buf_size && payload && payload_size) {
+ /* caller supplied a buffer: copy only if the payload fits */
+ if (*payload_size) {
+ if (*payload_size < buf_size || *payload == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memcpy(*payload, msg.buf, buf_size);
+ goto out;
+ }
+
+ /* no buffer supplied: hand back a freshly allocated copy */
+ *payload = malloc(buf_size);
+ if (*payload == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ *payload_size = buf_size;
+ memcpy(*payload, msg.buf, buf_size);
+ }
+
+out:
+ msg_free(&msg);
+ return ret;
+}
+
+/*
+ * Fill in an MSG_TRACE_RESP message. The port list is rendered into a
+ * newly allocated @msg->buf by write_uints(); the fixed fields are stored
+ * in network byte order. A NULL @tsync_proto is sent as an empty name.
+ *
+ * Returns 0 on success, -ENOMEM if the port buffer cannot be allocated.
+ */
+static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
+			   unsigned int *ports, bool use_fifos,
+			   unsigned long long trace_id,
+			   const char *tsync_proto,
+			   unsigned int tsync_port)
+{
+	const char *proto_name = tsync_proto ? tsync_proto : "";
+	int ports_len;
+
+	/* first pass only measures, second pass fills the buffer */
+	ports_len = write_uints(NULL, 0, ports, nr_cpus);
+	msg->buf = malloc(ports_len);
+	if (!msg->buf)
+		return -ENOMEM;
+	write_uints(msg->buf, ports_len, ports, nr_cpus);
+
+	msg->hdr.size = htonl(ntohl(msg->hdr.size) + ports_len);
+
+	if (use_fifos)
+		msg->trace_resp.flags = MSG_TRACE_USE_FIFOS;
+	else
+		msg->trace_resp.flags = 0;
+	msg->trace_resp.flags = htonl(msg->trace_resp.flags);
+
+	strncpy(msg->trace_resp.tsync_proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH);
+	msg->trace_resp.tsync_port = htonl(tsync_port);
+	msg->trace_resp.cpus = htonl(nr_cpus);
+	msg->trace_resp.page_size = htonl(page_size);
+	msg->trace_resp.trace_id = htonll(trace_id);
+
+	return 0;
+}
+
+/*
+ * Build an MSG_TRACE_RESP message from the given values and send it.
+ *
+ * Returns the negative value from make_trace_resp() if building fails,
+ * otherwise the result of tracecmd_msg_send().
+ */
+int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
+				 int nr_cpus, int page_size,
+				 unsigned int *ports, bool use_fifos,
+				 unsigned long long trace_id,
+				 const char *tsync_proto, unsigned int tsync_port)
+{
+	struct tracecmd_msg resp;
+	int err;
+
+	tracecmd_msg_init(MSG_TRACE_RESP, &resp);
+
+	err = make_trace_resp(&resp, page_size, nr_cpus, ports,
+			      use_fifos, trace_id, tsync_proto, tsync_port);
+	if (err >= 0)
+		err = tracecmd_msg_send(msg_handle, &resp);
+
+	return err;
+}
+
+/*
+ * Receive an MSG_TRACE_RESP message and decode it.
+ *
+ * On success *@tsync_proto is a newly allocated copy of the protocol name
+ * and *@ports a newly allocated array of *@nr_cpus port numbers; both are
+ * owned by the caller and released with free(). If decoding fails after
+ * they were allocated, both are freed here and set to NULL.
+ *
+ * Returns 0 on success, or a negative value on failure.
+ */
+int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
+				 int *nr_cpus, int *page_size,
+				 unsigned int **ports, bool *use_fifos,
+				 unsigned long long *trace_id,
+				 char **tsync_proto,
+				 unsigned int *tsync_port)
+{
+	struct tracecmd_msg msg;
+	char *p, *buf_end;
+	ssize_t buf_len;
+	int i, ret;
+
+	ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+	if (ret < 0)
+		return ret;
+
+	if (ntohl(msg.hdr.cmd) != MSG_TRACE_RESP) {
+		ret = -ENOTSUP;
+		goto out;
+	}
+
+	buf_len = msg_buf_len(&msg);
+	if (buf_len <= 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	*use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS;
+	*nr_cpus = ntohl(msg.trace_resp.cpus);
+	*page_size = ntohl(msg.trace_resp.page_size);
+	*trace_id = ntohll(msg.trace_resp.trace_id);
+	*tsync_port = ntohl(msg.trace_resp.tsync_port);
+
+	/* the count comes from the peer; a negative one is never valid */
+	if (*nr_cpus < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * The sender fills the name with strncpy() over the whole field, so
+	 * a NUL terminator is not guaranteed; bound the copy.
+	 */
+	*tsync_proto = strndup(msg.trace_resp.tsync_proto_name,
+			       TRACECMD_TSYNC_PNAME_LENGTH);
+	*ports = NULL;
+	if (!*tsync_proto) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	*ports = calloc(*nr_cpus, sizeof(**ports));
+	if (!*ports) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	buf_end = msg.buf + buf_len;
+	for (i = 0, p = msg.buf; i < *nr_cpus; i++, p++) {
+		if (p >= buf_end || tatou(p, &(*ports)[i])) {
+			ret = -EINVAL;
+			goto out_free;
+		}
+		p = strchr(p, '\0');
+	}
+
+	msg_free(&msg);
+	return 0;
+
+out_free:
+	/* don't hand back dangling or leaked pointers on failure */
+	free(*ports);
+	*ports = NULL;
+	free(*tsync_proto);
+	*tsync_proto = NULL;
+out:
+	error_operation(&msg);
+	if (ret == -EOPNOTSUPP)
+		handle_unexpected_msg(msg_handle, &msg);
+	msg_free(&msg);
+	return ret;
+}
diff --git a/lib/trace-cmd/trace-output.c b/lib/trace-cmd/trace-output.c
new file mode 100644
index 00000000..ca7132e1
--- /dev/null
+++ b/lib/trace-cmd/trace-output.c
@@ -0,0 +1,2819 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#define _LARGEFILE64_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <glob.h>
+
+#include "tracefs.h"
+#include "trace-cmd.h"
+#include "trace-cmd-local.h"
+#include "trace-write-local.h"
+#include "list.h"
+#include "trace-msg.h"
+
+/* We can't depend on the host size for size_t, all must be 64 bit */
+typedef unsigned long long tsize_t;
+typedef long long stsize_t;
+
+/* One option record kept on the options list of a tracecmd_output handle */
+struct tracecmd_option {
+ unsigned short id;
+ int size;
+ /* released with free() in tracecmd_output_free() */
+ void *data;
+ tsize_t offset;
+ /* links the option into tracecmd_output.options */
+ struct list_head list;
+};
+
+/* One buffer record kept on the buffers list of a tracecmd_output handle */
+struct tracecmd_buffer {
+ int cpus;
+ /* released with free() in tracecmd_output_free() */
+ void *name;
+ tsize_t offset;
+ struct tracecmd_option *option;
+ /* links the buffer into tracecmd_output.buffers */
+ struct list_head list;
+};
+
+/* Output flag bits. NOTE(review): users are outside this part of the file. */
+enum {
+ OUTPUT_FL_SEND_META = (1 << 0),
+};
+
+/* State of a trace file (or message stream) that is being written */
+struct tracecmd_output {
+ /* output file descriptor; closed and set to -1 by tracecmd_output_close() */
+ int fd;
+ int page_size;
+ int cpus;
+ /* when set, the convert_endian_*() helpers pass values through tep_read_number() */
+ struct tep_handle *pevent;
+ /* lazily cached copy of the tracing directory, see find_tracing_dir() */
+ char *tracing_dir;
+ char *kallsyms;
+ int nr_options;
+ /* see tracecmd_set_quiet() / tracecmd_get_quiet() */
+ bool quiet;
+ unsigned long file_state;
+ /* compared against FILE_VERSION_SECTIONS by HAS_SECTIONS() */
+ unsigned long file_version;
+
+ /* size of meta-data strings, not yet stored in the file */
+ unsigned long strings_p;
+ /* current virtual offset of meta-data string */
+ unsigned long strings_offs;
+
+ unsigned long long options_start;
+ bool big_endian;
+ /* while true, writes, seeks and preads go through @compress */
+ bool do_compress;
+ struct tracecmd_compression *compress;
+
+ struct list_head options;
+ struct list_head buffers;
+ /* when set (and not compressing), writes and seeks go through the message handle */
+ struct tracecmd_msg_handle *msg_handle;
+ /* owned copy of the clock name, see tracecmd_set_out_clock() */
+ char *trace_clock;
+
+ /* meta-data strings, not yet stored in the file */
+ char *strings;
+};
+
+/* One event of a system: singly linked, @name and @file are owned copies */
+struct list_event {
+ struct list_event *next;
+ char *name;
+ /* path handed to add_list_event_system(); read by copy_event_system() */
+ char *file;
+};
+
+/* One event system: singly linked, holding its own list of events */
+struct list_event_system {
+ struct list_event_system *next;
+ struct list_event *events;
+ /* owned copy of the system name */
+ char *name;
+};
+
+#define HAS_SECTIONS(H) ((H)->file_version >= FILE_VERSION_SECTIONS)
+
+static int write_options(struct tracecmd_output *handle);
+static int save_string_section(struct tracecmd_output *handle, bool compress);
+
+/*
+ * Write @size bytes of @data to wherever the handle currently directs its
+ * output: the compression buffer while compressing, else the message
+ * handle if there is one, else the file descriptor.
+ */
+__hidden long long
+do_write_check(struct tracecmd_output *handle, const void *data, long long size)
+{
+	long long result;
+
+	if (handle->do_compress)
+		result = tracecmd_compress_buffer_write(handle->compress, data, size);
+	else if (handle->msg_handle)
+		result = tracecmd_msg_data_send(handle->msg_handle, data, size);
+	else
+		result = __do_write_check(handle->fd, data, size);
+
+	return result;
+}
+
+/*
+ * Seek in whatever the handle currently writes to: the compression buffer
+ * while compressing, else the message handle if there is one, else the file.
+ */
+static inline off64_t do_lseek(struct tracecmd_output *handle, off_t offset, int whence)
+{
+	off64_t pos;
+
+	if (handle->do_compress)
+		pos = tracecmd_compress_lseek(handle->compress, offset, whence);
+	else if (handle->msg_handle)
+		pos = msg_lseek(handle->msg_handle, offset, whence);
+	else
+		pos = lseek64(handle->fd, offset, whence);
+
+	return pos;
+}
+
+/*
+ * Read @len bytes at @offset into @dst, from the compression buffer while
+ * compressing, otherwise from the file descriptor.
+ */
+static inline int do_preed(struct tracecmd_output *handle, void *dst, int len, off_t offset)
+{
+	int got;
+
+	if (handle->do_compress)
+		got = tracecmd_compress_pread(handle->compress, dst, len, offset);
+	else
+		got = pread(handle->fd, dst, len, offset);
+
+	return got;
+}
+
+/* Pass a 2 byte value through tep_read_number(); unchanged without a tep handle */
+static short convert_endian_2(struct tracecmd_output *handle, short val)
+{
+	short converted = val;
+
+	if (handle->pevent)
+		converted = tep_read_number(handle->pevent, &val, 2);
+
+	return converted;
+}
+
+/* Pass a 4 byte value through tep_read_number(); unchanged without a tep handle */
+static int convert_endian_4(struct tracecmd_output *handle, int val)
+{
+	int converted = val;
+
+	if (handle->pevent)
+		converted = tep_read_number(handle->pevent, &val, 4);
+
+	return converted;
+}
+
+/* Pass an 8 byte value through tep_read_number(); unchanged without a tep handle */
+static unsigned long long convert_endian_8(struct tracecmd_output *handle,
+					   unsigned long long val)
+{
+	unsigned long long converted = val;
+
+	if (handle->pevent)
+		converted = tep_read_number(handle->pevent, &val, 8);
+
+	return converted;
+}
+
+/*
+ * Reset the compression context and stop routing output through it.
+ * Does nothing unless @compress is set and the handle has a compressor.
+ */
+__hidden void out_compression_reset(struct tracecmd_output *handle, bool compress)
+{
+	if (compress && handle->compress) {
+		tracecmd_compress_reset(handle->compress);
+		handle->do_compress = false;
+	}
+}
+
+/*
+ * Uncompress a block through the handle's compressor and, if that works,
+ * route subsequent I/O through the compression buffer.
+ *
+ * Returns 0 without a compressor, else the tracecmd_uncompress_block() result.
+ */
+__hidden int out_uncompress_block(struct tracecmd_output *handle)
+{
+	int err;
+
+	if (!handle->compress)
+		return 0;
+
+	err = tracecmd_uncompress_block(handle->compress);
+	if (err == 0)
+		handle->do_compress = true;
+
+	return err;
+}
+
+/*
+ * Reset the compressor and start routing output through it.
+ * Does nothing unless @compress is set and the handle has a compressor.
+ * Always returns 0.
+ */
+__hidden int out_compression_start(struct tracecmd_output *handle, bool compress)
+{
+	if (compress && handle->compress) {
+		tracecmd_compress_reset(handle->compress);
+		handle->do_compress = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Stop routing output through the compressor and compress what was
+ * collected. Returns 0 when there is nothing to do, otherwise the result
+ * of tracecmd_compress_block().
+ */
+__hidden int out_compression_end(struct tracecmd_output *handle, bool compress)
+{
+	int err = 0;
+
+	if (compress && handle->compress) {
+		/* switch off first so the block itself goes to the real output */
+		handle->do_compress = false;
+		err = tracecmd_compress_block(handle->compress);
+	}
+
+	return err;
+}
+
+/*
+ * Append @string (with its NUL) to the handle's pending meta-data strings.
+ *
+ * Returns the virtual offset of the string (strings_offs plus its position
+ * in the pending buffer), or -1 if the buffer cannot be grown.
+ */
+static long add_string(struct tracecmd_output *handle, const char *string)
+{
+	int len = strlen(string) + 1;
+	int start = handle->strings_p;
+	char *grown;
+
+	grown = realloc(handle->strings, start + len);
+	if (!grown)
+		return -1;
+
+	memcpy(grown + start, string, len);
+	handle->strings = grown;
+	handle->strings_p += len;
+
+	return handle->strings_offs + start;
+}
+
+/**
+ * tracecmd_set_quiet - Set if to print output to the screen
+ * @handle: output handle to update, ignored if NULL
+ * @set_quiet: If non zero, print no output to the screen
+ *
+ */
+void tracecmd_set_quiet(struct tracecmd_output *handle, bool set_quiet)
+{
+	if (!handle)
+		return;
+
+	handle->quiet = set_quiet;
+}
+
+/*
+ * Replace the trace clock name stored in @handle with a copy of @clock.
+ * Nothing happens if either argument is NULL, or if the copy cannot be
+ * allocated (the previous name is kept in that case).
+ */
+void tracecmd_set_out_clock(struct tracecmd_output *handle, const char *clock)
+{
+	char *copy;
+
+	if (!handle || !clock)
+		return;
+
+	/*
+	 * Duplicate before freeing: a failed allocation must not lose the
+	 * current name, and @clock may be handle->trace_clock itself.
+	 */
+	copy = strdup(clock);
+	if (!copy)
+		return;
+
+	free(handle->trace_clock);
+	handle->trace_clock = copy;
+}
+
+/**
+ * tracecmd_get_quiet - Get if to print output to the screen
+ * @handle: output handle to query, may be NULL
+ *
+ * Returns non zero, if no output to the screen should be printed;
+ * false for a NULL handle.
+ */
+bool tracecmd_get_quiet(struct tracecmd_output *handle)
+{
+	return handle ? handle->quiet : false;
+}
+
+/*
+ * Release everything owned by @handle and the handle itself: the tracing
+ * directory string, the tep reference, all buffer and option records, the
+ * pending strings, the clock name and the compression context.
+ * The file descriptor is not closed here. A NULL handle is ignored.
+ */
+void tracecmd_output_free(struct tracecmd_output *handle)
+{
+	struct tracecmd_option *opt;
+	struct tracecmd_buffer *buf;
+
+	if (!handle)
+		return;
+
+	free(handle->tracing_dir);
+
+	if (handle->pevent)
+		tep_unref(handle->pevent);
+
+	/* always unlink the head entry until the list drains */
+	while (!list_empty(&handle->buffers)) {
+		buf = container_of(handle->buffers.next,
+				   struct tracecmd_buffer, list);
+		list_del(&buf->list);
+		free(buf->name);
+		free(buf);
+	}
+
+	while (!list_empty(&handle->options)) {
+		opt = container_of(handle->options.next,
+				   struct tracecmd_option, list);
+		list_del(&opt->list);
+		free(opt->data);
+		free(opt);
+	}
+
+	free(handle->strings);
+	free(handle->trace_clock);
+	tracecmd_compress_destroy(handle->compress);
+	free(handle);
+}
+
+/*
+ * Finish and release an output handle: for files with sections, write the
+ * pending options and the strings section, then close the file descriptor
+ * and free the handle. The results of the two writes are not checked.
+ * A NULL handle is ignored.
+ */
+void tracecmd_output_close(struct tracecmd_output *handle)
+{
+ if (!handle)
+ return;
+
+ if (HAS_SECTIONS(handle)) {
+ /* write any unsaved options at the end of trace files with sections */
+ write_options(handle);
+
+ /* write strings section */
+ save_string_section(handle, true);
+ }
+
+ if (handle->fd >= 0) {
+ close(handle->fd);
+ handle->fd = -1;
+ }
+
+ tracecmd_output_free(handle);
+}
+/*
+ * Measure a file by reading @fd until read() stops returning data, then
+ * seek back to the start. Returns the number of bytes read.
+ */
+static unsigned long get_size_fd(int fd)
+{
+	unsigned long long total = 0;
+	char chunk[BUFSIZ];
+	int got;
+
+	while ((got = read(fd, chunk, BUFSIZ)) > 0)
+		total += got;
+
+	lseek(fd, 0, SEEK_SET);
+
+	return total;
+}
+
+/*
+ * Measure @file by reading it through get_size_fd().
+ * Returns 0 (after a warning) if the file cannot be opened.
+ */
+static unsigned long get_size(const char *file)
+{
+	unsigned long long bytes;
+	int fd = open(file, O_RDONLY);
+
+	if (fd < 0) {
+		tracecmd_warning("Can't read '%s'", file);
+		return 0; /* Caller will fail with zero */
+	}
+
+	bytes = get_size_fd(fd);
+	close(fd);
+
+	return bytes;
+}
+
+/*
+ * Copy data from @fd to the output handle in BUFSIZ chunks.
+ * When @max is non-zero at most @max bytes are copied; with @max == 0 the
+ * copy runs until read() returns no more data.
+ *
+ * Returns the number of bytes copied, or 0 if a write fails.
+ */
+static tsize_t copy_file_fd(struct tracecmd_output *handle, int fd, unsigned long long max)
+{
+ tsize_t rsize = BUFSIZ;
+ tsize_t size = 0;
+ char buf[BUFSIZ];
+ stsize_t r;
+
+ do {
+ /* never request more than what is left of the limit */
+ if (max && rsize > max)
+ rsize = max;
+
+ r = read(fd, buf, rsize);
+ if (r > 0) {
+ size += r;
+ if (do_write_check(handle, buf, r))
+ return 0;
+ if (max) {
+ max -= r;
+ /* limit reached; max == 0 must not turn into "unlimited" */
+ if (!max)
+ break;
+ }
+ }
+ } while (r > 0);
+
+ return size;
+}
+
+/*
+ * Copy the whole of @file to the output handle.
+ * Returns the number of bytes copied; 0 (after a warning) if the file
+ * cannot be opened.
+ */
+static tsize_t copy_file(struct tracecmd_output *handle,
+			 const char *file)
+{
+	tsize_t copied;
+	int fd = open(file, O_RDONLY);
+
+	if (fd < 0) {
+		tracecmd_warning("Can't read '%s'", file);
+		return 0;
+	}
+
+	copied = copy_file_fd(handle, fd, 0);
+	close(fd);
+
+	return copied;
+}
+
+/* chunk size handed to tracecmd_compress_copy_from(), in units of @page */
+#define PAGES_IN_CHUNK 10
+/*
+ * Copy up to @max bytes from @fd to the output handle, through the
+ * compressor when the handle has one, otherwise with copy_file_fd().
+ *
+ * If @write_size is not NULL it receives the write size reported by the
+ * compressor, or the copied size when no compressor is used.
+ *
+ * Returns the number of bytes read from @fd, or 0 on failure.
+ */
+__hidden unsigned long long out_copy_fd_compress(struct tracecmd_output *handle,
+ int fd, unsigned long long max,
+ unsigned long long *write_size,
+ int page)
+{
+ unsigned long long rsize = 0;
+ unsigned long long wsize = 0;
+ unsigned long long size;
+ int ret;
+
+ if (handle->compress) {
+ /* rsize is passed in as the limit and read back as the amount read */
+ rsize = max;
+ ret = tracecmd_compress_copy_from(handle->compress, fd,
+ PAGES_IN_CHUNK * page,
+ &rsize, &wsize);
+ if (ret < 0)
+ return 0;
+
+ size = rsize;
+ if (write_size)
+ *write_size = wsize;
+ } else {
+ size = copy_file_fd(handle, fd, max);
+ if (write_size)
+ *write_size = size;
+ }
+
+ return size;
+}
+
+/*
+ * Copy the whole of @file to the output handle with out_copy_fd_compress(),
+ * using the system page size as the chunk unit.
+ *
+ * If @write_size is not NULL it receives the size reported by
+ * out_copy_fd_compress(). Returns the number of bytes read from the file,
+ * or 0 (after a warning) if it cannot be opened or copied.
+ */
+static tsize_t copy_file_compress(struct tracecmd_output *handle,
+				  const char *file, unsigned long long *write_size)
+{
+	tsize_t copied;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		tracecmd_warning("Can't read '%s'", file);
+		return 0;
+	}
+
+	/* keep the full 64 bit count; an int would truncate large files */
+	copied = out_copy_fd_compress(handle, fd, 0, write_size, getpagesize());
+	if (!copied)
+		tracecmd_warning("Can't compress '%s'", file);
+
+	close(fd);
+	return copied;
+}
+
+/*
+ * Return the tracing directory cached in the handle. On first use it is
+ * obtained from tracefs_tracing_dir() and a copy is stored in the handle.
+ * Returns NULL if no directory is available or the copy fails.
+ */
+static const char *find_tracing_dir(struct tracecmd_output *handle)
+{
+	const char *dir;
+
+	if (handle->tracing_dir)
+		return handle->tracing_dir;
+
+	dir = tracefs_tracing_dir();
+	if (dir)
+		handle->tracing_dir = strdup(dir);
+
+	return handle->tracing_dir;
+}
+
+/*
+ * Build "<tracing dir>/<name>" in newly allocated memory.
+ * Returns NULL if the tracing directory is unknown or allocation fails;
+ * release the result with put_tracing_file().
+ */
+static char *get_tracing_file(struct tracecmd_output *handle, const char *name)
+{
+	const char *base = find_tracing_dir(handle);
+	char *full;
+
+	if (!base)
+		return NULL;
+
+	if (asprintf(&full, "%s/%s", base, name) < 0)
+		return NULL;
+
+	return full;
+}
+
+/* Release a path obtained from get_tracing_file(); NULL is accepted. */
+static void put_tracing_file(char *file)
+{
+	char *path = file;
+
+	free(path);
+}
+
+/*
+ * Write "1" (when @set is non-zero) or "0" to
+ * /proc/sys/kernel/ftrace_enabled.
+ *
+ * Returns 0 on success, ENODEV if the file does not exist, EIO if it
+ * cannot be opened for writing, -1 if the write fails.
+ */
+int tracecmd_ftrace_enable(int set)
+{
+	const char *proc_file = "/proc/sys/kernel/ftrace_enabled";
+	const char *digit = set ? "1" : "0";
+	struct stat st;
+	int status = 0;
+	int fd;
+
+	/* if ftrace_enabled does not exist, simply ignore it */
+	if (stat(proc_file, &st) < 0)
+		return ENODEV;
+
+	fd = open(proc_file, O_WRONLY);
+	if (fd < 0) {
+		tracecmd_warning("Can't %s ftrace", set ? "enable" : "disable");
+		return EIO;
+	}
+
+	if (write(fd, digit, 1) < 0)
+		status = -1;
+	close(fd);
+
+	return status;
+}
+
+/*
+ * Write the header of a new section at the current output position:
+ * 2 byte section id, 2 byte flags, 4 byte description (offset returned by
+ * add_string(), or -1 when @description is NULL) and 8 bytes reserved for
+ * the section size, which out_update_section_header() fills in later.
+ *
+ * When @option is true, an option with id @header_id holding the section's
+ * start offset is added to the handle first.
+ *
+ * Returns the offset of the reserved size field, 0 if the file version has
+ * no sections, or -1 (converted to the unsigned return type) on failure.
+ */
+__hidden unsigned long long
+out_write_section_header(struct tracecmd_output *handle, unsigned short header_id,
+ char *description, int flags, bool option)
+{
+ tsize_t endian8;
+ tsize_t offset;
+ long long size;
+ short endian2;
+ int endian4;
+ int desc;
+
+ if (header_id >= TRACECMD_OPTION_MAX)
+ return -1;
+ if (!HAS_SECTIONS(handle))
+ return 0;
+ /* the compress flag is only meaningful when a compressor exists */
+ if (!handle->compress)
+ flags &= ~TRACECMD_SEC_FL_COMPRESS;
+ offset = do_lseek(handle, 0, SEEK_CUR);
+ if (option) {
+ endian8 = convert_endian_8(handle, offset);
+ if (!tracecmd_add_option(handle, header_id, 8, &endian8))
+ return -1;
+ }
+ /* Section ID */
+ endian2 = convert_endian_2(handle, header_id);
+ if (do_write_check(handle, &endian2, 2))
+ return (off64_t)-1;
+
+ /* Section flags */
+ endian2 = convert_endian_2(handle, flags);
+ if (do_write_check(handle, &endian2, 2))
+ return (off64_t)-1;
+
+ /* Section description */
+ if (description)
+ desc = add_string(handle, description);
+ else
+ desc = -1;
+ endian4 = convert_endian_4(handle, desc);
+ if (do_write_check(handle, &endian4, 4))
+ return (off64_t)-1;
+
+ /* remember where the size field lives; this is what gets returned */
+ offset = do_lseek(handle, 0, SEEK_CUR);
+ size = 0;
+ /* Reserve for section size */
+ if (do_write_check(handle, &size, 8))
+ return (off64_t)-1;
+ return offset;
+}
+
+/*
+ * Fill in the section size reserved by out_write_section_header().
+ * @offset is the position of the reserved 8 byte field. The size is
+ * computed from the current output position, written at @offset, and the
+ * output position is then restored.
+ *
+ * Returns 0 on success (or when there is nothing to do: no sections in
+ * this file version, or @offset is 0), -1 on failure.
+ */
+__hidden int out_update_section_header(struct tracecmd_output *handle, tsize_t offset)
+{
+ tsize_t current;
+ tsize_t endian8;
+ tsize_t size;
+
+ if (!HAS_SECTIONS(handle) || offset == 0)
+ return 0;
+
+ current = do_lseek(handle, 0, SEEK_CUR);
+ /* The real size is the difference between the saved offset and
+ * the current offset - 8 bytes, the reserved space for the section size.
+ */
+ size = current - offset;
+ if (size < 8)
+ return -1;
+ size -= 8;
+ if (do_lseek(handle, offset, SEEK_SET) == (off64_t)-1)
+ return -1;
+
+ endian8 = convert_endian_8(handle, size);
+ if (do_write_check(handle, &endian8, 8))
+ return -1;
+ /* go back to where writing left off */
+ if (do_lseek(handle, current, SEEK_SET) == (off64_t)-1)
+ return -1;
+ return 0;
+}
+
+/*
+ * Write the pending meta-data strings as a "strings" section, optionally
+ * compressed, then drop the pending buffer and advance the virtual string
+ * offset by its size.
+ *
+ * Returns 0 on success or when there are no pending strings, -1 on failure.
+ */
+static int save_string_section(struct tracecmd_output *handle, bool compress)
+{
+ enum tracecmd_section_flags flags = 0;
+ tsize_t offset;
+
+ if (!handle->strings || !handle->strings_p)
+ return 0;
+
+ if (!check_out_state(handle, TRACECMD_OPTION_STRINGS)) {
+ tracecmd_warning("Cannot write strings, unexpected state 0x%X",
+ handle->file_state);
+ return -1;
+ }
+
+ if (compress)
+ flags |= TRACECMD_SEC_FL_COMPRESS;
+ offset = out_write_section_header(handle, TRACECMD_OPTION_STRINGS, "strings", flags, false);
+ if (offset == (off64_t)-1)
+ return -1;
+
+ out_compression_start(handle, compress);
+
+ if (do_write_check(handle, handle->strings, handle->strings_p))
+ goto error;
+
+ if (out_compression_end(handle, compress))
+ goto error;
+
+ if (out_update_section_header(handle, offset))
+ return -1;
+
+ /* later strings continue where this section's virtual offsets end */
+ handle->strings_offs += handle->strings_p;
+ free(handle->strings);
+ handle->strings = NULL;
+ handle->strings_p = 0;
+ handle->file_state = TRACECMD_OPTION_STRINGS;
+ return 0;
+
+error:
+ out_compression_reset(handle, compress);
+ return -1;
+}
+
+/*
+ * Write the "headers" section: the contents of events/header_page and
+ * events/header_event from the tracing directory, each preceded by its
+ * name (with NUL) and an 8 byte size. If events/header_page cannot be
+ * stat()ed, both entries are written with size zero.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int read_header_files(struct tracecmd_output *handle, bool compress)
+{
+	enum tracecmd_section_flags flags = 0;
+	tsize_t size, check_size, endian8;
+	struct stat st;
+	tsize_t offset;
+	char *path = NULL;
+	int fd = -1;
+	int ret;
+
+	if (!check_out_state(handle, TRACECMD_FILE_HEADERS)) {
+		tracecmd_warning("Cannot read header files, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	path = get_tracing_file(handle, "events/header_page");
+	if (!path)
+		return -1;
+
+	if (compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+	offset = out_write_section_header(handle, TRACECMD_OPTION_HEADER_INFO,
+					  "headers", flags, true);
+	if (offset == (off64_t)-1) {
+		put_tracing_file(path);
+		return -1;
+	}
+
+	out_compression_start(handle, compress);
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* old style did not show this info, just add zero */
+		put_tracing_file(path);
+		path = NULL;
+		if (do_write_check(handle, "header_page", 12))
+			goto out_close;
+		size = 0;
+		if (do_write_check(handle, &size, 8))
+			goto out_close;
+		if (do_write_check(handle, "header_event", 13))
+			goto out_close;
+		if (do_write_check(handle, &size, 8))
+			goto out_close;
+		if (out_compression_end(handle, compress))
+			goto out_close;
+		if (out_update_section_header(handle, offset))
+			goto out_close;
+		return 0;
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		tracecmd_warning("can't read '%s'", path);
+		goto out_close;
+	}
+
+	/* unfortunately, you can not stat debugfs files for size */
+	size = get_size_fd(fd);
+
+	if (do_write_check(handle, "header_page", 12))
+		goto out_close;
+	endian8 = convert_endian_8(handle, size);
+	if (do_write_check(handle, &endian8, 8))
+		goto out_close;
+	check_size = copy_file_fd(handle, fd, 0);
+	close(fd);
+	/* mark as closed so the error path does not close it again */
+	fd = -1;
+	if (size != check_size) {
+		tracecmd_warning("wrong size for '%s' size=%lld read=%lld", path, size, check_size);
+		errno = EINVAL;
+		goto out_close;
+	}
+	put_tracing_file(path);
+
+	path = get_tracing_file(handle, "events/header_event");
+	if (!path)
+		goto out_close;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		tracecmd_warning("can't read '%s'", path);
+		goto out_close;
+	}
+
+	size = get_size_fd(fd);
+
+	if (do_write_check(handle, "header_event", 13))
+		goto out_close;
+	endian8 = convert_endian_8(handle, size);
+	if (do_write_check(handle, &endian8, 8))
+		goto out_close;
+	check_size = copy_file_fd(handle, fd, 0);
+	close(fd);
+	fd = -1;
+	if (size != check_size) {
+		tracecmd_warning("wrong size for '%s'", path);
+		goto out_close;
+	}
+	put_tracing_file(path);
+	path = NULL;
+	if (out_compression_end(handle, compress))
+		goto out_close;
+
+	if (out_update_section_header(handle, offset))
+		goto out_close;
+	handle->file_state = TRACECMD_FILE_HEADERS;
+
+	return 0;
+
+ out_close:
+	out_compression_reset(handle, compress);
+	if (fd >= 0)
+		close(fd);
+	/* path is NULL whenever it has already been released */
+	put_tracing_file(path);
+	return -1;
+}
+
+/*
+ * Write the events of one system: a 4 byte event count, then for every
+ * event whose format file can be stat()ed an 8 byte size followed by the
+ * file contents.
+ *
+ * Returns 0 on success, -1 if a write fails or a file's size changes
+ * between measuring and copying it.
+ */
+static int copy_event_system(struct tracecmd_output *handle,
+ struct list_event_system *slist)
+{
+ struct list_event *elist;
+ unsigned long long size, check_size, endian8;
+ struct stat st;
+ char *format;
+ int endian4;
+ int count = 0;
+ int ret;
+
+ for (elist = slist->events; elist; elist = elist->next)
+ count++;
+
+ endian4 = convert_endian_4(handle, count);
+ if (do_write_check(handle, &endian4, 4))
+ return -1;
+
+ for (elist = slist->events; elist; elist = elist->next) {
+ format = elist->file;
+ ret = stat(format, &st);
+
+ /*
+ * NOTE(review): an event that fails stat() is skipped here but was
+ * still included in the count written above - confirm intended.
+ */
+ if (ret >= 0) {
+ /* unfortunately, you can not stat debugfs files for size */
+ size = get_size(format);
+ endian8 = convert_endian_8(handle, size);
+ if (do_write_check(handle, &endian8, 8))
+ return -1;
+ check_size = copy_file(handle, format);
+ if (size != check_size) {
+ tracecmd_warning("error in size of file '%s'", format);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Record event @event of system @system, with format file @path, in the
+ * list headed by *@systems. The system entry is created on first use and
+ * pushed at the head of the list; an event that is already present in the
+ * system is left as is. All strings are copied.
+ *
+ * On allocation failure a warning is printed and the event is not added.
+ */
+static void add_list_event_system(struct list_event_system **systems,
+ const char *system,
+ const char *event,
+ const char *path)
+{
+ struct list_event_system *slist;
+ struct list_event *elist;
+
+ /* look for an existing entry of this system */
+ for (slist = *systems; slist; slist = slist->next)
+ if (strcmp(slist->name, system) == 0)
+ break;
+
+ if (!slist) {
+ slist = malloc(sizeof(*slist));
+ if (!slist)
+ goto err_mem;
+ slist->name = strdup(system);
+ if (!slist->name) {
+ free(slist);
+ goto err_mem;
+ }
+ slist->next = *systems;
+ slist->events = NULL;
+ *systems = slist;
+ }
+
+ /* look for an existing entry of this event */
+ for (elist = slist->events; elist; elist = elist->next)
+ if (strcmp(elist->name, event) == 0)
+ break;
+
+ if (!elist) {
+ elist = malloc(sizeof(*elist));
+ if (!elist)
+ goto err_mem;
+ elist->name = strdup(event);
+ elist->file = strdup(path);
+ if (!elist->name || !elist->file) {
+ free(elist->name);
+ free(elist->file);
+ free(elist);
+ goto err_mem;
+ }
+ elist->next = slist->events;
+ slist->events = elist;
+ }
+ return;
+ err_mem:
+ tracecmd_warning("Insufficient memory");
+}
+
+/*
+ * Free a list of event systems, together with the events of each system
+ * and all the strings that they hold.
+ */
+static void free_list_events(struct list_event_system *list)
+{
+	struct list_event_system *next_sys;
+	struct list_event *next_ev;
+	struct list_event *ev;
+
+	for (; list; list = next_sys) {
+		next_sys = list->next;
+
+		for (ev = list->events; ev; ev = next_ev) {
+			next_ev = ev->next;
+			free(ev->name);
+			free(ev->file);
+			free(ev);
+		}
+
+		free(list->name);
+		free(list);
+	}
+}
+
+/*
+ * Find all the event format files that match "<events dir>/@str/format"
+ * and add each match to the list of systems pointed to by @systems.
+ *
+ * Events of the "ftrace" system are added only if @str itself begins
+ * with "ftrace/". Nothing is added if the events directory cannot be
+ * resolved, if memory is short, or if glob() reports an error (which
+ * includes the case of no match).
+ */
+static void glob_events(struct tracecmd_output *handle,
+			struct list_event_system **systems,
+			const char *str)
+{
+	glob_t globbuf;
+	char *events_path;
+	char *system;
+	char *event;
+	char *path;
+	char *file;
+	char *name;
+	char *ptr;
+	int do_ftrace = 0;
+	int events_len;
+	size_t i;
+	int ret;
+
+	if (strncmp(str, "ftrace/", 7) == 0)
+		do_ftrace = 1;
+
+	events_path = get_tracing_file(handle, "events");
+	if (!events_path)
+		return;
+	events_len = strlen(events_path);
+
+	path = malloc(events_len + strlen(str) +
+		      strlen("/format") + 2);
+	if (!path) {
+		put_tracing_file(events_path);
+		return;
+	}
+	path[0] = '\0';
+	strcat(path, events_path);
+	strcat(path, "/");
+	strcat(path, str);
+	strcat(path, "/format");
+	put_tracing_file(events_path);
+
+	globbuf.gl_offs = 0;
+	ret = glob(path, 0, NULL, &globbuf);
+	free(path);
+	/* glob() reports errors with positive codes, never negative ones */
+	if (ret) {
+		globfree(&globbuf);
+		return;
+	}
+
+	for (i = 0; i < globbuf.gl_pathc; i++) {
+		file = globbuf.gl_pathv[i];
+
+		/* Work on a copy of "<system>/<event>/format" */
+		name = strdup(file + events_len + 1);
+		if (!name) {
+			tracecmd_warning("Insufficient memory");
+			break;
+		}
+
+		/*
+		 * The tokens point into @name. Their presence is tested
+		 * with the return value of strtok_r(), the content of the
+		 * save pointer is not specified.
+		 */
+		system = strtok_r(name, "/", &ptr);
+		event = system ? strtok_r(NULL, "/", &ptr) : NULL;
+
+		if (system && event &&
+		    (do_ftrace || strcmp(system, "ftrace") != 0))
+			add_list_event_system(systems, system, event, file);
+
+		/* Free the allocation, not the (possibly advanced) token */
+		free(name);
+	}
+	globfree(&globbuf);
+}
+
+/*
+ * Add the events described by the glob of @list to the list of systems
+ * pointed to by @systems.
+ *
+ * The glob may be "system:event" or "system/event". If it has no
+ * separator, it is tried both as a system name ("glob/" followed by a
+ * wildcard) and as an event name (a wildcard followed by "/glob").
+ * Only a warning is printed if memory cannot be allocated.
+ */
+static void
+create_event_list_item(struct tracecmd_output *handle,
+		       struct list_event_system **systems,
+		       struct tracecmd_event_list *list)
+{
+	char *ptr;
+	char *str;
+
+	str = strdup(list->glob);
+	if (!str)
+		goto err_mem;
+
+	/* system and event names are separated by a ':' */
+	ptr = strchr(str, ':');
+	if (ptr)
+		*ptr = '/';
+	else
+		/* system and event may also be separated by a '/' */
+		ptr = strchr(str, '/');
+
+	if (ptr) {
+		glob_events(handle, systems, str);
+		free(str);
+		return;
+	}
+
+	/* No separator: @ptr keeps the bare name, @str gets the patterns */
+	ptr = str;
+	str = malloc(strlen(ptr) + 3);
+	if (!str) {
+		/* Do not leak the copy of the glob */
+		free(ptr);
+		goto err_mem;
+	}
+	str[0] = '\0';
+	strcat(str, ptr);
+	strcat(str, "/*");
+	glob_events(handle, systems, str);
+
+	str[0] = '\0';
+	strcat(str, "*/");
+	strcat(str, ptr);
+	glob_events(handle, systems, str);
+
+	free(ptr);
+	free(str);
+	return;
+ err_mem:
+	tracecmd_warning("Insufficient memory");
+}
+
+/*
+ * Write the "ftrace events" section: the format files of all the events
+ * of the ftrace system.
+ *
+ * The handle must be in a state that allows moving to
+ * TRACECMD_FILE_FTRACE_EVENTS, which becomes the state of the handle on
+ * success. If no ftrace event is found, a count of zero events is
+ * written.
+ *
+ * Returns 0 on success, non zero otherwise.
+ */
+static int read_ftrace_files(struct tracecmd_output *handle, bool compress)
+{
+	enum tracecmd_section_flags flags = 0;
+	struct list_event_system *systems = NULL;
+	struct tracecmd_event_list list = { .glob = "ftrace/*" };
+	tsize_t offset;
+	int endian4;
+	int ret;
+
+	if (!check_out_state(handle, TRACECMD_FILE_FTRACE_EVENTS)) {
+		tracecmd_warning("Cannot read ftrace files, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	if (compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+	offset = out_write_section_header(handle, TRACECMD_OPTION_FTRACE_EVENTS,
+					  "ftrace events", flags, true);
+	if (offset == (off64_t)-1)
+		return -1;
+
+	create_event_list_item(handle, &systems, &list);
+	out_compression_start(handle, compress);
+
+	if (systems) {
+		ret = copy_event_system(handle, systems);
+	} else {
+		/*
+		 * Nothing matched, the list is empty and there is no
+		 * system to pass to copy_event_system().
+		 */
+		endian4 = convert_endian_4(handle, 0);
+		ret = do_write_check(handle, &endian4, 4) ? -1 : 0;
+	}
+	if (!ret)
+		ret = out_compression_end(handle, compress);
+	else
+		out_compression_reset(handle, compress);
+
+	free_list_events(systems);
+	if (ret)
+		return ret;
+	if (out_update_section_header(handle, offset))
+		return -1;
+
+	handle->file_state = TRACECMD_FILE_FTRACE_EVENTS;
+
+	return ret;
+}
+
+/*
+ * Build the list of event systems that match the globs of @event_list.
+ *
+ * Returns the head of the new list, NULL if nothing was added.
+ */
+static struct list_event_system *
+create_event_list(struct tracecmd_output *handle,
+		  struct tracecmd_event_list *event_list)
+{
+	struct list_event_system *head = NULL;
+	struct tracecmd_event_list *item = event_list;
+
+	while (item) {
+		create_event_list_item(handle, &head, item);
+		item = item->next;
+	}
+
+	return head;
+}
+
+/*
+ * Write the "events format" section: a 4 byte count of systems, then for
+ * each system its name and the format files of its events.
+ *
+ * All the events are saved if @event_list is NULL, or if one of its
+ * items is the keyword "all". On success the state of the handle becomes
+ * TRACECMD_FILE_ALL_EVENTS.
+ *
+ * Returns 0 on success, non zero otherwise.
+ */
+static int read_event_files(struct tracecmd_output *handle,
+			    struct tracecmd_event_list *event_list, bool compress)
+{
+	struct tracecmd_event_list all_events = { .glob = "*/*" };
+	enum tracecmd_section_flags flags = 0;
+	struct tracecmd_event_list *item;
+	struct list_event_system *systems;
+	struct list_event_system *sys;
+	int nr_systems = 0;
+	tsize_t offset;
+	int count4;
+	int ret;
+
+	if (!check_out_state(handle, TRACECMD_FILE_ALL_EVENTS)) {
+		tracecmd_warning("Cannot read event files, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	if (compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+	offset = out_write_section_header(handle, TRACECMD_OPTION_EVENT_FORMATS,
+					  "events format", flags, true);
+	if (offset == (off64_t)-1)
+		return -1;
+
+	/* The special keyword "all" anywhere in the list selects everything */
+	item = event_list;
+	while (item && strcmp(item->glob, "all") != 0)
+		item = item->next;
+
+	/* No list at all, or "all" was found: use a global glob */
+	if (!event_list || item)
+		event_list = &all_events;
+
+	systems = create_event_list(handle, event_list);
+
+	for (sys = systems; sys; sys = sys->next)
+		nr_systems++;
+
+	out_compression_start(handle, compress);
+
+	ret = -1;
+	count4 = convert_endian_4(handle, nr_systems);
+	if (do_write_check(handle, &count4, 4))
+		goto out_free;
+
+	ret = 0;
+	for (sys = systems; sys; sys = sys->next) {
+		if (do_write_check(handle, sys->name, strlen(sys->name) + 1)) {
+			ret = -1;
+			break;
+		}
+		ret = copy_event_system(handle, sys);
+		if (ret)
+			break;
+	}
+
+	if (!ret)
+		ret = out_compression_end(handle, compress);
+	if (!ret)
+		ret = out_update_section_header(handle, offset);
+
+ out_free:
+	if (ret)
+		out_compression_reset(handle, compress);
+	else
+		handle->file_state = TRACECMD_FILE_ALL_EVENTS;
+
+	free_list_events(systems);
+
+	return ret;
+}
+
+/* Value of the saved kptr_restrict setting, before anything is saved */
+#define KPTR_UNINITIALIZED 'X'
+
+/*
+ * Save and clear, or restore, /proc/sys/kernel/kptr_restrict.
+ *
+ * If @reset is zero, the first byte of the file is saved and '0' is
+ * written into the file. If @reset is non zero, the saved byte is
+ * written back; nothing is done if no value was saved before.
+ *
+ * Nothing is done either if the file cannot be stat()ed. Any other
+ * failure is reported with a warning only.
+ */
+static void set_proc_kptr_restrict(int reset)
+{
+	const char *path = "/proc/sys/kernel/kptr_restrict";
+	static char saved = KPTR_UNINITIALIZED;
+	int fd = -1;
+	int ret = -1;
+	struct stat st;
+	char buf;
+
+	if ((reset && saved == KPTR_UNINITIALIZED) ||
+	    (stat(path, &st) < 0))
+		return;
+
+	if (reset) {
+		buf = saved;
+	} else {
+		fd = open(path, O_RDONLY);
+		if (fd < 0)
+			goto err;
+		/* A short read leaves nothing valid to save and restore */
+		if (read(fd, &buf, 1) != 1)
+			goto err;
+		saved = buf;
+		buf = '0';
+		close(fd);
+	}
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0)
+		goto err;
+	if (write(fd, &buf, 1) > 0)
+		ret = 0;
+err:
+	/* 0 is a valid file descriptor too */
+	if (fd >= 0)
+		close(fd);
+	if (ret)
+		tracecmd_warning("can't set kptr_restrict");
+}
+
+/*
+ * Write the "kallsyms" section: a 4 byte size followed by the content
+ * of the kernel symbols file.
+ *
+ * The symbols are read from the kallsyms file set in the handle, or
+ * from /proc/kallsyms if none is set. A size of zero and no content is
+ * written if the file cannot be stat()ed. kptr_restrict is cleared while
+ * the file is copied and restored afterwards.
+ *
+ * On success the state of the handle becomes TRACECMD_FILE_KALLSYMS.
+ *
+ * Returns 0 on success, non zero otherwise.
+ */
+static int read_proc_kallsyms(struct tracecmd_output *handle, bool compress)
+{
+	enum tracecmd_section_flags flags = 0;
+	unsigned int size, check_size, endian4;
+	const char *path = "/proc/kallsyms";
+	tsize_t offset;
+	struct stat st;
+	int ret;
+
+	if (!check_out_state(handle, TRACECMD_FILE_KALLSYMS)) {
+		tracecmd_warning("Cannot read kallsyms, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	if (handle->kallsyms)
+		path = handle->kallsyms;
+
+	if (compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+	offset = out_write_section_header(handle, TRACECMD_OPTION_KALLSYMS,
+					  "kallsyms", flags, true);
+	if (offset == (off64_t)-1)
+		return -1;
+
+	out_compression_start(handle, compress);
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* not found */
+		size = 0;
+		endian4 = convert_endian_4(handle, size);
+		ret = do_write_check(handle, &endian4, 4);
+		if (ret)
+			goto out;
+		/*
+		 * The empty section still has to be completed, the same
+		 * way it is done for the printk section.
+		 */
+		goto end_section;
+	}
+	size = get_size(path);
+	endian4 = convert_endian_4(handle, size);
+	ret = do_write_check(handle, &endian4, 4);
+	if (ret)
+		goto out;
+
+	set_proc_kptr_restrict(0);
+	check_size = copy_file(handle, path);
+	if (size != check_size) {
+		errno = EINVAL;
+		tracecmd_warning("error in size of file '%s'", path);
+		set_proc_kptr_restrict(1);
+		ret = -1;
+		goto out;
+	}
+	set_proc_kptr_restrict(1);
+
+end_section:
+	ret = out_compression_end(handle, compress);
+	if (ret)
+		goto out;
+
+	ret = out_update_section_header(handle, offset);
+out:
+	if (!ret)
+		handle->file_state = TRACECMD_FILE_KALLSYMS;
+	else
+		out_compression_reset(handle, compress);
+	return ret;
+}
+
+/*
+ * Write the "printk" section: a 4 byte size followed by the content of
+ * the printk_formats file of the tracing directory.
+ *
+ * A size of zero and no content is written if the file cannot be
+ * stat()ed. On success the state of the handle becomes
+ * TRACECMD_FILE_PRINTK.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+static int read_ftrace_printk(struct tracecmd_output *handle, bool compress)
+{
+	enum tracecmd_section_flags flags = 0;
+	unsigned int size, check_size, endian4;
+	tsize_t offset;
+	struct stat st;
+	char *path;
+	int ret;
+
+	if (!check_out_state(handle, TRACECMD_FILE_PRINTK)) {
+		tracecmd_warning("Cannot read printk, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	path = get_tracing_file(handle, "printk_formats");
+	if (!path)
+		return -1;
+
+	if (compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+	offset = out_write_section_header(handle, TRACECMD_OPTION_PRINTK, "printk", flags, true);
+	if (offset == (off64_t)-1) {
+		/* The path has to be released on this exit too */
+		put_tracing_file(path);
+		return -1;
+	}
+
+	out_compression_start(handle, compress);
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* not found */
+		size = 0;
+		endian4 = convert_endian_4(handle, size);
+		if (do_write_check(handle, &endian4, 4))
+			goto fail;
+		goto out;
+	}
+	size = get_size(path);
+	endian4 = convert_endian_4(handle, size);
+	if (do_write_check(handle, &endian4, 4))
+		goto fail;
+	check_size = copy_file(handle, path);
+	if (size != check_size) {
+		errno = EINVAL;
+		tracecmd_warning("error in size of file '%s'", path);
+		goto fail;
+	}
+
+ out:
+	put_tracing_file(path);
+	if (out_compression_end(handle, compress))
+		return -1;
+
+	if (out_update_section_header(handle, offset))
+		return -1;
+	handle->file_state = TRACECMD_FILE_PRINTK;
+	return 0;
+ fail:
+	put_tracing_file(path);
+	out_compression_reset(handle, compress);
+	return -1;
+}
+
+/*
+ * Write the file @filename of the tracing directory into the output:
+ * an 8 byte size followed by the content of the file.
+ *
+ * A size of zero and no content is written if the file cannot be
+ * stat()ed.
+ *
+ * Returns 0 on success, or -1 if the path cannot be resolved, if a
+ * write fails or if the number of copied bytes does not match the size
+ * that was announced.
+ */
+static int save_tracing_file_data(struct tracecmd_output *handle,
+				  const char *filename)
+{
+	unsigned long long endian8;
+	char *file = NULL;
+	struct stat st;
+	off64_t check_size;
+	off64_t size;
+	int ret = -1;
+
+	file = get_tracing_file(handle, filename);
+	if (!file)
+		return -1;
+
+	/*
+	 * The result of stat() is not stored in @ret, it must stay -1
+	 * until everything is written, for the error exits below.
+	 */
+	if (stat(file, &st) >= 0) {
+		size = get_size(file);
+		endian8 = convert_endian_8(handle, size);
+		if (do_write_check(handle, &endian8, 8))
+			goto out_free;
+		check_size = copy_file(handle, file);
+		if (size != check_size) {
+			errno = EINVAL;
+			tracecmd_warning("error in size of file '%s'", file);
+			goto out_free;
+		}
+	} else {
+		size = 0;
+		endian8 = convert_endian_8(handle, size);
+		if (do_write_check(handle, &endian8, 8))
+			goto out_free;
+	}
+	ret = 0;
+
+out_free:
+	put_tracing_file(file);
+	return ret;
+}
+
+/*
+ * Write the name and the version of the compression protocol of the
+ * handle, as two nul terminated strings. "none" and an empty version
+ * are written if the protocol cannot be retrieved.
+ *
+ * Returns 0 on success, -1 if a write fails.
+ */
+static int write_compression_header(struct tracecmd_output *handle)
+{
+	const char *proto = NULL;
+	const char *version = NULL;
+
+	if (tracecmd_compress_proto_get_name(handle->compress, &proto, &version) < 0 ||
+	    !proto || !version) {
+		proto = "none";
+		version = "";
+	}
+
+	if (do_write_check(handle, proto, strlen(proto) + 1) ||
+	    do_write_check(handle, version, strlen(version) + 1))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Get the sub buffer size described by the "events/header_page" file of
+ * the tracing instance @name, looked up in the tracing directory of the
+ * handle.
+ *
+ * Returns that size, or the user space page size if the header page
+ * cannot be read or parsed.
+ */
+static int get_trace_page_size(struct tracecmd_output *handle, const char *name)
+{
+	struct tracefs_instance *instance;
+	struct tep_handle *tep = NULL;
+	char *header = NULL;
+	int header_size;
+	int psize;
+
+	/* In case of an error, return user space page size */
+	psize = getpagesize();
+
+	instance = tracefs_instance_alloc(find_tracing_dir(handle), name);
+	if (instance)
+		header = tracefs_instance_file_read(instance, "events/header_page",
+						    &header_size);
+	if (header)
+		tep = tep_alloc();
+
+	if (tep && !tep_parse_header_page(tep, header, header_size, sizeof(long long)))
+		psize = tep_get_sub_buffer_size(tep);
+
+	tracefs_instance_free(instance);
+	tep_free(tep);
+	free(header);
+
+	return psize;
+}
+
+/**
+ * tracecmd_output_create_fd - allocate new output handle to a trace file
+ * @fd: File descriptor for the handle to write to.
+ *
+ * Allocate a tracecmd_output handle and perform minimal initialization:
+ * the default file version, the page size of the tracing buffers and the
+ * endianness of the host are set. @fd is stored in the handle as it is,
+ * nothing is written in the file yet.
+ *
+ * Returns a pointer to a newly allocated output handle, in the state
+ * TRACECMD_FILE_ALLOCATED. In case of an error, NULL is returned. The
+ * returned handle must be freed with tracecmd_output_close() or
+ * tracecmd_output_free()
+ */
+struct tracecmd_output *tracecmd_output_create_fd(int fd)
+{
+	struct tracecmd_output *out = calloc(1, sizeof(*out));
+
+	if (!out)
+		return NULL;
+
+	out->fd = fd;
+	out->file_version = FILE_VERSION_DEFAULT;
+
+	out->page_size = get_trace_page_size(out, NULL);
+	out->big_endian = tracecmd_host_bigendian();
+
+	list_head_init(&out->options);
+	list_head_init(&out->buffers);
+
+	out->file_state = TRACECMD_FILE_ALLOCATED;
+
+	return out;
+}
+
+/**
+ * tracecmd_output_set_msg - associate an output file handle with a network message handle
+ * @handle: output handle to a trace file.
+ * @msg_handle: network handle, allocated by tracecmd_msg_handle_alloc()
+ *
+ * Store the network handle (@msg_handle) in the output file handle
+ * (@handle). If the file version of @handle has sections, the messages
+ * are forced to be cached before they are sent.
+ *
+ * This must be called after the handle file version is set and before calling
+ * tracecmd_output_write_headers().
+ *
+ * Returns 0 on success, or -1 if the output file handle is not allocated or not
+ * in the expected state.
+ */
+int tracecmd_output_set_msg(struct tracecmd_output *handle, struct tracecmd_msg_handle *msg_handle)
+{
+	if (!handle)
+		return -1;
+	if (handle->file_state != TRACECMD_FILE_ALLOCATED)
+		return -1;
+
+	handle->msg_handle = msg_handle;
+
+	/* Force messages to be cached in a temp file before sending through the socket */
+	if (msg_handle && HAS_SECTIONS(handle))
+		tracecmd_msg_handle_cache(msg_handle);
+
+	return 0;
+}
+
+/**
+ * tracecmd_output_set_trace_dir - Set a custom tracing dir, instead of system default
+ * @handle: output handle to a trace file.
+ * @tracing_dir: full path to a directory with tracing files
+ *
+ * Associate the output file handle (@handle) with a custom tracing directory
+ * (@tracing_dir), to be used when creating the trace file instead of using the
+ * system default tracing directory. A NULL @tracing_dir removes a custom
+ * directory that was set before.
+ *
+ * Must be called before tracecmd_output_write_headers().
+ *
+ * Returns 0 on success, or -1 if the output file handle is not allocated or not
+ * in the expected state, or if there is not enough memory. The directory that
+ * was set before is kept in case of an error.
+ */
+int tracecmd_output_set_trace_dir(struct tracecmd_output *handle, const char *tracing_dir)
+{
+	char *dir = NULL;
+
+	if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED)
+		return -1;
+
+	/*
+	 * Duplicate before the old string is freed: @tracing_dir may be
+	 * the very string that is stored in the handle.
+	 */
+	if (tracing_dir) {
+		dir = strdup(tracing_dir);
+		if (!dir)
+			return -1;
+	}
+
+	free(handle->tracing_dir);
+	handle->tracing_dir = dir;
+
+	return 0;
+}
+
+/**
+ * tracecmd_output_set_kallsyms - Set a custom kernel symbols file
+ * @handle: output handle to a trace file.
+ * @kallsyms: full path to a file with kernel symbols
+ *
+ * Have the output file handle (@handle) use a custom kernel symbols file instead
+ * of the default /proc/kallsyms. A NULL @kallsyms removes a custom file that
+ * was set before.
+ *
+ * Must be called before tracecmd_output_write_headers().
+ *
+ * Returns 0 on success, or -1 if the output file handle is not allocated or
+ * not in the expected state, or if there is not enough memory. The file that
+ * was set before is kept in case of an error.
+ */
+int tracecmd_output_set_kallsyms(struct tracecmd_output *handle, const char *kallsyms)
+{
+	char *file = NULL;
+
+	if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED)
+		return -1;
+
+	/*
+	 * Duplicate before the old string is freed: @kallsyms may be the
+	 * very string that is stored in the handle.
+	 */
+	if (kallsyms) {
+		file = strdup(kallsyms);
+		if (!file)
+			return -1;
+	}
+
+	free(handle->kallsyms);
+	handle->kallsyms = file;
+
+	return 0;
+}
+
+/**
+ * tracecmd_output_set_from_input - Inherit parameters from an existing trace file
+ * @handle: output handle to a trace file.
+ * @ihandle: input handle to an existing trace file.
+ *
+ * Have the output file handle (@handle) inherit the properties of a given
+ * input file handle (@ihandle).
+ *
+ * The parameters that are copied are:
+ *  - tep handle (a reference to it is taken)
+ *  - page size
+ *  - file endian
+ *  - file version
+ *  - file compression protocol, if @ihandle reports one. In that case the
+ *    file version is raised to FILE_VERSION_COMPRESSION, if it is lower.
+ *
+ * Must be called before tracecmd_output_write_headers().
+ *
+ * Returns 0 on success, or -1 if one of the handles is missing, if the output
+ * handle is not in the expected state, or if the compression cannot be set up.
+ */
+int tracecmd_output_set_from_input(struct tracecmd_output *handle, struct tracecmd_input *ihandle)
+{
+	const char *proto_name = NULL;
+	const char *proto_ver = NULL;
+
+	if (!handle || !ihandle)
+		return -1;
+	if (handle->file_state != TRACECMD_FILE_ALLOCATED)
+		return -1;
+
+	/* Use the pevent of the ihandle for later writes */
+	handle->pevent = tracecmd_get_tep(ihandle);
+	tep_ref(handle->pevent);
+
+	/* get endian, page size, file version and compression */
+	handle->page_size = tracecmd_page_size(ihandle);
+	handle->file_version = tracecmd_get_in_file_version(ihandle);
+	handle->big_endian = tep_is_file_bigendian(handle->pevent);
+
+	/* Done, if no compression protocol is reported for the input */
+	if (tracecmd_get_file_compress_proto(ihandle, &proto_name, &proto_ver))
+		return 0;
+
+	handle->compress = tracecmd_compress_alloc(proto_name, proto_ver, handle->fd,
+						   handle->pevent, handle->msg_handle);
+	if (!handle->compress)
+		return -1;
+
+	if (handle->file_version < FILE_VERSION_COMPRESSION)
+		handle->file_version = FILE_VERSION_COMPRESSION;
+
+	return 0;
+}
+
+/**
+ * tracecmd_output_set_version - Set file version of the output handle
+ * @handle: output handle to a trace file.
+ * @file_version: desired file version, between FILE_VERSION_MIN and
+ *		  FILE_VERSION_MAX
+ *
+ * The compression of the handle is cleared, if @file_version is lower than
+ * FILE_VERSION_COMPRESSION.
+ *
+ * This API must be called before tracecmd_output_write_headers().
+ *
+ * Returns 0 on success, or -1 if the output file handle is not allocated or
+ * not in expected state, or if @file_version is out of range.
+ */
+int tracecmd_output_set_version(struct tracecmd_output *handle, int file_version)
+{
+	if (!handle)
+		return -1;
+	if (handle->file_state != TRACECMD_FILE_ALLOCATED)
+		return -1;
+	if (file_version > FILE_VERSION_MAX || file_version < FILE_VERSION_MIN)
+		return -1;
+
+	handle->file_version = file_version;
+	if (handle->file_version < FILE_VERSION_COMPRESSION)
+		handle->compress = NULL;
+
+	return 0;
+}
+
+/**
+ * tracecmd_output_set_compression - Set file compression algorithm of the output handle
+ * @handle: output handle to a trace file.
+ * @compression: name of the desired compression algorithm. Can be one of:
+ *		 - "none" (or NULL) - do not use compression
+ *		 - "any" - let the compression library select an algorithm
+ *		 - or specific name of the desired compression algorithm
+ *
+ * If compression gets enabled, the file version of the handle is raised to
+ * FILE_VERSION_COMPRESSION, if it is lower.
+ *
+ * This API must be called before tracecmd_output_write_headers().
+ *
+ * Returns 0 on success, or -1 in case of an error:
+ *   - the output file handle is not allocated or not in expected state.
+ *   - the specified compression algorithm is not available
+ * With "any", only a warning is printed if no algorithm is available, and
+ * 0 is returned.
+ */
+int tracecmd_output_set_compression(struct tracecmd_output *handle, const char *compression)
+{
+	const char *algo;
+	bool any;
+
+	if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED)
+		return -1;
+
+	handle->compress = NULL;
+	if (!compression || !strcmp(compression, "none"))
+		return 0;
+
+	/* A NULL name is passed to the allocator for "any" */
+	any = !strcmp(compression, "any");
+	algo = any ? NULL : compression;
+
+	handle->compress = tracecmd_compress_alloc(algo, NULL, handle->fd,
+						   handle->pevent,
+						   handle->msg_handle);
+	if (!handle->compress) {
+		if (any) {
+			tracecmd_warning("No compression algorithms are supported");
+			return 0;
+		}
+		tracecmd_warning("Compression algorithm %s is not supported",
+				 compression);
+		return -1;
+	}
+
+	if (handle->file_version < FILE_VERSION_COMPRESSION) {
+		handle->file_version = FILE_VERSION_COMPRESSION;
+		if (handle->msg_handle)
+			tracecmd_msg_handle_cache(handle->msg_handle);
+	}
+
+	return 0;
+}
+
+/**
+ * output_write_init - Write the initial data into the trace file
+ * @handle: output handle to a trace file.
+ *
+ * Must be called after tracecmd_output_set_*() functions and before writing
+ * anything else.
+ *
+ * The initial information to be written into the file:
+ *  - initial file magic bytes
+ *  - file version, as a nul terminated string
+ *  - data endian
+ *  - long size
+ *  - page size
+ *  - compression header, for file versions that have it
+ *  - a zero placeholder for the offset of the options, for file versions
+ *    that have sections
+ *
+ * On success the state of the handle becomes TRACECMD_FILE_INIT.
+ *
+ * Returns 0 on success, or -1 if the output file handle is not allocated or
+ * not in the expected state, or if a write fails.
+ */
+static int output_write_init(struct tracecmd_output *handle)
+{
+	unsigned long long placeholder = 0;
+	char buf[BUFSIZ];
+	int psize4;
+
+	if (!handle)
+		return -1;
+	if (handle->file_state != TRACECMD_FILE_ALLOCATED)
+		return -1;
+
+	/* Magic: three bytes followed by the string "tracing" */
+	buf[0] = 23;
+	buf[1] = 8;
+	buf[2] = 68;
+	memcpy(&buf[3], "tracing", 7);
+	if (do_write_check(handle, buf, 10))
+		return -1;
+
+	sprintf(buf, "%lu", handle->file_version);
+	if (do_write_check(handle, buf, strlen(buf) + 1))
+		return -1;
+
+	buf[0] = handle->big_endian ? 1 : 0;
+	if (do_write_check(handle, buf, 1))
+		return -1;
+
+	/* save size of long (this may not be what the kernel is) */
+	buf[0] = sizeof(long);
+	if (do_write_check(handle, buf, 1))
+		return -1;
+
+	psize4 = convert_endian_4(handle, handle->page_size);
+	if (do_write_check(handle, &psize4, 4))
+		return -1;
+
+	if (handle->file_version >= FILE_VERSION_COMPRESSION &&
+	    write_compression_header(handle))
+		return -1;
+
+	if (HAS_SECTIONS(handle)) {
+		/* Write 0 as options offset and save its location */
+		handle->options_start = do_lseek(handle, 0, SEEK_CUR);
+		if (do_write_check(handle, &placeholder, 8))
+			return -1;
+	}
+
+	handle->file_state = TRACECMD_FILE_INIT;
+	return 0;
+}
+
+/**
+ * tracecmd_output_write_headers - Write the trace file headers
+ * @handle: output handle to a trace file.
+ * @list: desired events that will be included in the trace file.
+ *	  It can be NULL for all available events
+ *
+ * The initial data of the file is written first, if it is not written yet.
+ * Then these headers are written in the file, in this order:
+ *  - header files
+ *  - ftrace events
+ *  - event files - all or only the ones from @list
+ *  - kernel symbols
+ *  - kernel printk strings
+ *
+ * The headers are compressed, if a compression is set in @handle.
+ *
+ * Returns 0 on success, or -1 in case of an error.
+ */
+int tracecmd_output_write_headers(struct tracecmd_output *handle,
+				  struct tracecmd_event_list *list)
+{
+	bool compress;
+
+	if (!handle || handle->file_state < TRACECMD_FILE_ALLOCATED)
+		return -1;
+
+	/* Write init data, if not written yet */
+	if (handle->file_state < TRACECMD_FILE_INIT && output_write_init(handle))
+		return -1;
+
+	compress = handle->compress ? true : false;
+
+	/* The evaluation stops at the first writer that fails */
+	if (read_header_files(handle, compress) ||
+	    read_ftrace_files(handle, compress) ||
+	    read_event_files(handle, list, compress) ||
+	    read_proc_kallsyms(handle, compress) ||
+	    read_ftrace_printk(handle, compress))
+		return -1;
+
+	return 0;
+}
+
+/**
+ * tracecmd_add_option_v - add options to the file
+ * @handle: the output file handle name
+ * @id: the id of the option
+ * @vector: array of vectors, pointing to the data to write in the file
+ * @count: number of items in the vector array
+ *
+ * The data of all the vectors is copied, one after the other, in a buffer
+ * that is owned by the new option. The option is queued at the end of the
+ * options of @handle, it is not written in the file by this call.
+ *
+ * Returns handle to update option if needed, or NULL in case of an error.
+ *  Just the content can be updated, with smaller or equal to
+ *  content than the specified size.
+ */
+struct tracecmd_option *
+tracecmd_add_option_v(struct tracecmd_output *handle,
+		      unsigned short id, const struct iovec *vector, int count)
+
+{
+	struct tracecmd_option *opt;
+	char *payload = NULL;
+	char *cursor;
+	int total = 0;
+	int v;
+
+	/*
+	 * We can only add options before tracing data were written.
+	 * This may change in the future.
+	 */
+	if (!HAS_SECTIONS(handle) && handle->file_state > TRACECMD_FILE_OPTIONS)
+		return NULL;
+
+	for (v = 0; v < count; v++)
+		total += vector[v].iov_len;
+
+	/* Some IDs (like TRACECMD_OPTION_TRACECLOCK) pass vector with 0 / NULL data */
+	if (total) {
+		payload = malloc(total);
+		if (!payload) {
+			tracecmd_warning("Insufficient memory");
+			return NULL;
+		}
+	}
+
+	opt = calloc(1, sizeof(*opt));
+	if (!opt) {
+		tracecmd_warning("Could not allocate space for option");
+		free(payload);
+		return NULL;
+	}
+
+	handle->nr_options++;
+	opt->data = payload;
+
+	/* Pack the vectors that carry data one after the other */
+	cursor = payload;
+	for (v = 0; v < count; v++) {
+		if (!vector[v].iov_base || !vector[v].iov_len)
+			continue;
+		memcpy(cursor, vector[v].iov_base, vector[v].iov_len);
+		cursor += vector[v].iov_len;
+	}
+
+	opt->size = total;
+	opt->id = id;
+
+	list_add_tail(&opt->list, &handle->options);
+
+	return opt;
+}
+
+/**
+ * tracecmd_add_option - add options to the file
+ * @handle: the output file handle name
+ * @id: the id of the option
+ * @size: the size of the option data
+ * @data: the data to write to the file
+ *
+ * Wrapper of tracecmd_add_option_v(), for data that is in a single buffer.
+ *
+ * Returns handle to update option if needed
+ *  Just the content can be updated, with smaller or equal to
+ *  content than the specified size
+ */
+struct tracecmd_option *
+tracecmd_add_option(struct tracecmd_output *handle,
+		    unsigned short id, int size, const void *data)
+{
+	struct iovec single = {
+		.iov_base = (void *) data,
+		.iov_len = size,
+	};
+
+	return tracecmd_add_option_v(handle, id, &single, 1);
+}
+
+/**
+ * tracecmd_write_cpus - save the number of CPUs in the trace file
+ * @handle: output handle to a trace file.
+ * @cpus: number of CPUs
+ *
+ * For file versions without sections, the count is written directly in the
+ * file, as 4 bytes. For file versions with sections, it is queued as a
+ * TRACECMD_OPTION_CPUCOUNT option.
+ *
+ * On success the state of the handle becomes TRACECMD_FILE_CPU_COUNT.
+ *
+ * Returns 0 on success, or a negative value if the handle is not in the
+ * expected state, if the write fails or if the option cannot be added.
+ */
+int tracecmd_write_cpus(struct tracecmd_output *handle, int cpus)
+{
+	int ret;
+
+	if (!check_out_state(handle, TRACECMD_FILE_CPU_COUNT)) {
+		tracecmd_warning("Cannot write CPU count into the file, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	if (!HAS_SECTIONS(handle)) {
+		cpus = convert_endian_4(handle, cpus);
+		ret = do_write_check(handle, &cpus, 4);
+		if (ret < 0)
+			return ret;
+	} else {
+		/* Do not advance the state if the count was not recorded */
+		if (!tracecmd_add_option(handle, TRACECMD_OPTION_CPUCOUNT,
+					 sizeof(int), &cpus))
+			return -1;
+	}
+
+	handle->file_state = TRACECMD_FILE_CPU_COUNT;
+	return 0;
+}
+
+/*
+ * Write the options of the handle in the file, for file versions
+ * without sections.
+ *
+ * A 10 byte marker (the string "options", padded with two spaces, and
+ * its terminating nul) is written first. Then, for each option, its 2
+ * byte id, its 4 byte size and its data. The 2 byte id
+ * TRACECMD_OPTION_DONE ends the list. The file offset of the data of
+ * each option is saved in the option.
+ *
+ * Nothing is done if the options are already written. On success the
+ * state of the handle becomes TRACECMD_FILE_OPTIONS.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+static int write_options_v6(struct tracecmd_output *handle)
+{
+	struct tracecmd_option *options;
+	unsigned short option;
+	unsigned short endian2;
+	unsigned int endian4;
+
+	/* If already written, ignore */
+	if (handle->file_state == TRACECMD_FILE_OPTIONS)
+		return 0;
+	if (!check_out_state(handle, TRACECMD_FILE_OPTIONS)) {
+		tracecmd_warning("Cannot write options into the file, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	/* 9 characters and the nul: exactly the 10 bytes that are written */
+	if (do_write_check(handle, "options  ", 10))
+		return -1;
+	handle->options_start = do_lseek(handle, 0, SEEK_CUR);
+	list_for_each_entry(options, &handle->options, list) {
+		endian2 = convert_endian_2(handle, options->id);
+		if (do_write_check(handle, &endian2, 2))
+			return -1;
+
+		endian4 = convert_endian_4(handle, options->size);
+		if (do_write_check(handle, &endian4, 4))
+			return -1;
+
+		/* Save the data location in case it needs to be updated */
+		options->offset = do_lseek(handle, 0, SEEK_CUR);
+
+		if (do_write_check(handle, options->data,
+				   options->size))
+			return -1;
+	}
+
+	option = TRACECMD_OPTION_DONE;
+
+	if (do_write_check(handle, &option, 2))
+		return -1;
+
+	handle->file_state = TRACECMD_FILE_OPTIONS;
+	return 0;
+}
+
+/*
+ * Write the options that are not saved yet in a new "options" section,
+ * for file versions with sections.
+ *
+ * An option is considered saved when its file offset is set. If an
+ * options section (or the placeholder for its offset) was written
+ * before, the offset of the new section is stored there first. The new
+ * section ends with a TRACECMD_OPTION_DONE option that holds 8 zero
+ * bytes; the location of those bytes is remembered, so that one more
+ * section can be chained later.
+ *
+ * Returns 0 on success or if there is nothing to write, -1 otherwise.
+ */
+static int write_options(struct tracecmd_output *handle)
+{
+	struct tracecmd_option *opt;
+	unsigned long long val8;
+	unsigned short val2;
+	unsigned int val4;
+	bool pending = false;
+	tsize_t section;
+	tsize_t here;
+
+	/* Check if there are unsaved options */
+	list_for_each_entry(opt, &handle->options, list) {
+		if (opt->offset)
+			continue;
+		pending = true;
+		break;
+	}
+	if (!pending)
+		return 0;
+
+	here = do_lseek(handle, 0, SEEK_CUR);
+
+	/* Append to the previous options section, if any */
+	if (handle->options_start) {
+		if (do_lseek(handle, handle->options_start, SEEK_SET) == (off64_t)-1)
+			return -1;
+		val8 = convert_endian_8(handle, here);
+		if (do_write_check(handle, &val8, 8))
+			return -1;
+		if (do_lseek(handle, here, SEEK_SET) == (off_t)-1)
+			return -1;
+	}
+
+	section = out_write_section_header(handle, TRACECMD_OPTION_DONE, "options", 0, false);
+	if (section == (off_t)-1)
+		return -1;
+
+	list_for_each_entry(opt, &handle->options, list) {
+		/* Option is already saved, skip it */
+		if (opt->offset)
+			continue;
+
+		val2 = convert_endian_2(handle, opt->id);
+		if (do_write_check(handle, &val2, 2))
+			return -1;
+
+		val4 = convert_endian_4(handle, opt->size);
+		if (do_write_check(handle, &val4, 4))
+			return -1;
+
+		/* Save the data location */
+		opt->offset = do_lseek(handle, 0, SEEK_CUR);
+		if (do_write_check(handle, opt->data, opt->size))
+			return -1;
+	}
+
+	/* Closing option: id, size of 8 and 8 zero bytes */
+	val2 = convert_endian_2(handle, TRACECMD_OPTION_DONE);
+	if (do_write_check(handle, &val2, 2))
+		return -1;
+
+	val4 = convert_endian_4(handle, 8);
+	if (do_write_check(handle, &val4, 4))
+		return -1;
+
+	val8 = 0;
+	handle->options_start = do_lseek(handle, 0, SEEK_CUR);
+	if (do_write_check(handle, &val8, 8))
+		return -1;
+
+	if (out_update_section_header(handle, section))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Save the strings section of the file. Nothing is done, and 0 is
+ * returned, for file versions without sections.
+ */
+int tracecmd_write_meta_strings(struct tracecmd_output *handle)
+{
+	if (HAS_SECTIONS(handle))
+		return save_string_section(handle, true);
+
+	return 0;
+}
+
+/*
+ * Write the options of the handle in the file, with the writer that
+ * matches the file version: with or without sections.
+ */
+int tracecmd_write_options(struct tracecmd_output *handle)
+{
+	if (HAS_SECTIONS(handle))
+		return write_options(handle);
+
+	return write_options_v6(handle);
+}
+
+/*
+ * Append the options of the handle at the end of the file, for file
+ * versions without sections.
+ *
+ * The last 2 bytes of the file must be the TRACECMD_OPTION_DONE id of
+ * the options that are already written. The new options are written
+ * over it, followed by a new TRACECMD_OPTION_DONE id.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+static int append_options_v6(struct tracecmd_output *handle)
+{
+	struct tracecmd_option *opt;
+	unsigned short done_id;
+	unsigned short val2;
+	unsigned int val4;
+	off_t done_pos;
+
+	/*
+	 * We can append only if options are already written and tracing data
+	 * is not yet written
+	 */
+	if (handle->file_state != TRACECMD_FILE_OPTIONS)
+		return -1;
+
+	/* Go 2 bytes before the end of the file */
+	if (do_lseek(handle, 0, SEEK_END) == (off_t)-1)
+		return -1;
+	done_pos = do_lseek(handle, -2, SEEK_CUR);
+	if (done_pos == (off_t)-1)
+		return -1;
+
+	if (do_preed(handle, &done_id, 2, done_pos) != 2)
+		return -1;
+	if (done_id != TRACECMD_OPTION_DONE)
+		return -1;
+
+	list_for_each_entry(opt, &handle->options, list) {
+		val2 = convert_endian_2(handle, opt->id);
+		if (do_write_check(handle, &val2, 2))
+			return -1;
+
+		val4 = convert_endian_4(handle, opt->size);
+		if (do_write_check(handle, &val4, 4))
+			return -1;
+
+		/* Save the data location in case it needs to be updated */
+		opt->offset = do_lseek(handle, 0, SEEK_CUR);
+
+		if (do_write_check(handle, opt->data, opt->size))
+			return -1;
+	}
+
+	done_id = TRACECMD_OPTION_DONE;
+	if (do_write_check(handle, &done_id, 2))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Append the options of the handle to the file, with the writer that
+ * matches the file version: with or without sections.
+ */
+int tracecmd_append_options(struct tracecmd_output *handle)
+{
+	if (HAS_SECTIONS(handle))
+		return write_options(handle);
+
+	return append_options_v6(handle);
+}
+
+/*
+ * Add a TRACECMD_OPTION_BUFFER option for the buffer @name: 8 zero
+ * bytes, followed by the nul terminated name. If @cpus is not zero, a
+ * TRACECMD_OPTION_CPUCOUNT option is added after it.
+ *
+ * Returns the buffer option, or NULL if it cannot be added.
+ */
+static struct tracecmd_option *
+add_buffer_option_v6(struct tracecmd_output *handle, const char *name, int cpus)
+{
+	struct tracecmd_option *buf_option;
+	int name_size = strlen(name) + 1;
+	int size = 8 + name_size;
+	char *data;
+
+	/* calloc() leaves the first 8 bytes set to zero */
+	data = calloc(1, size);
+	if (!data) {
+		tracecmd_warning("Failed to malloc buffer");
+		return NULL;
+	}
+	memcpy(data + 8, name, name_size);
+
+	buf_option = tracecmd_add_option(handle, TRACECMD_OPTION_BUFFER, size, data);
+	free(data);
+
+	/*
+	 * In case a buffer instance has different number of CPUs as the
+	 * local machine.
+	 */
+	if (cpus)
+		tracecmd_add_option(handle, TRACECMD_OPTION_CPUCOUNT,
+				    sizeof(int), &cpus);
+
+	return buf_option;
+}
+
+/*
+ * Add a buffer with the given @name and number of @cpus at the end of
+ * the list of buffers of the handle. The name is copied.
+ *
+ * Returns 0 on success, -1 if memory cannot be allocated.
+ */
+int tracecmd_add_buffer_info(struct tracecmd_output *handle, const char *name, int cpus)
+{
+	struct tracecmd_buffer *info;
+
+	info = calloc(1, sizeof(*info));
+	if (!info)
+		return -1;
+
+	info->cpus = cpus;
+	info->name = strdup(name);
+	if (!info->name) {
+		free(info);
+		return -1;
+	}
+
+	list_add_tail(&info->list, &handle->buffers);
+	return 0;
+}
+
+/*
+ * Add a buffer option for every buffer of the handle, and link each
+ * buffer with its option. Nothing is done for file versions with
+ * sections.
+ *
+ * Returns 0 on success, -1 if an option cannot be added.
+ */
+int tracecmd_write_buffer_info(struct tracecmd_output *handle)
+{
+	struct tracecmd_buffer *info;
+
+	if (HAS_SECTIONS(handle))
+		return 0;
+
+	list_for_each_entry(info, &handle->buffers, list) {
+		info->option = add_buffer_option_v6(handle, info->name, info->cpus);
+		if (!info->option)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the first buffer called @name and return the offset stored in its
+ * option. Returns 0 when no such buffer exists or it has no option yet.
+ */
+static tsize_t get_buffer_file_offset(struct tracecmd_output *handle, const char *name)
+{
+	struct tracecmd_buffer *entry;
+
+	list_for_each_entry(entry, &handle->buffers, list) {
+		if (strcmp(name, entry->name) != 0)
+			continue;
+
+		/* Only the first name match is considered */
+		return entry->option ? entry->option->offset : 0;
+	}
+	return 0;
+}
+
+/*
+ * Write the "command lines" section, filled from the "saved_cmdlines"
+ * tracing file, and move the handle to TRACECMD_FILE_CMD_LINES.
+ *
+ * The section is flagged as compressed when the handle has a compression
+ * context. Returns 0 on success and a negative value on failure.
+ */
+int tracecmd_write_cmdlines(struct tracecmd_output *handle)
+{
+	enum tracecmd_section_flags flags = 0;
+	tsize_t offset;
+	bool compress;
+	int ret;
+
+	if (!check_out_state(handle, TRACECMD_FILE_CMD_LINES)) {
+		tracecmd_warning("Cannot write command lines into the file, unexpected state 0x%X",
+				 handle->file_state);
+		return -1;
+	}
+
+	compress = handle->compress ? true : false;
+	if (compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+
+	offset = out_write_section_header(handle, TRACECMD_OPTION_CMDLINES,
+					  "command lines", flags, true);
+	if (offset == (off64_t)-1)
+		return -1;
+
+	out_compression_start(handle, compress);
+
+	ret = save_tracing_file_data(handle, "saved_cmdlines");
+	if (ret < 0) {
+		out_compression_reset(handle, compress);
+		return ret;
+	}
+
+	if (out_compression_end(handle, compress) ||
+	    out_update_section_header(handle, offset))
+		return -1;
+
+	handle->file_state = TRACECMD_FILE_CMD_LINES;
+	return 0;
+}
+
+/*
+ * Return the trace clock name for @handle, looking it up and caching it in
+ * handle->trace_clock on first use. May return NULL.
+ */
+static char *get_clock(struct tracecmd_output *handle)
+{
+	struct tracefs_instance *inst;
+
+	if (handle->trace_clock)
+		return handle->trace_clock;
+
+	/*
+	 * If no clock is set on this handle, get the trace clock of
+	 * the top instance in the handle's tracing dir
+	 */
+	if (handle->tracing_dir) {
+		inst = tracefs_instance_alloc(handle->tracing_dir, NULL);
+		if (!inst)
+			return NULL;
+		handle->trace_clock = tracefs_get_clock(inst);
+		tracefs_instance_free(inst);
+	} else {
+		handle->trace_clock = tracefs_get_clock(NULL);
+	}
+
+	return handle->trace_clock;
+}
+
+/*
+ * Build a buffer option (@id) out of an iovec array and add it to @handle
+ * with tracecmd_add_option_v(). Only done when HAS_SECTIONS() holds;
+ * otherwise NULL is returned without doing anything.
+ *
+ * Returns the new option, or NULL on failure.
+ */
+__hidden struct tracecmd_option *
+out_add_buffer_option(struct tracecmd_output *handle, const char *name,
+		      unsigned short id, unsigned long long data_offset,
+		      int cpus, struct data_file_write *cpu_data, int page_size)
+{
+	struct tracecmd_option *option;
+	struct iovec *vect;
+	int *cpu_ids = NULL;
+	int nr_cpus = 0;
+	int nr_vect = 0;
+	char *clock;
+	int cpu;
+
+	if (!HAS_SECTIONS(handle))
+		return NULL;
+
+	clock = get_clock(handle);
+	if (!clock) {
+		tracecmd_warning("Could not find clock, set to 'local'");
+		clock = "local";
+	}
+
+	/*
+	 * Buffer flyrecord option:
+	 *  - trace data offset in the file
+	 *  - buffer name
+	 *  - buffer clock
+	 *  - page size
+	 *  - CPU count
+	 *  - for each CPU:
+	 *    - CPU id
+	 *    - CPU trace data offset in the file
+	 *    - CPU trace data size
+	 *
+	 * Buffer latency option:
+	 *  - trace data offset in the file
+	 *  - buffer name
+	 *  - buffer clock
+	 */
+
+	/*
+	 * 5 : offset, name, clock, page size, count
+	 * 3 : per CPU id, data offset, data size
+	 */
+	vect = calloc(5 + (cpus * 3), sizeof(struct iovec));
+	if (!vect)
+		return NULL;
+
+	if (cpus) {
+		cpu_ids = calloc(cpus, sizeof(int));
+		if (!cpu_ids) {
+			free(vect);
+			return NULL;
+		}
+	}
+
+	vect[nr_vect].iov_base = (void *) &data_offset;
+	vect[nr_vect].iov_len = 8;
+	nr_vect++;
+
+	vect[nr_vect].iov_base = (void *) name;
+	vect[nr_vect].iov_len = strlen(name) + 1;
+	nr_vect++;
+
+	vect[nr_vect].iov_base = (void *) clock;
+	vect[nr_vect].iov_len = strlen(clock) + 1;
+	nr_vect++;
+
+	if (id == TRACECMD_OPTION_BUFFER) {
+		vect[nr_vect].iov_base = &page_size;
+		vect[nr_vect].iov_len = 4;
+		nr_vect++;
+
+		/*
+		 * The count entry points at nr_cpus, which is only final
+		 * once the loop below has finished.
+		 */
+		vect[nr_vect].iov_base = (void *) &nr_cpus;
+		vect[nr_vect].iov_len = 4;
+		nr_vect++;
+
+		for (cpu = 0; cpu < cpus; cpu++) {
+			/* CPUs without data are left out */
+			if (!cpu_data[cpu].file_size)
+				continue;
+
+			cpu_ids[cpu] = cpu;
+			vect[nr_vect].iov_base = &cpu_ids[cpu];
+			vect[nr_vect].iov_len = 4;
+			nr_vect++;
+
+			vect[nr_vect].iov_base = &cpu_data[cpu].data_offset;
+			vect[nr_vect].iov_len = 8;
+			nr_vect++;
+
+			vect[nr_vect].iov_base = &cpu_data[cpu].write_size;
+			vect[nr_vect].iov_len = 8;
+			nr_vect++;
+
+			nr_cpus++;
+		}
+	}
+
+	option = tracecmd_add_option_v(handle, id, vect, nr_vect);
+	free(vect);
+	free(cpu_ids);
+
+	return option;
+}
+
+/*
+ * Create @output_file and fill it with the headers, command lines, CPU
+ * count, buffer info, options and finally the content of the "trace"
+ * tracing file as latency data.
+ *
+ * @output_file: name of the file to create
+ * @cpus: CPU count passed to tracecmd_write_cpus()
+ * @file_version: file version to set; 0 leaves the handle's version untouched
+ * @compression: compression to set; when NULL, "any" is requested if
+ *		 @file_version is at least FILE_VERSION_COMPRESSION
+ *
+ * Returns the output handle (in state TRACECMD_FILE_CPU_LATENCY), or NULL
+ * on failure, in which case the handle is closed.
+ */
+struct tracecmd_output *tracecmd_create_file_latency(const char *output_file, int cpus,
+						     int file_version, const char *compression)
+{
+	enum tracecmd_section_flags flags = 0;
+	struct tracecmd_output *handle;
+	char *path = NULL;
+	tsize_t offset;
+
+	handle = tracecmd_output_create(output_file);
+	if (!handle)
+		return NULL;
+
+	if (file_version && tracecmd_output_set_version(handle, file_version))
+		goto out_free;
+
+	if (compression) {
+		if (tracecmd_output_set_compression(handle, compression))
+			goto out_free;
+	} else if (file_version >= FILE_VERSION_COMPRESSION) {
+		/* Best effort: the result is deliberately not checked */
+		tracecmd_output_set_compression(handle, "any");
+	}
+
+	if (tracecmd_output_write_headers(handle, NULL))
+		goto out_free;
+	/*
+	 * Save the command lines;
+	 */
+	if (tracecmd_write_cmdlines(handle) < 0)
+		goto out_free;
+
+	if (tracecmd_write_cpus(handle, cpus) < 0)
+		goto out_free;
+	if (tracecmd_write_buffer_info(handle) < 0)
+		goto out_free;
+	if (tracecmd_write_options(handle) < 0)
+		goto out_free;
+
+	if (!check_out_state(handle, TRACECMD_FILE_CPU_LATENCY)) {
+		tracecmd_warning("Cannot write latency data into the file, unexpected state 0x%X",
+				 handle->file_state);
+		goto out_free;
+	}
+
+	/*
+	 * The marker is written as 10 bytes, so the literal is padded with
+	 * two spaces to be 10 bytes long including its NUL terminator.
+	 */
+	if (!HAS_SECTIONS(handle) && do_write_check(handle, "latency  ", 10))
+		goto out_free;
+
+	path = get_tracing_file(handle, "trace");
+	if (!path)
+		goto out_free;
+
+	offset = do_lseek(handle, 0, SEEK_CUR);
+	if (HAS_SECTIONS(handle) &&
+	    !out_add_buffer_option(handle, "", TRACECMD_OPTION_BUFFER_TEXT,
+				   offset, 0, NULL, getpagesize()))
+		goto out_free;
+	if (handle->compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+
+	offset = out_write_section_header(handle, TRACECMD_OPTION_BUFFER_TEXT,
+					  "buffer latency", flags, false);
+	if (offset == (off64_t)-1)
+		goto out_free;
+
+	/* NOTE(review): the result of copy_file_compress() is not checked */
+	copy_file_compress(handle, path, NULL);
+	if (out_update_section_header(handle, offset))
+		goto out_free;
+
+	put_tracing_file(path);
+	path = NULL;
+
+	handle->file_state = TRACECMD_FILE_CPU_LATENCY;
+
+	if (HAS_SECTIONS(handle))
+		tracecmd_write_options(handle);
+
+	return handle;
+
+out_free:
+	/* Release the tracing file path if it was obtained before the failure */
+	if (path)
+		put_tracing_file(path);
+	tracecmd_output_close(handle);
+	return NULL;
+}
+
+/*
+ * Write @clock wrapped in square brackets: first the 8-byte length of the
+ * "[clock]" string, then the string itself without its NUL terminator.
+ *
+ * Returns 0 on success, -1 if the string could not be allocated, otherwise
+ * the failing do_write_check() result.
+ */
+static int save_clock(struct tracecmd_output *handle, char *clock)
+{
+	unsigned long long len8;
+	char *bracketed = NULL;
+	int ret;
+
+	if (asprintf(&bracketed, "[%s]", clock) < 0)
+		return -1;
+
+	len8 = convert_endian_8(handle, strlen(bracketed));
+	ret = do_write_check(handle, &len8, 8);
+	if (!ret)
+		ret = do_write_check(handle, bracketed, strlen(bracketed));
+
+	free(bracketed);
+	return ret;
+}
+
+/*
+ * Write @offset (8 bytes) at the file location saved for the option data of
+ * the buffer called @name, then seek back to where the file position was.
+ *
+ * A NULL @name is treated as "". Returns 0 on success, -1 if the buffer has
+ * no saved location or a seek/write fails.
+ */
+static int update_buffer_cpu_offset_v6(struct tracecmd_output *handle,
+				       const char *name, tsize_t offset)
+{
+	tsize_t b_offset;
+	tsize_t current;
+
+	if (!name)
+		name = "";
+
+	b_offset = get_buffer_file_offset(handle, name);
+	if (!b_offset) {
+		tracecmd_warning("Cannot find description for buffer %s", name);
+		return -1;
+	}
+
+	current = do_lseek(handle, 0, SEEK_CUR);
+
+	/* Go to the option data, where the offset will be written */
+	if (do_lseek(handle, b_offset, SEEK_SET) == (off64_t)-1) {
+		tracecmd_warning("could not seek to %llu",
+				 (unsigned long long)b_offset);
+		return -1;
+	}
+
+	if (do_write_check(handle, &offset, 8))
+		return -1;
+
+	/* Go back to where we were; report that position if it fails */
+	if (do_lseek(handle, current, SEEK_SET) == (off64_t)-1) {
+		tracecmd_warning("could not seek to %llu",
+				 (unsigned long long)current);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Write an empty "flyrecord" block: the 10-byte marker, a zero offset and a
+ * zero size for each of @cpus CPUs, and the trace clock when one is found.
+ * Nothing is written when HAS_SECTIONS() holds.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+__hidden int out_write_emty_cpu_data(struct tracecmd_output *handle, int cpus)
+{
+	unsigned long long zero = 0;
+	char *clock;
+	int ret = 0;
+	int cpu;
+
+	if (HAS_SECTIONS(handle))
+		return 0;
+
+	/* NOTE(review): confirm check_file_state() can return a negative value */
+	if (handle->file_state != TRACECMD_FILE_CPU_FLYRECORD)
+		ret = check_file_state(handle->file_version,
+				       handle->file_state,
+				       TRACECMD_FILE_CPU_FLYRECORD);
+	if (ret < 0) {
+		tracecmd_warning("Cannot write trace data into the file, unexpected state 0x%X",
+				 handle->file_state);
+		return ret;
+	}
+
+	if (do_write_check(handle, "flyrecord", 10))
+		return -1;
+
+	/* Write 0 for trace data offset and size of every CPU */
+	for (cpu = 0; cpu < cpus; cpu++) {
+		if (do_write_check(handle, &zero, 8) ||
+		    do_write_check(handle, &zero, 8))
+			return -1;
+	}
+
+	clock = get_clock(handle);
+	if (clock && save_clock(handle, clock))
+		return -1;
+
+	handle->file_state = TRACECMD_FILE_CPU_FLYRECORD;
+	return 0;
+}
+
+/*
+ * Copy the per-CPU trace data described by @data into the output file.
+ *
+ * @handle: output handle
+ * @cpus: number of entries in @data
+ * @data: per-CPU source descriptors (fd, offset, size)
+ * @buff_name: buffer name; "" selects handle->page_size, any other name
+ *	       takes the page size from get_trace_page_size()
+ *
+ * Every CPU's data starts at a page-aligned file offset. When
+ * HAS_SECTIONS() does not hold, placeholder offset/size pairs are written
+ * first and patched once the data has been copied; otherwise a buffer
+ * option describing the data is added.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+__hidden int out_write_cpu_data(struct tracecmd_output *handle,
+				int cpus, struct cpu_data_source *data, const char *buff_name)
+{
+	struct data_file_write *data_files = NULL;
+	enum tracecmd_section_flags flags = 0;
+	tsize_t data_offs, offset;
+	unsigned long long endian8;
+	unsigned long long read_size;
+	int page_size;
+	char *clock;
+	char *str = NULL;
+	int ret;
+	int i;
+
+	/* This can be called multiple times (when recording instances) */
+	ret = handle->file_state == TRACECMD_FILE_CPU_FLYRECORD ? 0 :
+				    check_file_state(handle->file_version,
+						     handle->file_state,
+						     TRACECMD_FILE_CPU_FLYRECORD);
+	if (ret < 0) {
+		tracecmd_warning("Cannot write trace data into the file, unexpected state 0x%X",
+				 handle->file_state);
+		goto out_free;
+	}
+
+	if (*buff_name == '\0')
+		page_size = handle->page_size;
+	else
+		page_size = get_trace_page_size(handle, buff_name);
+
+	data_offs = do_lseek(handle, 0, SEEK_CUR);
+	if (!HAS_SECTIONS(handle) && do_write_check(handle, "flyrecord", 10))
+		goto out_free;
+
+	if (handle->compress)
+		flags |= TRACECMD_SEC_FL_COMPRESS;
+	if (asprintf(&str, "buffer flyrecord %s", buff_name) < 1)
+		goto out_free;
+	offset = out_write_section_header(handle, TRACECMD_OPTION_BUFFER, str, flags, false);
+	free(str);
+	if (offset == (off_t)-1)
+		goto out_free;
+
+	data_files = calloc(cpus, sizeof(*data_files));
+	if (!data_files)
+		goto out_free;
+
+	for (i = 0; i < cpus; i++) {
+		data_files[i].file_size = data[i].size;
+		/*
+		 * Place 0 for the data offset and size, and save the offsets to
+		 * update them with the correct data later.
+		 */
+		if (!HAS_SECTIONS(handle)) {
+			endian8 = 0;
+			data_files[i].file_data_offset = do_lseek(handle, 0, SEEK_CUR);
+			if (do_write_check(handle, &endian8, 8))
+				goto out_free;
+			data_files[i].file_write_size = do_lseek(handle, 0, SEEK_CUR);
+			if (do_write_check(handle, &endian8, 8))
+				goto out_free;
+		}
+	}
+
+	if (!HAS_SECTIONS(handle)) {
+		/* NOTE(review): a failure to update the buffer offset is ignored here */
+		update_buffer_cpu_offset_v6(handle, buff_name, data_offs);
+		clock = get_clock(handle);
+		if (clock && save_clock(handle, clock))
+			goto out_free;
+	}
+
+	for (i = 0; i < cpus; i++) {
+		data_files[i].data_offset = do_lseek(handle, 0, SEEK_CUR);
+		/* Page align offset */
+		data_files[i].data_offset += page_size - 1;
+		data_files[i].data_offset &= ~(page_size - 1);
+
+		/* Compare the seek result directly, without squeezing it into an int */
+		if (do_lseek(handle, data_files[i].data_offset, SEEK_SET) == (off64_t)-1)
+			goto out_free;
+
+		if (!tracecmd_get_quiet(handle))
+			fprintf(stderr, "CPU%d data recorded at offset=0x%llx\n",
+				i, (unsigned long long)data_files[i].data_offset);
+
+		if (data[i].size) {
+			if (lseek64(data[i].fd, data[i].offset, SEEK_SET) == (off64_t)-1)
+				goto out_free;
+			read_size = out_copy_fd_compress(handle, data[i].fd,
+							 data[i].size, &data_files[i].write_size,
+							 page_size);
+
+			if (read_size != data_files[i].file_size) {
+				errno = EINVAL;
+				tracecmd_warning("did not match size of %lld to %lld",
+						 read_size, data_files[i].file_size);
+				goto out_free;
+			}
+		} else {
+			data_files[i].write_size = 0;
+		}
+
+		if (!HAS_SECTIONS(handle)) {
+			/* Write the real CPU data offset in the file */
+			if (do_lseek(handle, data_files[i].file_data_offset, SEEK_SET) == (off64_t)-1)
+				goto out_free;
+			endian8 = convert_endian_8(handle, data_files[i].data_offset);
+			if (do_write_check(handle, &endian8, 8))
+				goto out_free;
+			/* Write the real CPU data size in the file */
+			if (do_lseek(handle, data_files[i].file_write_size, SEEK_SET) == (off64_t)-1)
+				goto out_free;
+			endian8 = convert_endian_8(handle, data_files[i].write_size);
+			if (do_write_check(handle, &endian8, 8))
+				goto out_free;
+			/* Continue right after this CPU's data */
+			offset = data_files[i].data_offset + data_files[i].write_size;
+			if (do_lseek(handle, offset, SEEK_SET) == (off64_t)-1)
+				goto out_free;
+		}
+		if (!tracecmd_get_quiet(handle)) {
+			fprintf(stderr, " %llu bytes in size",
+				(unsigned long long)data_files[i].write_size);
+			if (flags & TRACECMD_SEC_FL_COMPRESS)
+				fprintf(stderr, " (%llu uncompressed)",
+					(unsigned long long)data_files[i].file_size);
+			fprintf(stderr, "\n");
+		}
+	}
+
+	if (HAS_SECTIONS(handle) &&
+	    !out_add_buffer_option(handle, buff_name, TRACECMD_OPTION_BUFFER,
+				   data_offs, cpus, data_files, page_size))
+		goto out_free;
+
+	/*
+	 * Clear the pointer once it is freed: the error path below frees
+	 * data_files again and must not see the stale pointer.
+	 */
+	free(data_files);
+	data_files = NULL;
+	if (do_lseek(handle, 0, SEEK_END) == (off64_t)-1)
+		return -1;
+
+	if (out_update_section_header(handle, offset))
+		goto out_free;
+
+	handle->file_state = TRACECMD_FILE_CPU_FLYRECORD;
+
+	if (HAS_SECTIONS(handle))
+		tracecmd_write_options(handle);
+
+	return 0;
+
+ out_free:
+	do_lseek(handle, 0, SEEK_END);
+	free(data_files);
+	return -1;
+}
+
+/*
+ * Open each of the @cpus files named in @cpu_data_files and write their
+ * whole content to @handle as the CPU data of buffer @buff_name.
+ *
+ * A NULL @buff_name is treated as "". Every file that was opened is closed
+ * again before returning.
+ *
+ * Returns the result of out_write_cpu_data(), or -1 if a file could not be
+ * stat'ed or opened, or memory could not be allocated.
+ */
+int tracecmd_write_cpu_data(struct tracecmd_output *handle,
+			    int cpus, char * const *cpu_data_files, const char *buff_name)
+{
+	struct cpu_data_source *data;
+	struct stat st;
+	int ret;
+	int i;
+
+	if (!buff_name)
+		buff_name = "";
+
+	data = calloc(cpus, sizeof(struct cpu_data_source));
+	if (!data)
+		return -1;
+
+	for (i = 0; i < cpus; i++) {
+		ret = stat(cpu_data_files[i], &st);
+		if (ret < 0) {
+			tracecmd_warning("can not stat '%s'", cpu_data_files[i]);
+			break;
+		}
+		data[i].fd = open(cpu_data_files[i], O_RDONLY);
+		if (data[i].fd < 0) {
+			/* Report the file name; the failed descriptor is not a string */
+			tracecmd_warning("Can't read '%s'", cpu_data_files[i]);
+			break;
+		}
+
+		data[i].size = st.st_size;
+		data[i].offset = 0;
+	}
+
+	/* Stopping early means file i failed; only 0..i-1 are open */
+	if (i < cpus)
+		ret = -1;
+	else
+		ret = out_write_cpu_data(handle, cpus, data, buff_name);
+
+	for (i--; i >= 0; i--)
+		close(data[i].fd);
+
+	free(data);
+	return ret;
+}
+
+/*
+ * Write the CPU count, buffer info and options, then the CPU data of the
+ * unnamed buffer taken from @cpu_data_files.
+ *
+ * Returns 0 on success, or the first non-zero result of the steps above.
+ */
+int tracecmd_append_cpu_data(struct tracecmd_output *handle,
+			     int cpus, char * const *cpu_data_files)
+{
+	int ret = tracecmd_write_cpus(handle, cpus);
+
+	if (!ret)
+		ret = tracecmd_write_buffer_info(handle);
+	if (!ret)
+		ret = tracecmd_write_options(handle);
+	if (ret)
+		return ret;
+
+	return tracecmd_write_cpu_data(handle, cpus, cpu_data_files, NULL);
+}
+
+/*
+ * Write the CPU data files of the buffer called @name; a thin wrapper that
+ * forwards to tracecmd_write_cpu_data() and returns its result.
+ */
+int tracecmd_append_buffer_cpu_data(struct tracecmd_output *handle,
+ const char *name, int cpus, char * const *cpu_data_files)
+{
+ return tracecmd_write_cpu_data(handle, cpus, cpu_data_files, name);
+}
+
+/*
+ * Create an output handle for the already existing trace file open on @fd.
+ *
+ * The file headers are read through a temporary input handle working on a
+ * duplicate of @fd; state, tep handle, page size, file version, option and
+ * string offsets and (if any) the compression protocol are taken from it.
+ * On success @fd is positioned at the end of the file.
+ *
+ * Returns the new handle, or NULL on failure.
+ */
+struct tracecmd_output *tracecmd_get_output_handle_fd(int fd)
+{
+	struct tracecmd_output *handle = NULL;
+	struct tracecmd_input *ihandle;
+	const char *cname = NULL;
+	const char *cver = NULL;
+	int fd2;
+
+	/* Move the file descriptor to the beginning */
+	if (lseek(fd, 0, SEEK_SET) == (off_t)-1)
+		return NULL;
+
+	/* dup fd to be used by the ihandle below */
+	fd2 = dup(fd);
+	if (fd2 < 0)
+		return NULL;
+
+	/* get a input handle from this */
+	ihandle = tracecmd_alloc_fd(fd2, TRACECMD_FL_LOAD_NO_PLUGINS);
+	if (!ihandle) {
+		/*
+		 * NOTE(review): fd2 may be leaked here unless
+		 * tracecmd_alloc_fd() closes it on failure - confirm.
+		 */
+		return NULL;
+	}
+	tracecmd_read_headers(ihandle, 0);
+
+	/* move the file descriptor to the end */
+	if (lseek(fd, 0, SEEK_END) == (off_t)-1)
+		goto out_free;
+
+	/* create a partial output handle */
+	handle = calloc(1, sizeof(*handle));
+	if (!handle)
+		goto out_free;
+
+	handle->fd = fd;
+
+	/* get tep, state, endian and page size */
+	handle->file_state = tracecmd_get_file_state(ihandle);
+	/* Use the tep of the ihandle for later writes */
+	handle->pevent = tracecmd_get_tep(ihandle);
+	tep_ref(handle->pevent);
+	handle->page_size = tracecmd_page_size(ihandle);
+	handle->file_version = tracecmd_get_in_file_version(ihandle);
+	handle->options_start = get_last_option_offset(ihandle);
+	handle->strings_offs = get_meta_strings_size(ihandle);
+	list_head_init(&handle->options);
+	list_head_init(&handle->buffers);
+
+	if (!tracecmd_get_file_compress_proto(ihandle, &cname, &cver)) {
+		handle->compress = tracecmd_compress_alloc(cname, cver, handle->fd,
+							    handle->pevent, handle->msg_handle);
+		if (!handle->compress)
+			goto out_free;
+	}
+	tracecmd_close(ihandle);
+
+	return handle;
+
+ out_free:
+	tracecmd_close(ihandle);
+	if (handle) {
+		/* Drop the tep reference taken above before the handle goes away */
+		if (handle->pevent)
+			tep_unref(handle->pevent);
+		free(handle);
+	}
+	return NULL;
+}
+
+/**
+ * tracecmd_output_create - Create new output handle to a trace file with given name
+ * @output_file: Name of the trace file that will be created.
+ *
+ * The @output_file parameter can be NULL. In this case the output handle is created
+ * and initialized, but is not associated with a file.
+ *
+ * Returns pointer to created output handle, or NULL in case of an error.
+ */
+struct tracecmd_output *tracecmd_output_create(const char *output_file)
+{
+	struct tracecmd_output *out;
+	int fd = -1;
+
+	/* Without a file name the handle is created on fd -1 */
+	if (output_file) {
+		fd = open(output_file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
+		if (fd < 0)
+			return NULL;
+	}
+
+	out = tracecmd_output_create_fd(fd);
+	if (out)
+		return out;
+
+	/* Handle creation failed: do not leave the new file behind */
+	if (fd >= 0) {
+		close(fd);
+		unlink(output_file);
+	}
+
+	return NULL;
+}
+
+/**
+ * tracecmd_copy - copy the headers of one trace.dat file for another
+ * @ihandle: input handle of the trace.dat file to copy
+ * @file: the trace.dat file to create
+ * @state: what data will be copied from the source handle
+ * @file_version: version of the output file
+ * @compression: compression of the output file, can be one of:
+ * NULL - inherit compression from the input file
+ * "any" - compress the output file with the best available algorithm
+ * "none" - do not compress the output file
+ * algorithm_name - compress the output file with specified algorithm
+ *
+ * Reads the header information and creates a new trace data file
+ * with the same characteristics (events and all) and returns
+ * tracecmd_output handle to this new file.
+ */
+struct tracecmd_output *tracecmd_copy(struct tracecmd_input *ihandle, const char *file,
+				      enum tracecmd_file_states state, int file_version,
+				      const char *compression)
+{
+	enum tracecmd_file_states header_state = state;
+	struct tracecmd_output *handle;
+
+	handle = tracecmd_output_create(file);
+	if (!handle)
+		return NULL;
+
+	if (tracecmd_output_set_from_input(handle, ihandle))
+		goto out_free;
+
+	/* Versions below FILE_VERSION_MIN keep what was set from the input */
+	if (file_version >= FILE_VERSION_MIN)
+		tracecmd_output_set_version(handle, file_version);
+
+	if (compression && tracecmd_output_set_compression(handle, compression))
+		goto out_free;
+
+	output_write_init(handle);
+
+	/* Headers are copied up to the CPU count state at most */
+	if (header_state > TRACECMD_FILE_CPU_COUNT)
+		header_state = TRACECMD_FILE_CPU_COUNT;
+	if (tracecmd_copy_headers(ihandle, handle, 0, header_state) < 0)
+		goto out_free;
+
+	if (tracecmd_copy_buffer_descr(ihandle, handle) < 0)
+		goto out_free;
+
+	if (state >= TRACECMD_FILE_OPTIONS &&
+	    tracecmd_copy_options(ihandle, handle) < 0)
+		goto out_free;
+
+	if (state >= TRACECMD_FILE_CPU_LATENCY &&
+	    tracecmd_copy_trace_data(ihandle, handle) < 0)
+		goto out_free;
+
+	if (HAS_SECTIONS(handle))
+		tracecmd_write_options(handle);
+
+	/* The file is all ready to have cpu data attached */
+	return handle;
+
+out_free:
+	/* handle is never NULL here: creation failure returned early */
+	tracecmd_output_close(handle);
+
+	unlink(file);
+	return NULL;
+}
+
+/* Overwrite the file state recorded in @handle with @new_state; nothing is validated here */
+__hidden void out_set_file_state(struct tracecmd_output *handle, int new_state)
+{
+ handle->file_state = new_state;
+}
+
+/*
+ * Ask check_file_state() about moving from the handle's current file state
+ * to @new_state, for the handle's file version, and return its verdict.
+ */
+__hidden bool check_out_state(struct tracecmd_output *handle, int new_state)
+{
+ return check_file_state(handle->file_version, handle->file_state, new_state);
+}
+
+/* Returns true when @handle has a compression context attached */
+__hidden bool out_check_compression(struct tracecmd_output *handle)
+{
+ return (handle->compress != NULL);
+}
+
+/*
+ * Record where an options section starts.
+ *
+ * When HAS_SECTIONS() does not hold, @start is simply stored in
+ * handle->options_start. Otherwise @start is written (8 bytes) at the
+ * previously stored options_start location, the file position held on
+ * entry becomes the new options_start, and the position is restored.
+ *
+ * Returns 0 on success, -1 if there is no previous location or a
+ * seek/write fails.
+ */
+__hidden int out_save_options_offset(struct tracecmd_output *handle, unsigned long long start)
+{
+	unsigned long long resume, start8;
+
+	if (!HAS_SECTIONS(handle)) {
+		handle->options_start = start;
+		return 0;
+	}
+
+	/* Append to the previous options section, if any */
+	if (!handle->options_start)
+		return -1;
+
+	resume = do_lseek(handle, 0, SEEK_CUR);
+	if (do_lseek(handle, handle->options_start, SEEK_SET) == (off64_t)-1)
+		return -1;
+
+	start8 = convert_endian_8(handle, start);
+	if (do_write_check(handle, &start8, 8))
+		return -1;
+
+	handle->options_start = resume;
+	if (do_lseek(handle, resume, SEEK_SET) == (off64_t)-1)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * tracecmd_get_out_file_version - return the trace.dat file version
+ * @handle: output handle for the trace.dat file
+ *
+ * Returns the file_version stored in @handle.
+ */
+unsigned long tracecmd_get_out_file_version(struct tracecmd_output *handle)
+{
+ return handle->file_version;
+}
+
+/*
+ * Return the current position of @handle, as reported by
+ * do_lseek(handle, 0, SEEK_CUR).
+ */
+unsigned long long tracecmd_get_out_file_offset(struct tracecmd_output *handle)
+{
+ return do_lseek(handle, 0, SEEK_CUR);
+}
diff --git a/lib/trace-cmd/trace-perf.c b/lib/trace-cmd/trace-perf.c
new file mode 100644
index 00000000..a10da55d
--- /dev/null
+++ b/lib/trace-cmd/trace-perf.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2021, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+
+#include "trace-cmd-private.h"
+
+/* Fill @pe with the default perf event attributes used by trace_perf_init() */
+static void default_perf_init_pe(struct perf_event_attr *pe)
+{
+	pe->size = sizeof(struct perf_event_attr);
+
+	/*
+	 * NOTE(review): the type is PERF_TYPE_SOFTWARE while the config uses
+	 * a PERF_COUNT_HW_* constant - confirm this pairing is intended.
+	 */
+	pe->type = PERF_TYPE_SOFTWARE;
+	pe->config = PERF_COUNT_HW_CPU_CYCLES;
+
+	/* Sampling setup */
+	pe->sample_type = PERF_SAMPLE_CPU;
+	pe->freq = 1;
+	pe->sample_freq = 1000;
+	pe->precise_ip = 1;
+	pe->sample_id_all = 1;
+	pe->read_format = PERF_FORMAT_ID |
+			  PERF_FORMAT_TOTAL_TIME_ENABLED |
+			  PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	/* Single-bit attribute flags */
+	pe->disabled = 1;
+	pe->exclude_kernel = 1;
+	pe->inherit = 1;
+	pe->mmap = 1;
+	pe->comm = 1;
+	pe->task = 1;
+}
+
+/**
+ * trace_perf_init - Initialize perf context
+ *
+ * @perf: structure, representing perf context, that will be initialized.
+ * @pages: Number of perf memory mapped pages.
+ * @cpu: CPU number, associated with this perf context.
+ * @pid: PID, associated with this perf context.
+ *
+ * The perf context is initialized with default values. The caller can set
+ * custom perf parameters in perf->pe, before calling trace_perf_open() API.
+ *
+ * Returns 0 on success, or -1 in case of an error.
+ *
+ */
+int __hidden trace_perf_init(struct trace_perf *perf, int pages, int cpu, int pid)
+{
+	if (perf == NULL)
+		return -1;
+
+	/* Start from a zeroed context; fd is -1 until trace_perf_open() */
+	memset(perf, 0, sizeof(struct trace_perf));
+	perf->fd = -1;
+	perf->pid = pid;
+	perf->cpu = cpu;
+	perf->pages = pages;
+	default_perf_init_pe(&perf->pe);
+
+	return 0;
+}
+
+/**
+ * trace_perf_close - Close perf session
+ *
+ * @perf: structure, representing context of a running perf session, opened
+ * with trace_perf_open()
+ *
+ */
+void __hidden trace_perf_close(struct trace_perf *perf)
+{
+	/* Close the event descriptor, if one is open */
+	if (perf->fd >= 0)
+		close(perf->fd);
+	perf->fd = -1;
+
+	/* Drop the mapping: perf->pages pages plus one extra page */
+	if (perf->mmap != NULL && perf->mmap != MAP_FAILED)
+		munmap(perf->mmap, (perf->pages + 1) * getpagesize());
+	perf->mmap = NULL;
+}
+
+/**
+ * trace_perf_open - Open perf session
+ *
+ * @perf: structure, representing perf context that will be opened. It must be
+ * initialized with trace_perf_init().
+ *
+ * Returns 0 on success, or -1 in case of an error. In case of success, the
+ * session must be closed with trace_perf_close()
+ */
+int __hidden trace_perf_open(struct trace_perf *perf)
+{
+	perf->fd = syscall(__NR_perf_event_open, &perf->pe, perf->pid, perf->cpu, -1, 0);
+	if (perf->fd < 0)
+		return -1;
+	fcntl(perf->fd, F_SETFL, O_NONBLOCK);
+
+	/* Map perf->pages pages plus one extra page of the event */
+	perf->mmap = mmap(NULL, (perf->pages + 1) * getpagesize(),
+			  PROT_READ | PROT_WRITE, MAP_SHARED, perf->fd, 0);
+	if (perf->mmap != MAP_FAILED)
+		return 0;
+
+	/* Mapping failed: release the descriptor again */
+	trace_perf_close(perf);
+	return -1;
+}
diff --git a/lib/trace-cmd/trace-plugin.c b/lib/trace-cmd/trace-plugin.c
new file mode 100644
index 00000000..127771ea
--- /dev/null
+++ b/lib/trace-cmd/trace-plugin.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdlib.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <sys/stat.h>
+#include <libgen.h>
+#include "trace-cmd.h"
+#include "trace-local.h"
+#include "trace-cmd-local.h"
+
+/* Per-user plugin directory; appended to $HOME by load_plugins_hook() */
+#define LOCAL_PLUGIN_DIR ".local/lib/trace-cmd/plugins/"
+
+/*
+ * One loaded plugin. Entries form a singly linked list, newest first.
+ * @name is the allocated plugin path and @handle the dlopen() handle;
+ * both are released by tracecmd_unload_plugins().
+ */
+struct trace_plugin_list {
+ struct trace_plugin_list *next;
+ char *name;
+ void *handle;
+};
+
+/*
+ * Context handed to plugins at load and unload time.
+ * @context tells which member of the union is meaningful; @data aliases the
+ * typed input/output pointers. @flags holds tracecmd_plugin_flag bits.
+ */
+struct trace_plugin_context {
+ enum tracecmd_context context;
+ enum tracecmd_plugin_flag flags;
+ union {
+ void *data;
+ struct tracecmd_input *trace_input;
+ struct tracecmd_output *trace_output;
+ };
+};
+
+/**
+ * tracecmd_plugin_context_create - Create and initialize tracecmd plugins context.
+ * @context: Context of the trace-cmd command.
+ * @data: Pointer to the context specific data, which will be passed to plugins.
+ *
+ * Returns a pointer to created tracecmd plugins context, or NULL in case memory
+ * allocation fails. The returned pointer should be freed by free ().
+ */
+struct trace_plugin_context *
+tracecmd_plugin_context_create(enum tracecmd_context context, void *data)
+{
+	struct trace_plugin_context *ctx = calloc(1, sizeof(*ctx));
+
+	/* Flags start out cleared; only context and data are filled in */
+	if (ctx) {
+		ctx->context = context;
+		ctx->data = data;
+	}
+	return ctx;
+}
+
+/**
+ * tracecmd_plugin_set_flag - Set a flag to tracecmd plugins context.
+ * @context: Context of the trace-cmd command.
+ * @flag: Flag, which will be set.
+ *
+ */
+void tracecmd_plugin_set_flag(struct trace_plugin_context *context,
+			      enum tracecmd_plugin_flag flag)
+{
+	/* A NULL context is silently ignored */
+	if (!context)
+		return;
+
+	context->flags |= flag;
+}
+
+/**
+ * tracecmd_plugin_context_input - Get a tracecmd_input plugin context.
+ * @context: Context of the trace-cmd command.
+ *
+ * Returns pointer to tracecmd_input, if such context is available or
+ * NULL otherwise.
+ */
+struct tracecmd_input *
+tracecmd_plugin_context_input(struct trace_plugin_context *context)
+{
+	/* Only a TRACECMD_INPUT context carries an input handle */
+	if (context && context->context == TRACECMD_INPUT)
+		return context->trace_input;
+
+	return NULL;
+}
+
+/**
+ * tracecmd_plugin_context_output - Get a tracecmd_output plugin context
+ * @context: Context of the trace-cmd command.
+ *
+ * Returns pointer to tracecmd_output, if such context is available or
+ * NULL otherwise.
+ */
+struct tracecmd_output *
+tracecmd_plugin_context_output(struct trace_plugin_context *context)
+{
+	/* Only a TRACECMD_OUTPUT context carries an output handle */
+	if (context && context->context == TRACECMD_OUTPUT)
+		return context->trace_output;
+
+	return NULL;
+}
+
+/*
+ * Load the plugin @file from directory @path, record it at the head of the
+ * list that @data points to and call its loader function with @trace.
+ *
+ * @data is a pointer to the head pointer of a trace_plugin_list. On any
+ * failure a warning is printed, nothing is added to the list, and both the
+ * path string and the library handle are released.
+ */
+static void
+load_plugin(struct trace_plugin_context *trace, const char *path,
+	    const char *file, void *data)
+{
+	struct trace_plugin_list **plugin_list = data;
+	tracecmd_plugin_load_func func;
+	struct trace_plugin_list *list;
+	char *plugin;
+	void *handle;
+	int ret;
+
+	ret = asprintf(&plugin, "%s/%s", path, file);
+	if (ret < 0) {
+		tracecmd_warning("could not allocate plugin memory");
+		return;
+	}
+
+	handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
+	if (!handle) {
+		tracecmd_warning("could not load plugin '%s'\n%s", plugin, dlerror());
+		goto out_free;
+	}
+
+	func = dlsym(handle, TRACECMD_PLUGIN_LOADER_NAME);
+	if (!func) {
+		tracecmd_warning("could not find func '%s' in plugin '%s'\n%s",
+				 TRACECMD_PLUGIN_LOADER_NAME, plugin, dlerror());
+		goto out_close;
+	}
+
+	list = malloc(sizeof(*list));
+	if (!list) {
+		tracecmd_warning("could not allocate plugin memory");
+		goto out_close;
+	}
+
+	/* The list entry takes ownership of both the handle and the path */
+	list->next = *plugin_list;
+	list->handle = handle;
+	list->name = plugin;
+	*plugin_list = list;
+
+	tracecmd_info("registering plugin: %s", plugin);
+	func(trace);
+	return;
+
+ out_close:
+	/* Do not keep a library loaded that is not tracked in the list */
+	dlclose(handle);
+ out_free:
+	free(plugin);
+}
+
+/*
+ * Call @load_plugin for every entry of directory @path whose name ends in
+ * @suffix. "." and ".." are skipped, and nothing is done when @path is not
+ * a directory that can be opened.
+ */
+static void
+load_plugins_dir(struct trace_plugin_context *trace, const char *suffix,
+		 const char *path,
+		 void (*load_plugin)(struct trace_plugin_context *trace,
+				     const char *path,
+				     const char *name,
+				     void *data),
+		 void *data)
+{
+	size_t suffix_len = strlen(suffix);
+	struct dirent *dent;
+	struct stat st;
+	DIR *dir;
+	int ret;
+
+	ret = stat(path, &st);
+	if (ret < 0)
+		return;
+
+	if (!S_ISDIR(st.st_mode))
+		return;
+
+	dir = opendir(path);
+	if (!dir)
+		return;
+
+	while ((dent = readdir(dir))) {
+		const char *name = dent->d_name;
+		size_t name_len;
+
+		if (strcmp(name, ".") == 0 ||
+		    strcmp(name, "..") == 0)
+			continue;
+
+		/*
+		 * Only load plugins that end in suffix. Names shorter than
+		 * the suffix cannot match, and must be rejected before the
+		 * tail pointer is computed so it never points before the
+		 * start of the name.
+		 */
+		name_len = strlen(name);
+		if (name_len < suffix_len ||
+		    strcmp(name + (name_len - suffix_len), suffix) != 0)
+			continue;
+
+		load_plugin(trace, path, name, data);
+	}
+
+	closedir(dir);
+}
+
+/*
+ * Work out the plugin directory of a source tree build.
+ *
+ * The running executable is resolved through /proc/self/exe. If the
+ * directory holding it is called "tracecmd", that last component is replaced
+ * by "lib/trace-cmd/plugins".
+ *
+ * Returns an allocated string the caller must free(), or NULL when the
+ * executable is not located in such a directory or on any error.
+ */
+static char *get_source_plugins_dir(void)
+{
+	char path[PATH_MAX+1];
+	char *plugins;
+	char *dir;
+	char *p;
+	ssize_t ret;
+
+	ret = readlink("/proc/self/exe", path, PATH_MAX);
+	if (ret < 0 || ret > PATH_MAX)
+		return NULL;
+
+	path[ret] = 0;
+
+	/* dirname() may return a different buffer: always use its result */
+	dir = dirname(path);
+	p = strrchr(dir, '/');
+	if (!p)
+		return NULL;
+	/* Check if we are in the source tree */
+	if (strcmp(p, "/tracecmd") != 0)
+		return NULL;
+
+	/*
+	 * Build the result in freshly allocated memory instead of writing
+	 * the longer suffix into the fixed size path buffer.
+	 */
+	if (asprintf(&plugins, "%.*s/lib/trace-cmd/plugins",
+		     (int)(p - dir), dir) < 0)
+		return NULL;
+
+	return plugins;
+}
+
+/*
+ * Run @load_plugin over every plugin directory, in this order: the system
+ * directory (if compiled in and not disabled), $TRACECMD_PLUGIN_DIR,
+ * $HOME/LOCAL_PLUGIN_DIR and finally the source tree directory.
+ *
+ * Nothing is loaded when TRACECMD_DISABLE_PLUGINS is set in @trace, and the
+ * last two directories are skipped when HOME is not set.
+ */
+static void
+load_plugins_hook(struct trace_plugin_context *trace, const char *suffix,
+		  void (*load_plugin)(struct trace_plugin_context *trace,
+				      const char *path,
+				      const char *name,
+				      void *data),
+		  void *data)
+{
+	char *home_dir;
+	char *env_dir;
+	char *dir;
+
+	if (trace && trace->flags & TRACECMD_DISABLE_PLUGINS)
+		return;
+
+	/*
+	 * If a system plugin directory was defined,
+	 * check that first.
+	 */
+#ifdef PLUGIN_TRACECMD_DIR
+	if (!trace || !(trace->flags & TRACECMD_DISABLE_SYS_PLUGINS))
+		load_plugins_dir(trace, suffix, PLUGIN_TRACECMD_DIR,
+				 load_plugin, data);
+#endif
+
+	/*
+	 * Next let the environment-set plugin directory
+	 * override the system defaults.
+	 */
+	env_dir = getenv("TRACECMD_PLUGIN_DIR");
+	if (env_dir)
+		load_plugins_dir(trace, suffix, env_dir, load_plugin, data);
+
+	/*
+	 * Now let the home directory override the environment
+	 * or system defaults.
+	 */
+	home_dir = getenv("HOME");
+	if (!home_dir)
+		return;
+
+	if (asprintf(&dir, "%s/%s", home_dir, LOCAL_PLUGIN_DIR) < 0) {
+		tracecmd_warning("could not allocate plugin memory");
+		return;
+	}
+	load_plugins_dir(trace, suffix, dir, load_plugin, data);
+	free(dir);
+
+	/* Last, the plugins of a source tree build, if we run from one */
+	dir = get_source_plugins_dir();
+	if (!dir)
+		return;
+
+	load_plugins_dir(trace, suffix, dir, load_plugin, data);
+	free(dir);
+}
+
+/**
+ * tracecmd_load_plugins - Load trace-cmd specific plugins.
+ * @trace: Context of the trace-cmd command, will be passed to the plugins
+ * at load time.
+ *
+ * Returns a list of loaded plugins
+ */
+struct trace_plugin_list*
+tracecmd_load_plugins(struct trace_plugin_context *trace)
+{
+	struct trace_plugin_list *loaded = NULL;
+
+	/* Every ".so" file found in the plugin directories is tried */
+	load_plugins_hook(trace, ".so", load_plugin, &loaded);
+
+	return loaded;
+}
+
+/**
+ * tracecmd_unload_plugins - Unload trace-cmd specific plugins.
+ * @plugin_list: List of plugins, previously loaded with tracecmd_load_plugins.
+ * @trace: Context of the trace-cmd command, will be passed to the plugins
+ * at unload time.
+ *
+ */
+void
+tracecmd_unload_plugins(struct trace_plugin_list *plugin_list,
+			struct trace_plugin_context *trace)
+{
+	tracecmd_plugin_unload_func func;
+	struct trace_plugin_list *next;
+
+	for (; plugin_list; plugin_list = next) {
+		/* Remember the successor before this entry is freed */
+		next = plugin_list->next;
+
+		/* The unloader is optional */
+		func = dlsym(plugin_list->handle, TRACECMD_PLUGIN_UNLOADER_NAME);
+		if (func)
+			func(trace);
+
+		dlclose(plugin_list->handle);
+		free(plugin_list->name);
+		free(plugin_list);
+	}
+}
diff --git a/lib/trace-cmd/trace-recorder.c b/lib/trace-cmd/trace-recorder.c
new file mode 100644
index 00000000..c8333789
--- /dev/null
+++ b/lib/trace-cmd/trace-recorder.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#define _LARGEFILE64_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <time.h>
+#include <poll.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "tracefs.h"
+#include "trace-cmd-private.h"
+#include "trace-cmd-local.h"
+#include "event-utils.h"
+
+/* F_GETPIPE_SZ was introduced in 2.6.35, older systems don't have it */
+#ifndef F_GETPIPE_SZ
+# define F_GETPIPE_SZ 1032 /* The Linux number for the option */
+#endif
+
+#ifndef SPLICE_F_MOVE
+# define SPLICE_F_MOVE 1
+# define SPLICE_F_NONBLOCK 2
+# define SPLICE_F_MORE 4
+# define SPLICE_F_GIFT 8
+#endif
+
+#define POLL_TIMEOUT_MS 1000
+
+/*
+ * State of one per-CPU recorder: where the trace data is read from, where
+ * it is written to and how it is moved between the two.
+ */
+struct tracecmd_recorder {
+ /* Descriptor currently written to; always equal to fd1 or fd2 */
+ int fd;
+ /* Primary output file; final destination of all data when max is set */
+ int fd1;
+ /* Secondary output file used for rotation when max is set, else -1 */
+ int fd2;
+ /* Descriptor the trace data is read from, -1 while not set */
+ int trace_fd;
+ /* Intermediate pipe for splice(): [0] read end, [1] write end, -1 if unused */
+ int brass[2];
+ /* Number of bytes requested per splice() from trace_fd */
+ int pipe_size;
+ /* System page size, from getpagesize() */
+ int page_size;
+ /* CPU this recorder was created for */
+ int cpu;
+ /* Set to 1 by tracecmd_stop_recording() to end the recording loop */
+ int stop;
+ /* Pages written to one file before switching to the other; 0 = no limit */
+ int max;
+ /* Pages accounted to the current output file since the last switch */
+ int pages;
+ /* Bytes accounted towards the next page in update_fd() */
+ int count;
+ /* splice() flags used when writing to fd */
+ unsigned fd_flags;
+ /* splice() flags used when reading from trace_fd */
+ unsigned trace_fd_flags;
+ /* TRACECMD_RECORD_* flags the recorder was created with */
+ unsigned flags;
+};
+
+/*
+ * Copy the whole content of @src (from its start) to the current file
+ * position of @dst, using a buffer of @size bytes.
+ *
+ * Returns 0 on success and a negative value if a read or a write failed.
+ */
+static int append_file(int size, int dst, int src)
+{
+	char buf[size];
+	int r;
+
+	lseek64(src, 0, SEEK_SET);
+
+	/* If there's an error, then we are pretty much screwed :-p */
+	for (;;) {
+		int done = 0;
+
+		r = read(src, buf, size);
+		if (r < 0)
+			return r;
+		if (!r)
+			break;
+
+		/* write() may be short: keep going until the chunk is out */
+		while (done < r) {
+			int w = write(dst, buf + done, r - done);
+
+			if (w < 0)
+				return w;
+			/* No progress at all, give up instead of spinning */
+			if (!w)
+				return -1;
+			done += w;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Release a recorder: when a size limit was used, first merge the two
+ * rotating output files into fd1, then close the brass pipe, trace_fd,
+ * fd1 and fd2 (each only if it is >= 0) and free the structure.
+ * Passing NULL is a no-op.
+ */
+void tracecmd_free_recorder(struct tracecmd_recorder *recorder)
+{
+ if (!recorder)
+ return;
+
+ if (recorder->max) {
+ /* Need to put everything into fd1 */
+ if (recorder->fd == recorder->fd1) {
+ int ret;
+ /*
+ * Crap, the older data is in fd2, and we need
+ * to append fd1 onto it, and then copy over to fd1
+ */
+ ret = append_file(recorder->page_size,
+ recorder->fd2, recorder->fd1);
+ /* Error on copying, then just keep fd1 */
+ if (ret) {
+ lseek64(recorder->fd1, 0, SEEK_END);
+ goto close;
+ }
+ /* fd1 is now fully contained in fd2: empty it before copying back */
+ lseek64(recorder->fd1, 0, SEEK_SET);
+ ftruncate(recorder->fd1, 0);
+ }
+ /* Copy all of fd2 to the current position of fd1; the result is not checked */
+ append_file(recorder->page_size, recorder->fd1, recorder->fd2);
+ }
+ /* Note: this label shares its name with close(2); labels live in their own namespace */
+ close:
+ if (recorder->brass[0] >= 0)
+ close(recorder->brass[0]);
+
+ if (recorder->brass[1] >= 0)
+ close(recorder->brass[1]);
+
+ if (recorder->trace_fd >= 0)
+ close(recorder->trace_fd);
+
+ if (recorder->fd1 >= 0)
+ close(recorder->fd1);
+
+ if (recorder->fd2 >= 0)
+ close(recorder->fd2);
+
+ free(recorder);
+}
+
+/*
+ * Switch the recorder to non-blocking operation: O_NONBLOCK is added to
+ * the file status flags of trace_fd and SPLICE_F_NONBLOCK to the flags
+ * used when splicing to the output descriptor.
+ */
+static void set_nonblock(struct tracecmd_recorder *recorder)
+{
+	int trace_fd = recorder->trace_fd;
+	long status;
+
+	/* Reads from the trace source must not block */
+	status = fcntl(trace_fd, F_GETFL);
+	fcntl(trace_fd, F_SETFL, status | O_NONBLOCK);
+
+	/* Neither must writes to the output stream */
+	recorder->fd_flags |= SPLICE_F_NONBLOCK;
+}
+
+/**
+ * tracecmd_create_buffer_recorder_fd2 - create a recorder on open descriptors
+ * @fd: descriptor of the primary output file
+ * @fd2: descriptor of the secondary output file used for rotation when
+ *	 @maxkb is set, otherwise unused (callers in this file pass -1)
+ * @cpu: CPU whose per-CPU file is opened below @buffer
+ * @flags: TRACECMD_RECORD_* flags configuring the recorder
+ * @buffer: directory containing per_cpu/cpuN/, or NULL to leave the trace
+ *	    source descriptor unset (-1)
+ * @maxkb: size limit in KB; half of it (in pages, at least one page) is
+ *	   written to one file before switching to the other. 0 means no limit.
+ *
+ * Returns the new recorder, or NULL on failure. On failure the caller keeps
+ * ownership of @fd and @fd2: they are neither closed nor written to here.
+ */
+struct tracecmd_recorder *
+tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags,
+				    const char *buffer, int maxkb)
+{
+	struct tracecmd_recorder *recorder;
+	char *path = NULL;
+	int pipe_size = 0;
+	int ret;
+
+	recorder = malloc(sizeof(*recorder));
+	if (!recorder)
+		return NULL;
+
+	recorder->cpu = cpu;
+	recorder->flags = flags;
+	/* malloc() does not zero the memory, give every field a defined value */
+	recorder->stop = 0;
+
+	recorder->fd_flags = SPLICE_F_MOVE;
+
+	if (!(recorder->flags & TRACECMD_RECORD_BLOCK_SPLICE))
+		recorder->fd_flags |= SPLICE_F_NONBLOCK;
+
+	recorder->trace_fd_flags = SPLICE_F_MOVE;
+
+	/* Init to know what to free and release */
+	recorder->trace_fd = -1;
+	recorder->brass[0] = -1;
+	recorder->brass[1] = -1;
+
+	recorder->page_size = getpagesize();
+	/*
+	 * Default splice size. It is also what direct_splice_data() uses
+	 * when no brass pipe is created (TRACECMD_RECORD_NOBRASS).
+	 */
+	recorder->pipe_size = recorder->page_size;
+
+	if (maxkb) {
+		int kb_per_page = recorder->page_size >> 10;
+
+		if (!kb_per_page)
+			kb_per_page = 1;
+		recorder->max = maxkb / kb_per_page;
+		/* keep max half */
+		recorder->max >>= 1;
+		if (!recorder->max)
+			recorder->max = 1;
+	} else
+		recorder->max = 0;
+
+	recorder->count = 0;
+	recorder->pages = 0;
+
+	/* fd always points to what to write to */
+	recorder->fd = fd;
+	recorder->fd1 = fd;
+	recorder->fd2 = fd2;
+
+	if (buffer) {
+		if (flags & TRACECMD_RECORD_SNAPSHOT)
+			ret = asprintf(&path, "%s/per_cpu/cpu%d/snapshot_raw",
+				       buffer, cpu);
+		else
+			ret = asprintf(&path, "%s/per_cpu/cpu%d/trace_pipe_raw",
+				       buffer, cpu);
+		if (ret < 0)
+			goto out_free;
+
+		recorder->trace_fd = open(path, O_RDONLY);
+		free(path);
+
+		if (recorder->trace_fd < 0)
+			goto out_free;
+	}
+
+	if (!(recorder->flags & (TRACECMD_RECORD_NOSPLICE |
+				 TRACECMD_RECORD_NOBRASS))) {
+		ret = pipe(recorder->brass);
+		if (ret < 0)
+			goto out_free;
+
+		ret = fcntl(recorder->brass[0], F_GETPIPE_SZ, &pipe_size);
+		/*
+		 * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
+		 * in 2.6.31. If we are running on an older kernel, just fall
+		 * back to using page_size for splice(). It could also return
+		 * success, but not modify pipe_size.
+		 *
+		 * NOTE(review): fcntl(2) documents the capacity as the return
+		 * value of F_GETPIPE_SZ, so pipe_size presumably stays 0 and
+		 * the page_size fallback is always taken - confirm before
+		 * changing the splice size.
+		 */
+		if (ret < 0 || !pipe_size)
+			pipe_size = recorder->page_size;
+
+		recorder->pipe_size = pipe_size;
+	}
+
+	if (recorder->flags & TRACECMD_RECORD_POLL)
+		set_nonblock(recorder);
+
+	return recorder;
+
+ out_free:
+	/*
+	 * The output descriptors still belong to the caller, which closes
+	 * them itself when creation fails. Detach them so that
+	 * tracecmd_free_recorder() neither merges nor closes them.
+	 */
+	recorder->max = 0;
+	recorder->fd1 = -1;
+	recorder->fd2 = -1;
+	tracecmd_free_recorder(recorder);
+	return NULL;
+}
+
+/*
+ * Create a recorder writing to the already open @fd, without a second
+ * output file and without a size limit.
+ */
+struct tracecmd_recorder *
+tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *buffer)
+{
+	const int no_second_fd = -1;
+	const int no_size_limit = 0;
+
+	return tracecmd_create_buffer_recorder_fd2(fd, no_second_fd, cpu, flags,
+						   buffer, no_size_limit);
+}
+
+/*
+ * Create (or truncate) @file and build a recorder writing to it. If the
+ * recorder cannot be created the descriptor is closed and @file is
+ * removed again. Returns NULL on any failure.
+ */
+static struct tracecmd_recorder *
+__tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
+				  const char *buffer)
+{
+	struct tracecmd_recorder *recorder;
+	int out_fd;
+
+	out_fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
+	if (out_fd < 0)
+		return NULL;
+
+	recorder = tracecmd_create_buffer_recorder_fd(out_fd, cpu, flags, buffer);
+	if (recorder)
+		return recorder;
+
+	/* Nothing was recorded, do not leave an empty file behind */
+	close(out_fd);
+	unlink(file);
+
+	return NULL;
+}
+
+/*
+ * Create a recorder for @file that is limited to @maxkb. Two files are
+ * opened, @file and "@file.1", and handed to
+ * tracecmd_create_buffer_recorder_fd2(). The name of the second file is
+ * unlinked before returning, whether or not creation succeeded.
+ *
+ * With @maxkb == 0 this is tracecmd_create_buffer_recorder().
+ * Returns NULL on failure; @file is removed if it was created.
+ */
+struct tracecmd_recorder *
+tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags,
+				      const char *buffer, int maxkb)
+{
+	struct tracecmd_recorder *recorder = NULL;
+	char *second;
+	int len;
+	int fd;
+	int fd2;
+
+	if (!maxkb)
+		return tracecmd_create_buffer_recorder(file, cpu, flags, buffer);
+
+	/* Room for ".1" and the terminating NUL */
+	len = strlen(file);
+	second = malloc(len + 3);
+	if (!second)
+		return NULL;
+
+	sprintf(second, "%s.1", file);
+
+	fd = open(file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
+	if (fd >= 0) {
+		fd2 = open(second, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
+		if (fd2 >= 0) {
+			recorder = tracecmd_create_buffer_recorder_fd2(fd, fd2, cpu, flags,
+								       buffer, maxkb);
+			if (!recorder)
+				close(fd2);
+		}
+		if (!recorder) {
+			close(fd);
+			unlink(file);
+		}
+	}
+
+	/* Unlink file2, we need to add everything to file at the end */
+	unlink(second);
+	free(second);
+
+	return recorder;
+}
+
+/*
+ * Public entry point: create @file and a recorder writing to it, reading
+ * the per-CPU data of @cpu below the @buffer directory.
+ */
+struct tracecmd_recorder *
+tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
+				const char *buffer)
+{
+	struct tracecmd_recorder *recorder;
+
+	recorder = __tracecmd_create_buffer_recorder(file, cpu, flags, buffer);
+
+	return recorder;
+}
+
+/**
+ * tracecmd_create_recorder_virt - Create a recorder reading tracing data
+ * from the trace_fd file descriptor instead of from the local tracefs
+ * @file: output filename where tracing data will be written
+ * @cpu: which CPU is being traced
+ * @flags: flags configuring the recorder (see TRACECMD_RECORDER_* enums)
+ * @trace_fd: file descriptor from where tracing data will be read
+ *
+ * The recorder is created without a tracing directory and @trace_fd is
+ * stored in it afterwards. Returns NULL if the recorder cannot be created.
+ */
+struct tracecmd_recorder *
+tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags,
+			      int trace_fd)
+{
+	struct tracecmd_recorder *recorder;
+
+	recorder = __tracecmd_create_buffer_recorder(file, cpu, flags, NULL);
+	if (!recorder)
+		return NULL;
+
+	recorder->trace_fd = trace_fd;
+
+	return recorder;
+}
+
+/*
+ * Create a recorder writing to @fd, reading from the directory returned
+ * by tracefs_tracing_dir(). Returns NULL with errno set to ENODEV when no
+ * tracing directory is available.
+ */
+struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags)
+{
+	const char *tracing_dir = tracefs_tracing_dir();
+
+	if (tracing_dir)
+		return tracecmd_create_buffer_recorder_fd(fd, cpu, flags, tracing_dir);
+
+	errno = ENODEV;
+	return NULL;
+}
+
+/*
+ * Create @file and a recorder writing to it, reading from the directory
+ * returned by tracefs_tracing_dir(). Returns NULL with errno set to
+ * ENODEV when no tracing directory is available.
+ */
+struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags)
+{
+	const char *tracing_dir = tracefs_tracing_dir();
+
+	if (tracing_dir)
+		return tracecmd_create_buffer_recorder(file, cpu, flags, tracing_dir);
+
+	errno = ENODEV;
+	return NULL;
+}
+
+/*
+ * Size-limited variant of tracecmd_create_recorder(): the output is
+ * limited to @maxkb. Returns NULL with errno set to ENODEV when no
+ * tracing directory is available.
+ */
+struct tracecmd_recorder *
+tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb)
+{
+	const char *tracing_dir = tracefs_tracing_dir();
+
+	if (tracing_dir)
+		return tracecmd_create_buffer_recorder_maxkb(file, cpu, flags,
+							     tracing_dir, maxkb);
+
+	errno = ENODEV;
+	return NULL;
+}
+
+/*
+ * Account @size written bytes and, when a size limit is set and the
+ * current output file has reached recorder->max pages, make the other
+ * file (emptied first) the current output.
+ */
+static inline void update_fd(struct tracecmd_recorder *recorder, int size)
+{
+	int next_fd;
+
+	/* Rotation only happens when a limit was requested */
+	if (recorder->max == 0)
+		return;
+
+	recorder->count += size;
+	if (recorder->count >= recorder->page_size) {
+		recorder->pages++;
+		recorder->count = 0;
+	}
+
+	if (recorder->pages < recorder->max)
+		return;
+
+	recorder->pages = 0;
+
+	/* Swap fd to next file. */
+	next_fd = (recorder->fd == recorder->fd1) ? recorder->fd2 : recorder->fd1;
+
+	/* Zero out the new file we are writing to */
+	lseek64(next_fd, 0, SEEK_SET);
+	ftruncate(next_fd, 0);
+
+	recorder->fd = next_fd;
+}
+
+/*
+ * Move up to pipe_size bytes from trace_fd to the current output file
+ * through the brass pipe.
+ *
+ * Returns -1 on error.
+ * or bytes of data read, i.e. the bytes that reached the output file.
+ * 0 is returned when nothing was available (EAGAIN, EINTR, ENOTCONN or
+ * end of input).
+ */
+static long splice_data(struct tracecmd_recorder *recorder)
+{
+	long total_read = 0;
+	long read;
+	long ret;
+
+	read = splice(recorder->trace_fd, NULL, recorder->brass[1], NULL,
+		      recorder->pipe_size, recorder->trace_fd_flags);
+	if (read < 0) {
+		if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
+			return 0;
+
+		tracecmd_warning("recorder error in splice input");
+		return -1;
+	} else if (read == 0)
+		return 0;
+
+	/* Drain what is now in the pipe; one splice() may not take it all */
+	while (read) {
+		ret = splice(recorder->brass[0], NULL, recorder->fd, NULL,
+			     read, recorder->fd_flags);
+		if (ret < 0) {
+			if (errno != EAGAIN && errno != EINTR) {
+				tracecmd_warning("recorder error in splice output");
+				return -1;
+			}
+			return total_read;
+		}
+
+		update_fd(recorder, ret);
+		/* Accumulate: earlier passes of this loop count as well */
+		total_read += ret;
+		read -= ret;
+	}
+
+	return total_read;
+}
+
+/*
+ * Splice up to pipe_size bytes directly from trace_fd to the output
+ * file, without the intermediate brass pipe.
+ *
+ * Returns -1 on error.
+ * or bytes of data read. 0 is returned when no input became readable
+ * within POLL_TIMEOUT_MS or the splice was interrupted.
+ */
+static long direct_splice_data(struct tracecmd_recorder *recorder)
+{
+	struct pollfd pfd = {
+		.fd = recorder->trace_fd,
+		.events = POLLIN,
+	};
+	long read;
+	int ret;
+
+	/*
+	 * splice(2) in Linux used to not check O_NONBLOCK flag of pipe file
+	 * descriptors before [1]. To avoid getting blocked in the splice(2)
+	 * call below after the user had requested to stop tracing, we poll(2)
+	 * here. This poll() is not necessary on newer kernels.
+	 *
+	 * [1] https://github.com/torvalds/linux/commit/ee5e001196d1345b8fee25925ff5f1d67936081e
+	 */
+	ret = poll(&pfd, 1, POLL_TIMEOUT_MS);
+	if (ret < 0)
+		return -1;
+
+	/* Test the POLLIN bit; OR-ing it in would make the check always pass */
+	if (!(pfd.revents & POLLIN))
+		return 0;
+
+	read = splice(recorder->trace_fd, NULL, recorder->fd, NULL,
+		      recorder->pipe_size, recorder->fd_flags);
+	if (read < 0) {
+		if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
+			return 0;
+
+		tracecmd_warning("recorder error in splice input");
+		return -1;
+	}
+
+	return read;
+}
+
+/*
+ * Copy at most one page from trace_fd to the output file with plain
+ * read()/write().
+ *
+ * Returns -1 on error.
+ * or bytes of data read.
+ */
+static long read_data(struct tracecmd_recorder *recorder)
+{
+	char page[recorder->page_size];
+	long done = 0;
+	long got;
+	long put;
+
+	got = read(recorder->trace_fd, page, recorder->page_size);
+	if (got < 0) {
+		if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
+			return 0;
+
+		tracecmd_warning("recorder error in read input");
+		return -1;
+	}
+
+	/* Keep writing until everything read is out or a write fails */
+	do {
+		put = write(recorder->fd, page + done, got - done);
+		if (put > 0) {
+			done += put;
+			update_fd(recorder, put);
+		}
+	} while (put >= 0 && done != got);
+
+	return put < 0 ? put : got;
+}
+
+/*
+ * Move one batch of trace data to the output, using the transfer method
+ * selected by the recorder flags: read()/write() for NOSPLICE, a direct
+ * splice for NOBRASS, otherwise a splice through the brass pipe.
+ */
+static long move_data(struct tracecmd_recorder *recorder)
+{
+	unsigned mode = recorder->flags;
+	long moved;
+
+	if (mode & TRACECMD_RECORD_NOSPLICE)
+		moved = read_data(recorder);
+	else if (mode & TRACECMD_RECORD_NOBRASS)
+		moved = direct_splice_data(recorder);
+	else
+		moved = splice_data(recorder);
+
+	return moved;
+}
+
+/*
+ * Drain whatever is left in the trace source into the output file.
+ *
+ * The recorder is switched to non-blocking mode, move_data() is called
+ * until it reports nothing more, the remainder is copied with plain
+ * read()/write() and the output is padded with zeros up to a page
+ * boundary.
+ *
+ * Returns a negative value if move_data() failed, otherwise a byte count.
+ * NOTE(review): the count is the move_data() total plus one page_size
+ * when padding was written; bytes copied by the read() loop are not added
+ * otherwise - confirm whether any caller relies on the exact value.
+ */
+long tracecmd_flush_recording(struct tracecmd_recorder *recorder)
+{
+ char buf[recorder->page_size];
+ long total = 0;
+ long wrote = 0;
+ long ret;
+
+ set_nonblock(recorder);
+
+ do {
+ ret = move_data(recorder);
+ if (ret < 0)
+ return ret;
+ total += ret;
+ } while (ret);
+
+ /* splice only reads full pages */
+ do {
+ ret = read(recorder->trace_fd, buf, recorder->page_size);
+ if (ret > 0) {
+ /* The result of write() is not checked here */
+ write(recorder->fd, buf, ret);
+ wrote += ret;
+ }
+
+ } while (ret > 0);
+
+ /* Make sure we finish off with a page size boundary */
+ /* Keeps only the bytes past the last full page; assumes page_size is a power of two */
+ wrote &= recorder->page_size - 1;
+ if (wrote) {
+ memset(buf, 0, recorder->page_size);
+ write(recorder->fd, buf, recorder->page_size - wrote);
+ total += recorder->page_size;
+ }
+
+ return total;
+}
+
+/*
+ * Record until recorder->stop becomes non-zero, then flush the rest.
+ * @sleep is a delay in microseconds, applied before a pass only when the
+ * previous pass moved no data; 0 disables the delay.
+ *
+ * Returns 0 on success or the negative value reported by move_data() or
+ * tracecmd_flush_recording().
+ */
+int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep)
+{
+	struct timespec pause = {
+		.tv_sec = sleep / 1000000,
+		.tv_nsec = (sleep % 1000000) * 1000,
+	};
+	long moved = 1;
+	long ret;
+
+	recorder->stop = 0;
+
+	do {
+		/* Only sleep if we did not read anything last time */
+		if (sleep && !moved)
+			nanosleep(&pause, NULL);
+
+		moved = 0;
+		while ((ret = move_data(recorder)) != 0) {
+			if (ret < 0)
+				return ret;
+			moved += ret;
+		}
+	} while (!recorder->stop);
+
+	/* Flush out the rest */
+	ret = tracecmd_flush_recording(recorder);
+
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Ask a running tracecmd_start_recording() loop to finish: the recorder
+ * is made non-blocking and its stop flag is set. NULL is ignored.
+ */
+void tracecmd_stop_recording(struct tracecmd_recorder *recorder)
+{
+	if (recorder) {
+		set_nonblock(recorder);
+		recorder->stop = 1;
+	}
+}
diff --git a/lib/trace-cmd/trace-timesync-kvm.c b/lib/trace-cmd/trace-timesync-kvm.c
new file mode 100644
index 00000000..12a22d4c
--- /dev/null
+++ b/lib/trace-cmd/trace-timesync-kvm.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <ctype.h>
+
+#include "trace-cmd.h"
+#include "trace-cmd-private.h"
+#include "tracefs.h"
+#include "trace-tsync-local.h"
+
+#define KVM_DEBUG_FS "/sys/kernel/debug/kvm"
+#define KVM_DEBUG_OFFSET_FILE "tsc-offset"
+#define KVM_DEBUG_SCALING_FILE "tsc-scaling-ratio"
+#define KVM_DEBUG_FRACTION_FILE "tsc-scaling-ratio-frac-bits"
+#define KVM_DEBUG_VCPU_DIR "vcpu"
+
+/* default KVM scaling values, taken from the Linux kernel */
+#define KVM_SCALING_AMD_DEFAULT (1ULL<<32)
+#define KVM_SCALING_INTEL_DEFAULT (1ULL<<48)
+
+#define KVM_SYNC_PKT_REQUEST 1
+#define KVM_SYNC_PKT_RESPONSE 2
+
+typedef __s64 s64;
+
+#define KVM_ACCURACY 0
+#define KVM_NAME "kvm"
+
+/* Private state of the KVM time sync protocol, stored in proto_data */
+struct kvm_clock_sync {
+ /* Number of entries in each of the three arrays below */
+ int vcpu_count;
+ /* Per-vCPU path of the tsc-offset debugfs file */
+ char **vcpu_offsets;
+ /* Per-vCPU path of the tsc-scaling-ratio file, NULL if not found */
+ char **vcpu_scalings;
+ /* Per-vCPU path of the tsc-scaling-ratio-frac-bits file, NULL if not found */
+ char **vcpu_frac;
+ /* Guest side: descriptor of trace_marker_raw, -1 when not open */
+ int marker_fd;
+ /* Guest side: tep handle used to parse the raw events */
+ struct tep_handle *tep;
+ /* Guest side: id of the ftrace "raw_data" event */
+ int raw_id;
+ /* Timestamp of the last marker event found by kvm_marker_find() */
+ unsigned long long ts;
+};
+
+/* Payload of both the request and the response time sync packets */
+struct kvm_clock_offset_msg {
+ /* Guest timestamp of the sync marker event */
+ s64 ts;
+ /* Clock offset; the host sends the negated tsc-offset value */
+ s64 offset;
+ /* TSC scaling ratio, 1 when no scaling applies */
+ s64 scaling;
+ /* Number of fraction bits of the scaling ratio */
+ s64 frac;
+};
+
+/*
+ * Read a number from the start of @file and store it in @res. The text is
+ * parsed by strtoll() with base 0, so decimal, octal ("0...") and
+ * hexadecimal ("0x...") notations are accepted.
+ *
+ * Returns 0 on success, -1 if @file is NULL, cannot be opened or is empty.
+ * @res is left untouched on failure.
+ */
+static int read_ll_from_file(char *file, long long *res)
+{
+	char buf[32];
+	int ret;
+	int fd;
+
+	if (!file)
+		return -1;
+	fd = open(file, O_RDONLY | O_NONBLOCK);
+	if (fd < 0)
+		return -1;
+	/* Keep one byte for the terminating NUL that strtoll() relies on */
+	ret = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (ret <= 0)
+		return -1;
+	buf[ret] = '\0';
+
+	*res = strtoll(buf, NULL, 0);
+
+	return 0;
+}
+
+/*
+ * Check one vCPU directory (@vname/@cpu): the scaling ratio file and the
+ * fraction bits file must either both be readable or both be unreadable.
+ *
+ * Returns true if they agree, false otherwise or on allocation failure.
+ */
+static bool kvm_scaling_check_vm_cpu(char *vname, char *cpu)
+{
+	bool scaling_ok;
+	bool frac_ok;
+	long long value;
+	char *path;
+
+	if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_SCALING_FILE) < 0)
+		return false;
+	scaling_ok = read_ll_from_file(path, &value) == 0;
+	free(path);
+
+	if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_FRACTION_FILE) < 0)
+		return false;
+	frac_ok = read_ll_from_file(path, &value) == 0;
+	free(path);
+
+	/* Only the presence matters here, the values are not used */
+	return scaling_ok == frac_ok;
+}
+
+/*
+ * Run kvm_scaling_check_vm_cpu() on every "vcpu*" sub-directory of the VM
+ * directory @name below KVM_DEBUG_FS.
+ *
+ * Returns false as soon as one vCPU fails the check. Returns true
+ * otherwise, including when the directory cannot be opened.
+ */
+static bool kvm_scaling_check_vm(char *name)
+{
+	struct dirent *entry;
+	bool consistent = true;
+	char *vdir;
+	DIR *dir;
+
+	if (asprintf(&vdir, "%s/%s", KVM_DEBUG_FS, name) < 0)
+		return true;
+
+	dir = opendir(vdir);
+	if (!dir) {
+		free(vdir);
+		return true;
+	}
+
+	while (consistent && (entry = readdir(dir))) {
+		if (entry->d_type != DT_DIR)
+			continue;
+		if (strncmp(entry->d_name, "vcpu", 4))
+			continue;
+		consistent = kvm_scaling_check_vm_cpu(vdir, entry->d_name);
+	}
+
+	closedir(dir);
+	free(vdir);
+	return consistent;
+}
+/*
+ * Run kvm_scaling_check_vm() on every directory of KVM_DEBUG_FS whose
+ * name starts with a digit.
+ *
+ * Returns false as soon as one VM fails the check. Returns true
+ * otherwise, including when KVM_DEBUG_FS cannot be opened.
+ */
+static bool kvm_scaling_check(void)
+{
+	struct dirent *entry;
+	bool consistent = true;
+	DIR *dir;
+
+	dir = opendir(KVM_DEBUG_FS);
+	if (!dir)
+		return true;
+
+	while (consistent && (entry = readdir(dir))) {
+		if (entry->d_type != DT_DIR)
+			continue;
+		if (!isdigit(entry->d_name[0]))
+			continue;
+		consistent = kvm_scaling_check_vm(entry->d_name);
+	}
+
+	closedir(dir);
+	return consistent;
+}
+
+/*
+ * Tell whether the KVM time sync protocol can be used.
+ * A guest is always accepted. A host needs KVM_DEBUG_FS to be an
+ * existing directory that passes kvm_scaling_check().
+ */
+static bool kvm_support_check(bool guest)
+{
+	struct stat st;
+
+	if (guest)
+		return true;
+
+	if (stat(KVM_DEBUG_FS, &st) < 0 || !S_ISDIR(st.st_mode))
+		return false;
+
+	return kvm_scaling_check();
+}
+
+/*
+ * Scan the vCPU directory @dir_str and remember, in slot @cpu of the
+ * three path arrays of @kvm, the full path of the offset, scaling ratio
+ * and fraction bits files found there.
+ *
+ * Returns 0 if at least the offset file was found. Otherwise the three
+ * slots of @cpu are freed and reset to NULL and -1 is returned.
+ */
+static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
+{
+	struct dirent *entry;
+	char path[PATH_MAX];
+	char **slot;
+	DIR *dir;
+
+	dir = opendir(dir_str);
+	if (dir) {
+		while ((entry = readdir(dir))) {
+			if (entry->d_type == DT_DIR)
+				continue;
+
+			/* Pick the array slot matching the file name */
+			if (!strcmp(entry->d_name, KVM_DEBUG_OFFSET_FILE))
+				slot = &kvm->vcpu_offsets[cpu];
+			else if (!strcmp(entry->d_name, KVM_DEBUG_SCALING_FILE))
+				slot = &kvm->vcpu_scalings[cpu];
+			else if (!strcmp(entry->d_name, KVM_DEBUG_FRACTION_FILE))
+				slot = &kvm->vcpu_frac[cpu];
+			else
+				continue;
+
+			snprintf(path, sizeof(path), "%s/%s",
+				 dir_str, entry->d_name);
+			*slot = strdup(path);
+		}
+		closedir(dir);
+
+		/* The offset file is the only mandatory one */
+		if (kvm->vcpu_offsets[cpu])
+			return 0;
+	}
+
+	free(kvm->vcpu_offsets[cpu]);
+	kvm->vcpu_offsets[cpu] = NULL;
+	free(kvm->vcpu_scalings[cpu]);
+	kvm->vcpu_scalings[cpu] = NULL;
+	free(kvm->vcpu_frac[cpu]);
+	kvm->vcpu_frac[cpu] = NULL;
+	return -1;
+}
+
+/*
+ * Locate the debugfs directory of the VM run by process @pid (the first
+ * directory of KVM_DEBUG_FS whose name starts with "<pid>-") and collect
+ * the per-vCPU file paths of every "vcpu<N>" sub-directory with
+ * 0 <= N < kvm->vcpu_count.
+ *
+ * Returns 0 if an offset file was found for every vCPU, -1 otherwise.
+ * Paths already stored in @kvm are left in place on failure.
+ */
+static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid)
+{
+	char *vm_dir_str = NULL;
+	struct dirent *entry;
+	char *pid_str = NULL;
+	char path[PATH_MAX];
+	long vcpu;
+	DIR *dir;
+	int i;
+
+	dir = opendir(KVM_DEBUG_FS);
+	if (!dir)
+		goto error;
+	if (asprintf(&pid_str, "%d-", pid) <= 0) {
+		/* The pointer is undefined after a failed asprintf() */
+		pid_str = NULL;
+		goto error;
+	}
+	while ((entry = readdir(dir))) {
+		if (!(entry->d_type == DT_DIR &&
+		    !strncmp(entry->d_name, pid_str, strlen(pid_str))))
+			continue;
+		/* Same here: make sure a failure is seen as "not found" */
+		if (asprintf(&vm_dir_str, "%s/%s", KVM_DEBUG_FS, entry->d_name) < 0)
+			vm_dir_str = NULL;
+		break;
+	}
+	closedir(dir);
+	dir = NULL;
+	if (!vm_dir_str)
+		goto error;
+	dir = opendir(vm_dir_str);
+	if (!dir)
+		goto error;
+	while ((entry = readdir(dir))) {
+		if (!(entry->d_type == DT_DIR &&
+		    !strncmp(entry->d_name, KVM_DEBUG_VCPU_DIR, strlen(KVM_DEBUG_VCPU_DIR))))
+			continue;
+		/* The vCPU number follows the "vcpu" prefix */
+		vcpu = strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10);
+		if (vcpu < 0 || vcpu >= kvm->vcpu_count)
+			continue;
+		snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name);
+		if (kvm_open_vcpu_dir(kvm, vcpu, path) < 0)
+			goto error;
+	}
+	/* Every vCPU must have its offset file */
+	for (i = 0; i < kvm->vcpu_count; i++) {
+		if (!kvm->vcpu_offsets[i])
+			goto error;
+	}
+	closedir(dir);
+	free(pid_str);
+	free(vm_dir_str);
+	return 0;
+error:
+	free(pid_str);
+	free(vm_dir_str);
+	if (dir)
+		closedir(dir);
+	return -1;
+}
+
+/*
+ * Host side initialization: allocate the per-vCPU path arrays of @kvm
+ * (tsync->vcpu_count entries each) and fill them from the debugfs
+ * directory of the guest process tsync->guest_pid.
+ *
+ * Returns 0 on success. On failure -1 is returned, everything allocated
+ * here is released and the array pointers of @kvm are reset to NULL.
+ */
+static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
+				    struct kvm_clock_sync *kvm)
+{
+	int i;
+
+	kvm->vcpu_count = tsync->vcpu_count;
+	kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *));
+	kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *));
+	kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *));
+	if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac)
+		goto error;
+	if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0)
+		goto error;
+	return 0;
+
+error:
+	/* Paths of the vCPUs handled before the failure are still stored */
+	for (i = 0; i < kvm->vcpu_count; i++) {
+		if (kvm->vcpu_offsets)
+			free(kvm->vcpu_offsets[i]);
+		if (kvm->vcpu_scalings)
+			free(kvm->vcpu_scalings[i]);
+		if (kvm->vcpu_frac)
+			free(kvm->vcpu_frac[i]);
+	}
+	free(kvm->vcpu_offsets);
+	kvm->vcpu_offsets = NULL;
+	free(kvm->vcpu_scalings);
+	kvm->vcpu_scalings = NULL;
+	free(kvm->vcpu_frac);
+	kvm->vcpu_frac = NULL;
+	return -1;
+}
+
+/*
+ * Guest side initialization: load the "ftrace" events of the tracing
+ * instance into kvm->tep, remember the id of the "raw_data" event and
+ * open the instance's trace_marker_raw file for writing.
+ *
+ * Returns 0 on success, -1 on failure after releasing kvm->tep and
+ * kvm->marker_fd.
+ */
+static int kvm_clock_sync_init_guest(struct tracecmd_time_sync *tsync,
+ struct kvm_clock_sync *kvm)
+{
+ const char *systems[] = {"ftrace", NULL};
+ struct clock_sync_context *clock_context;
+ struct tep_event *raw;
+ char *path;
+
+ clock_context = (struct clock_sync_context *)tsync->context;
+ path = tracefs_instance_get_dir(clock_context->instance);
+ if (!path)
+ goto error;
+ kvm->tep = tracefs_local_events_system(path, systems);
+ tracefs_put_tracing_file(path);
+ if (!kvm->tep)
+ goto error;
+ raw = tep_find_event_by_name(kvm->tep, "ftrace", "raw_data");
+ if (!raw)
+ goto error;
+
+ kvm->raw_id = raw->id;
+ /* The events are read on this machine: file and local byte order are the host's */
+ tep_set_file_bigendian(kvm->tep, tracecmd_host_bigendian());
+ tep_set_local_bigendian(kvm->tep, tracecmd_host_bigendian());
+
+ path = tracefs_instance_get_file(clock_context->instance, "trace_marker_raw");
+ if (!path)
+ goto error;
+ /*
+ * NOTE(review): the result of open() is not checked; a failure leaves
+ * marker_fd at -1 and success is still returned - confirm this is
+ * intended.
+ */
+ kvm->marker_fd = open(path, O_WRONLY);
+ tracefs_put_tracing_file(path);
+
+ return 0;
+
+error:
+ if (kvm->tep)
+ tep_free(kvm->tep);
+ if (kvm->marker_fd >= 0)
+ close(kvm->marker_fd);
+
+ return -1;
+}
+
+/*
+ * Protocol init callback: allocate the KVM private state, initialize it
+ * for the guest or the host role and attach it to the clock context as
+ * proto_data.
+ *
+ * Returns 0 on success, -1 if the arguments are invalid, KVM time sync is
+ * not supported or the initialization failed.
+ */
+static int kvm_clock_sync_init(struct tracecmd_time_sync *tsync)
+{
+	struct clock_sync_context *clock_context;
+	struct kvm_clock_sync *kvm;
+	int ret;
+
+	if (!tsync || !tsync->context)
+		return -1;
+	clock_context = (struct clock_sync_context *)tsync->context;
+
+	if (!kvm_support_check(clock_context->is_guest))
+		return -1;
+
+	kvm = calloc(1, sizeof(struct kvm_clock_sync));
+	if (!kvm)
+		return -1;
+
+	/* Not opened yet, so that the cleanup code does not close fd 0 */
+	kvm->marker_fd = -1;
+
+	ret = clock_context->is_guest ?
+		kvm_clock_sync_init_guest(tsync, kvm) :
+		kvm_clock_sync_init_host(tsync, kvm);
+	if (ret < 0) {
+		free(kvm);
+		return -1;
+	}
+
+	clock_context->proto_data = kvm;
+	return 0;
+}
+
+/*
+ * Protocol free callback: release the KVM private state attached to the
+ * clock context of @tsync - the stored file paths, the arrays holding
+ * them, the tep handle and the marker descriptor - and detach it.
+ *
+ * Always returns -1, as it did before the arrays were freed here.
+ * NOTE(review): 0 looks like the natural result for a successful free;
+ * left unchanged because the callers are not visible here.
+ */
+static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
+{
+	struct clock_sync_context *clock_context;
+	struct kvm_clock_sync *kvm = NULL;
+	int i;
+
+	clock_context = (struct clock_sync_context *)tsync->context;
+	if (clock_context)
+		kvm = (struct kvm_clock_sync *)clock_context->proto_data;
+	if (kvm) {
+		for (i = 0; i < kvm->vcpu_count; i++) {
+			if (kvm->vcpu_offsets)
+				free(kvm->vcpu_offsets[i]);
+			if (kvm->vcpu_scalings)
+				free(kvm->vcpu_scalings[i]);
+			if (kvm->vcpu_frac)
+				free(kvm->vcpu_frac[i]);
+		}
+		/* The arrays themselves were allocated by the host init */
+		free(kvm->vcpu_offsets);
+		free(kvm->vcpu_scalings);
+		free(kvm->vcpu_frac);
+		if (kvm->tep)
+			tep_free(kvm->tep);
+		if (kvm->marker_fd >= 0)
+			close(kvm->marker_fd);
+		free(kvm);
+		/* Do not leave a dangling pointer in the context */
+		clock_context->proto_data = NULL;
+	}
+	return -1;
+}
+
+/*
+ * Host side of one synchronization exchange for vCPU @cpu.
+ *
+ * Reads the TSC offset (mandatory) and, when available, the scaling ratio
+ * and its fraction bits from the debugfs files of the vCPU, waits for a
+ * KVM_SYNC_PKT_REQUEST from the guest and answers with a
+ * KVM_SYNC_PKT_RESPONSE carrying the negated offset, the scaling and the
+ * fraction bits.
+ *
+ * On success 0 is returned and @offset, @scaling, @frac and @timestamp
+ * are set; @timestamp is the ts field of the packet received from the
+ * guest. -1 is returned on any failure.
+ */
+static int kvm_clock_host(struct tracecmd_time_sync *tsync,
+ long long *offset, long long *scaling, long long *frac,
+ long long *timestamp, unsigned int cpu)
+{
+ char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
+ struct clock_sync_context *clock_context;
+ struct kvm_clock_offset_msg packet;
+ struct kvm_clock_sync *kvm = NULL;
+ long long kvm_scaling = 1;
+ unsigned int sync_msg;
+ long long kvm_offset;
+ long long kvm_frac = 0;
+ unsigned int size;
+ char *msg;
+ int ret;
+
+ clock_context = (struct clock_sync_context *)tsync->context;
+ if (clock_context)
+ kvm = (struct kvm_clock_sync *)clock_context->proto_data;
+ if (!kvm || !kvm->vcpu_offsets || !kvm->vcpu_offsets[0])
+ return -1;
+ if (cpu >= kvm->vcpu_count)
+ return -1;
+ ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset);
+ if (ret < 0)
+ return -1;
+
+ /* The scaling is optional: a failed read keeps the default of 1 */
+ if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) {
+ read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling);
+ /* The kernel's default ratios mean "no scaling" */
+ if (kvm_scaling == KVM_SCALING_AMD_DEFAULT ||
+ kvm_scaling == KVM_SCALING_INTEL_DEFAULT)
+ kvm_scaling = 1;
+ }
+
+ /* The fraction bits only matter when scaling applies; ret is overwritten just below */
+ if (kvm->vcpu_frac && kvm->vcpu_frac[cpu] && kvm_scaling != 1)
+ ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac);
+ /* Receive the request straight into the local packet */
+ msg = (char *)&packet;
+ size = sizeof(packet);
+ ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ sync_proto, &sync_msg,
+ &size, &msg);
+ if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ sync_msg != KVM_SYNC_PKT_REQUEST)
+ return -1;
+
+ /* packet.ts is kept as received and sent back with the reply */
+ packet.offset = -kvm_offset;
+ packet.scaling = kvm_scaling;
+ packet.frac = kvm_frac;
+ ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
+ KVM_SYNC_PKT_RESPONSE, sizeof(packet),
+ (char *)&packet);
+ if (ret)
+ return -1;
+
+ *scaling = packet.scaling;
+ *offset = packet.offset;
+ *frac = kvm_frac;
+ *timestamp = packet.ts;
+
+ return 0;
+}
+
+/* Text written to trace_marker_raw and searched for in the trace */
+#define KVM_EVENT_MARKER "kvm sync event"
+/*
+ * Event callback passed to tracefs_iterate_raw_events(): check whether
+ * @record is the raw_data event carrying KVM_EVENT_MARKER.
+ * @context is the struct kvm_clock_sync of the session.
+ *
+ * Returns 1 and stores the record timestamp in kvm->ts when the marker
+ * is found, 0 otherwise.
+ * NOTE(review): presumably a non-zero return stops the iteration -
+ * confirm against the tracefs documentation.
+ */
+static int kvm_marker_find(struct tep_event *event, struct tep_record *record,
+ int cpu, void *context)
+{
+ struct kvm_clock_sync *kvm = (struct kvm_clock_sync *)context;
+ struct tep_format_field *field;
+ struct tep_format_field *id;
+ char *marker;
+
+ /* Make sure this is our event */
+ if (event->id != kvm->raw_id)
+ return 0;
+ id = tep_find_field(event, "id");
+ field = tep_find_field(event, "buf");
+ /* The record must be large enough to hold the marker text and its NUL */
+ if (field && id &&
+ record->size >= (id->offset + strlen(KVM_EVENT_MARKER) + 1)) {
+ /* The text is compared starting at the offset of the "id" field */
+ marker = (char *)(record->data + id->offset);
+ if (!strcmp(marker, KVM_EVENT_MARKER)) {
+ kvm->ts = record->ts;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Guest side of one synchronization exchange.
+ *
+ * Writes KVM_EVENT_MARKER to trace_marker_raw, looks the event up in the
+ * trace to get its timestamp, sends that timestamp to the host in a
+ * KVM_SYNC_PKT_REQUEST and waits for the KVM_SYNC_PKT_RESPONSE.
+ *
+ * On success 0 is returned and @offset, @scaling, @frac and @timestamp
+ * are set from the response packet. -1 is returned on any failure.
+ */
+static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
+ long long *offset,
+ long long *scaling,
+ long long *frac,
+ long long *timestamp)
+{
+ char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
+ struct clock_sync_context *clock_context;
+ struct kvm_clock_offset_msg packet;
+ struct kvm_clock_sync *kvm = NULL;
+ unsigned int sync_msg;
+ unsigned int size;
+ char *msg;
+ int ret;
+
+ clock_context = (struct clock_sync_context *)tsync->context;
+ if (clock_context)
+ kvm = (struct kvm_clock_sync *)clock_context->proto_data;
+ if (!kvm)
+ return -1;
+ kvm->ts = 0;
+ memset(&packet, 0, sizeof(packet));
+ /* Presumably resets the instance's trace buffer before the marker is written */
+ tracefs_instance_file_write(clock_context->instance, "trace", "\0");
+ /* The marker is written with its terminating NUL; the result is not checked */
+ write(kvm->marker_fd, KVM_EVENT_MARKER, strlen(KVM_EVENT_MARKER) + 1);
+ /* ts stays 0 if kvm_marker_find() does not see the marker */
+ kvm->ts = 0;
+ tracefs_iterate_raw_events(kvm->tep, clock_context->instance,
+ NULL, 0, kvm_marker_find, kvm);
+ packet.ts = kvm->ts;
+ ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
+ KVM_SYNC_PKT_REQUEST, sizeof(packet),
+ (char *)&packet);
+ if (ret)
+ return -1;
+ /* Receive the response into the same packet */
+ msg = (char *)&packet;
+ size = sizeof(packet);
+ ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ sync_proto, &sync_msg,
+ &size, &msg);
+ if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ sync_msg != KVM_SYNC_PKT_RESPONSE)
+ return -1;
+
+ *scaling = packet.scaling;
+ *offset = packet.offset;
+ *frac = packet.frac;
+ *timestamp = packet.ts;
+ return 0;
+}
+
+/*
+ * Protocol calc callback: run one synchronization exchange, as guest or
+ * as host depending on the clock context of @tsync. @cpu is only used on
+ * the host side.
+ *
+ * Returns 0 on success, -1 on failure or if @tsync has no context.
+ */
+static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
+			       long long *offset, long long *scaling, long long *frac,
+			       long long *timestamp, unsigned int cpu)
+{
+	struct clock_sync_context *ctx;
+
+	if (!tsync || !tsync->context)
+		return -1;
+
+	ctx = (struct clock_sync_context *)tsync->context;
+
+	if (ctx->is_guest)
+		return kvm_clock_guest(tsync, offset, scaling, frac, timestamp);
+
+	return kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu);
+}
+
+/*
+ * Register the KVM time sync protocol. The guest role is always offered;
+ * the host role, together with the x86 TSC clock, only when
+ * kvm_support_check() succeeds for a host.
+ */
+int kvm_clock_sync_register(void)
+{
+	bool host_capable = kvm_support_check(false);
+	int role = TRACECMD_TIME_SYNC_ROLE_GUEST;
+	int clock = 0;
+
+	if (host_capable) {
+		clock = TRACECMD_CLOCK_X86_TSC;
+		role |= TRACECMD_TIME_SYNC_ROLE_HOST;
+	}
+
+	return tracecmd_tsync_proto_register(KVM_NAME, KVM_ACCURACY,
+					     role, clock, 0,
+					     kvm_clock_sync_init,
+					     kvm_clock_sync_free,
+					     kvm_clock_sync_calc);
+}
+
+/* Remove the KVM protocol from the registered time sync protocols */
+int kvm_clock_sync_unregister(void)
+{
+	int ret;
+
+	ret = tracecmd_tsync_proto_unregister(KVM_NAME);
+
+	return ret;
+}
diff --git a/lib/trace-cmd/trace-timesync-ptp.c b/lib/trace-cmd/trace-timesync-ptp.c
new file mode 100644
index 00000000..20e6e6f1
--- /dev/null
+++ b/lib/trace-cmd/trace-timesync-ptp.c
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <linux/types.h>
+#include <time.h>
+#include <sched.h>
+#include <limits.h>
+
+#include "trace-cmd.h"
+#include "trace-cmd-private.h"
+#include "tracefs.h"
+#include "trace-tsync-local.h"
+#include "trace-msg.h"
+#include "trace-cmd-local.h"
+
+typedef __be32 be32;
+typedef __u64 u64;
+typedef __s64 s64;
+
+#define PTP_SYNC_LOOP 339
+
+#define PTP_SYNC_PKT_START 1
+#define PTP_SYNC_PKT_PROBE 2
+#define PTP_SYNC_PKT_PROBES 3
+#define PTP_SYNC_PKT_OFFSET 4
+#define PTP_SYNC_PKT_END 5
+
+/* print time sync debug messages */
+/* #define TSYNC_DEBUG */
+
+ /* Private data of the PTP protocol, stored in the context's proto_data */
+ struct ptp_clock_sync {
+ /* tep handle loaded with the "ftrace" event system of the sync instance */
+ struct tep_handle *tep;
+ /* "id" field of the ftrace raw_data event; marker payload starts at its offset */
+ struct tep_format_field *id;
+ /* event id of the ftrace raw_data event */
+ int raw_id;
+ /* descriptor of the instance's trace_marker_raw file, or -1 */
+ int marker_fd;
+ /* identifier of the current series of probes */
+ int series_id;
+ /* combination of PTP_FLAG_... bits in use for the current series */
+ int flags;
+ /* descriptor of the debug output file (used under TSYNC_DEBUG), or -1 */
+ int debug_fd;
+ };
+
+ /* Option bits of the PTP protocol; the server sends them in the start message */
+ enum {
+ /*
+ * Consider only the probe with fastest response time,
+ * otherwise make a histogram from all probes.
+ */
+ PTP_FLAG_FASTEST_RESPONSE = (1 << 0),
+ /*
+ * Use trace marker to get the clock,
+ * otherwise use the system clock directly.
+ */
+ PTP_FLAG_USE_MARKER = (1 << 1),
+ };
+ /* Flags used by the server side for every new series of probes */
+ static int ptp_flags = PTP_FLAG_FASTEST_RESPONSE | PTP_FLAG_USE_MARKER;
+
+/*
+ * Calculated using formula [CPU rate]*[calculated offset deviation]
+ * tested on 3GHz CPU, with x86-tsc trace clock and compare the calculated
+ * offset with /sys/kernel/debug/kvm/<VM ID>/vcpu0/tsc-offset
+ * measured 2000ns deviation
+ * using PTP flags PTP_FLAG_FASTEST_RESPONSE | PTP_FLAG_USE_MARKER
+ */
+#define PTP_ACCURACY 6000
+#define PTP_NAME "ptp"
+
+ /* Payload of PTP_SYNC_PKT_START; both fields travel in network byte order */
+ struct ptp_clock_start_msg {
+ be32 series_id;
+ be32 flags;
+ } __packed;
+
+ /* One recorded probe: its timestamp and the probe counter it belongs to */
+ struct ptp_clock_sample {
+ s64 ts;
+ be32 id;
+ } __packed;
+
+ /*
+ * Payload of PTP_SYNC_PKT_PROBES and local storage for collected probes.
+ * Sized for PTP_SYNC_LOOP returned probes plus PTP_SYNC_LOOP sent probes.
+ */
+ struct ptp_clock_result_msg {
+ be32 series_id;
+ be32 count;
+ struct ptp_clock_sample samples[2*PTP_SYNC_LOOP];
+ } __packed;
+
+ /*
+ * Payload of PTP_SYNC_PKT_OFFSET: the calculated offset and its timestamp.
+ * NOTE(review): unlike the other wire structures this one is not __packed.
+ */
+ struct ptp_clock_offset_msg {
+ s64 ts;
+ s64 offset;
+ };
+
+ /* Context handed to ptp_marker_find() and ptp_probe_store() */
+ struct ptp_markers_context {
+ struct clock_sync_context *clock;
+ struct ptp_clock_sync *ptp;
+ /* collected probes */
+ struct ptp_clock_result_msg msg;
+ /* number of usable slots in msg.samples[] */
+ int size;
+ };
+
+ /* Body of a marker written to trace_marker_raw */
+ struct ptp_marker_buf {
+ int local_id;
+ int remote_id;
+ /* probe counter, starting from 1 */
+ int count;
+ /* 's' for a sent probe, 'r' for a returned probe */
+ int packet_id;
+ } __packed;
+
+ /* Complete marker: series identifier followed by the marker body */
+ struct ptp_marker {
+ int series_id;
+ struct ptp_marker_buf data;
+ } __packed;
+
+ /*
+ * ptp_clock_sync_init - allocate the private data of the PTP protocol
+ *
+ * Loads the "ftrace" event system of the sync instance, looks up the raw_data
+ * event and its "id" field, opens trace_marker_raw for writing and stores the
+ * resulting struct ptp_clock_sync in the context's proto_data.
+ *
+ * Returns 0 on success or if proto_data is already set, -1 on failure.
+ */
+ static int ptp_clock_sync_init(struct tracecmd_time_sync *tsync)
+ {
+ const char *systems[] = {"ftrace", NULL};
+ struct clock_sync_context *clock_context;
+ struct ptp_clock_sync *ptp;
+ struct tep_event *raw;
+ char *path;
+
+ if (!tsync || !tsync->context)
+ return -1;
+ clock_context = (struct clock_sync_context *)tsync->context;
+ /* already initialized */
+ if (clock_context->proto_data)
+ return 0;
+
+ ptp = calloc(1, sizeof(struct ptp_clock_sync));
+ if (!ptp)
+ return -1;
+
+ /* mark both descriptors as not opened, so cleanup can tell them apart */
+ ptp->marker_fd = -1;
+ ptp->debug_fd = -1;
+
+ path = tracefs_instance_get_dir(clock_context->instance);
+ if (!path)
+ goto error;
+ ptp->tep = tracefs_local_events_system(path, systems);
+ tracefs_put_tracing_file(path);
+ if (!ptp->tep)
+ goto error;
+ raw = tep_find_event_by_name(ptp->tep, "ftrace", "raw_data");
+ if (!raw)
+ goto error;
+ ptp->id = tep_find_field(raw, "id");
+ if (!ptp->id)
+ goto error;
+ ptp->raw_id = raw->id;
+
+ tep_set_file_bigendian(ptp->tep, tracecmd_host_bigendian());
+ tep_set_local_bigendian(ptp->tep, tracecmd_host_bigendian());
+
+ path = tracefs_instance_get_file(clock_context->instance, "trace_marker_raw");
+ if (!path)
+ goto error;
+ /* NOTE(review): the result of open() is not checked, marker_fd may stay negative */
+ ptp->marker_fd = open(path, O_WRONLY);
+ tracefs_put_tracing_file(path);
+
+ clock_context->proto_data = ptp;
+
+ #ifdef TSYNC_DEBUG
+ /* create (or truncate) the per-peer file that collects the calculated offsets */
+ if (clock_context->is_server) {
+ char buff[256];
+ int res_fd;
+
+ sprintf(buff, "res-id%d.txt", clock_context->remote_id);
+
+ res_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+ if (res_fd > 0)
+ close(res_fd);
+ }
+ #endif
+
+ return 0;
+
+ error:
+ if (ptp) {
+ tep_free(ptp->tep);
+ if (ptp->marker_fd >= 0)
+ close(ptp->marker_fd);
+ }
+ free(ptp);
+ return -1;
+ }
+
+ /*
+  * ptp_clock_sync_free - release the private data of the PTP protocol
+  *
+  * Frees the tep handle, closes the marker and debug descriptors if they are
+  * open, frees the structure and clears proto_data.
+  *
+  * Returns -1 if @tsync or its context is missing, 0 otherwise.
+  */
+ static int ptp_clock_sync_free(struct tracecmd_time_sync *tsync)
+ {
+ 	struct clock_sync_context *ctx;
+ 	struct ptp_clock_sync *ptp;
+
+ 	if (!tsync || !tsync->context)
+ 		return -1;
+
+ 	ctx = (struct clock_sync_context *)tsync->context;
+ 	ptp = (struct ptp_clock_sync *)ctx->proto_data;
+ 	if (!ptp)
+ 		return 0;
+
+ 	tep_free(ptp->tep);
+ 	if (ptp->marker_fd >= 0)
+ 		close(ptp->marker_fd);
+ 	if (ptp->debug_fd >= 0)
+ 		close(ptp->debug_fd);
+
+ 	free(ptp);
+ 	ctx->proto_data = NULL;
+ 	return 0;
+ }
+
+ /*
+  * Record the timestamp of a sent ('s') or returned ('r') probe in the
+  * ctx->msg.samples[] array. Depending on the context (server or client),
+  * there may be only returned probes, or both sent and returned probes.
+  * Returned probes occupy the first half of the array, sent probes follow:
+  *   [0 .. max data.count]     - holds only returned probes
+  *   [0 .. 2 * max data.count] - holds both returned and sent probes
+  * Probes with any other packet id, or whose counter does not fit in the
+  * array, are ignored.
+  */
+ static void ptp_probe_store(struct ptp_markers_context *ctx,
+ 			    struct ptp_marker *marker,
+ 			    unsigned long long ts)
+ {
+ 	int count = marker->data.count;
+ 	int slot;
+
+ 	switch (marker->data.packet_id) {
+ 	case 'r':
+ 		if (count > ctx->size)
+ 			return;
+ 		slot = count - 1;
+ 		break;
+ 	case 's':
+ 		if (count * 2 > ctx->size)
+ 			return;
+ 		slot = ctx->size / 2 + count - 1;
+ 		break;
+ 	default:
+ 		return;
+ 	}
+
+ 	if (slot < 0)
+ 		return;
+
+ 	ctx->msg.samples[slot].id = marker->data.count;
+ 	ctx->msg.samples[slot].ts = ts;
+ 	ctx->msg.count++;
+ }
+
+ /*
+  * Callback for tracefs_iterate_raw_events(): pick the raw_data events that
+  * carry a PTP marker of the current series and peer, and store their
+  * timestamps. Always returns 0.
+  */
+ static int ptp_marker_find(struct tep_event *event, struct tep_record *record,
+ 			   int cpu, void *context)
+ {
+ 	struct ptp_markers_context *ctx = (struct ptp_markers_context *)context;
+ 	struct ptp_marker *marker;
+
+ 	/* Make sure this is our event */
+ 	if (event->id != ctx->ptp->raw_id || !ctx->ptp->id)
+ 		return 0;
+
+ 	/* The record must be big enough to hold a complete marker */
+ 	if (record->size < (ctx->ptp->id->offset + sizeof(struct ptp_marker)))
+ 		return 0;
+
+ 	marker = (struct ptp_marker *)(record->data + ctx->ptp->id->offset);
+ 	if (marker->data.local_id != ctx->clock->local_id)
+ 		return 0;
+ 	if (marker->data.remote_id != ctx->clock->remote_id)
+ 		return 0;
+ 	if (marker->series_id != ctx->ptp->series_id)
+ 		return 0;
+ 	if (!marker->data.count)
+ 		return 0;
+
+ 	ptp_probe_store(ctx, marker, record->ts);
+ 	return 0;
+ }
+
+ /*
+  * A probe is usable when all three samples have a non-zero timestamp and
+  * carry the same id. Unusable probes are counted in @bad_probes.
+  */
+ static inline bool good_probe(struct ptp_clock_sample *server_sample,
+ 			      struct ptp_clock_sample *send_sample,
+ 			      struct ptp_clock_sample *client_sample,
+ 			      int *bad_probes)
+ {
+ 	bool complete = server_sample->ts && send_sample->ts && client_sample->ts;
+ 	bool same_id = server_sample->id == send_sample->id &&
+ 		       server_sample->id == client_sample->id;
+
+ 	if (complete && same_id)
+ 		return true;
+
+ 	(*bad_probes)++;
+ 	return false;
+ }
+
+ /*
+  * Calculate the offset from the probe with the shortest round trip.
+  *
+  * @server holds the local samples: returned probes in the first half, sent
+  * probes in the second half. @client holds the samples reported by the peer.
+  * For every usable probe the local timestamp is the middle point between
+  * sending and receiving, and the offset is the peer timestamp minus it.
+  * @offset_ret and @ts_ret are written only if at least one probe is usable.
+  *
+  * Always returns 0; @bad_probes receives the number of rejected probes.
+  */
+ static int ptp_calc_offset_fastest(struct clock_sync_context *clock,
+ 				   struct ptp_clock_result_msg *server,
+ 				   struct ptp_clock_result_msg *client,
+ 				   long long *offset_ret, long long *ts_ret,
+ 				   int *bad_probes)
+ {
+ 	struct ptp_clock_sample *sent_samples;
+ 	long long delta_min = LLONG_MAX;
+ 	long long offset = 0;
+ 	long long delta = 0;
+ 	long long ts = 0;
+ 	int limit;
+ 	int i;
+
+ 	*bad_probes = 0;
+ 	sent_samples = server->samples + (server->count / 2);
+ 	if (server->count / 2 < client->count)
+ 		limit = server->count / 2;
+ 	else
+ 		limit = client->count;
+
+ 	for (i = 0; i < limit; i++) {
+ 		struct ptp_clock_sample *returned = &server->samples[i];
+ 		struct ptp_clock_sample *sent = &sent_samples[i];
+ 		struct ptp_clock_sample *remote = &client->samples[i];
+
+ 		if (!good_probe(returned, sent, remote, bad_probes))
+ 			continue;
+
+ 		ts = (sent->ts + returned->ts) / 2;
+ 		offset = remote->ts - ts;
+
+ 		/* round trip time of this probe */
+ 		delta = returned->ts - sent->ts;
+ 		if (delta < delta_min) {
+ 			delta_min = delta;
+ 			*offset_ret = offset;
+ 			*ts_ret = ts;
+ 		}
+ #ifdef TSYNC_DEBUG
+ 		{
+ 			struct ptp_clock_sync *ptp;
+
+ 			ptp = (struct ptp_clock_sync *)clock->proto_data;
+ 			if (ptp && ptp->debug_fd > 0) {
+ 				char buff[256];
+
+ 				sprintf(buff, "%lld %lld %lld\n",
+ 					ts, client->samples[i].ts, offset);
+ 				write(ptp->debug_fd, buff, strlen(buff));
+ 			}
+ 		}
+ #endif
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Calculate the offset as the most frequent one among all usable probes.
+  *
+  * @server holds the local samples: returned probes in the first half, sent
+  * probes in the second half. @client holds the samples reported by the peer.
+  * The absolute offsets of the usable probes are spread over PTP_SYNC_LOOP
+  * histogram bins; the first offset that makes a bin the most populated one
+  * is returned in @offset_ret, with its timestamp in @ts_ret. Both are left
+  * untouched if there is no usable probe.
+  *
+  * Always returns 0; @bad_probes receives the number of rejected probes.
+  */
+ static int ptp_calc_offset_hist(struct clock_sync_context *clock,
+ 				struct ptp_clock_result_msg *server,
+ 				struct ptp_clock_result_msg *client,
+ 				long long *offset_ret, long long *ts_ret,
+ 				int *bad_probes)
+ {
+ 	struct ptp_clock_sample *sample_send;
+ 	long long timestamps[PTP_SYNC_LOOP];
+ 	long long offsets[PTP_SYNC_LOOP];
+ 	long long offset_min = LLONG_MAX;
+ 	long long offset_max = 0;
+ 	int hist[PTP_SYNC_LOOP];
+ 	long long ind;
+ 	int max = 0;
+ 	long long bin;
+ 	int i, k = 0;
+
+ 	*bad_probes = 0;
+ 	memset(hist, 0, sizeof(int) * PTP_SYNC_LOOP);
+ 	sample_send = server->samples + (server->count / 2);
+ 	/* i < PTP_SYNC_LOOP keeps k inside timestamps[] and offsets[] */
+ 	for (i = 0; i < PTP_SYNC_LOOP &&
+ 		    i * 2 < server->count && i < client->count; i++) {
+ 		if (!good_probe(&server->samples[i], &sample_send[i],
+ 				&client->samples[i], bad_probes))
+ 			continue;
+ 		timestamps[k] = (sample_send[i].ts + server->samples[i].ts) / 2;
+ 		offsets[k] = client->samples[i].ts - timestamps[k];
+ 		if (offset_max < llabs(offsets[k]))
+ 			offset_max = llabs(offsets[k]);
+ 		if (offset_min > llabs(offsets[k]))
+ 			offset_min = llabs(offsets[k]);
+ #ifdef TSYNC_DEBUG
+ 		{
+ 			struct ptp_clock_sync *ptp;
+
+ 			ptp = (struct ptp_clock_sync *)clock->proto_data;
+
+ 			if (ptp && ptp->debug_fd > 0) {
+ 				char buff[256];
+
+ 				sprintf(buff, "%lld %lld %lld\n",
+ 					timestamps[k],
+ 					client->samples[i].ts, offsets[k]);
+ 				write(ptp->debug_fd, buff, strlen(buff));
+ 			}
+ 		}
+ #endif
+ 		k++;
+ 	}
+
+ 	/*
+ 	 * The bin width is zero when the offsets are spread over less than
+ 	 * PTP_SYNC_LOOP units; use the smallest possible width in that case
+ 	 * instead of dividing by zero.
+ 	 */
+ 	bin = (offset_max - offset_min) / PTP_SYNC_LOOP;
+ 	if (bin < 1)
+ 		bin = 1;
+ 	for (i = 0; i < k; i++) {
+ 		ind = (llabs(offsets[i]) - offset_min) / bin;
+ 		if (ind < PTP_SYNC_LOOP) {
+ 			hist[ind]++;
+ 			if (max < hist[ind]) {
+ 				max = hist[ind];
+ 				*offset_ret = offsets[i];
+ 				*ts_ret = timestamps[i];
+ 			}
+ 		}
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Convert a received results message from network to host byte order, in
+  * place. The sample count comes from the peer, so it is limited to the
+  * capacity of the samples[] array before it is used as a loop bound.
+  */
+ static void ntoh_ptp_results(struct ptp_clock_result_msg *msg)
+ {
+ 	const unsigned int capacity = sizeof(msg->samples) / sizeof(msg->samples[0]);
+ 	unsigned int count;
+ 	unsigned int i;
+
+ 	count = ntohl(msg->count);
+ 	if (count > capacity)
+ 		count = capacity;
+ 	msg->count = count;
+
+ 	for (i = 0; i < count; i++) {
+ 		msg->samples[i].id = ntohl(msg->samples[i].id);
+ 		msg->samples[i].ts = ntohll(msg->samples[i].ts);
+ 	}
+ 	msg->series_id = ntohl(msg->series_id);
+ }
+
+
+ /*
+  * Convert a results message from host to network byte order, in place.
+  * The count is converted last, as it bounds the loop over the samples.
+  */
+ static void hton_ptp_results(struct ptp_clock_result_msg *msg)
+ {
+ 	struct ptp_clock_sample *sample;
+ 	int idx = 0;
+
+ 	while (idx < msg->count) {
+ 		sample = &msg->samples[idx++];
+ 		sample->id = htonl(sample->id);
+ 		sample->ts = htonll(sample->ts);
+ 	}
+
+ 	msg->series_id = htonl(msg->series_id);
+ 	msg->count = htonl(msg->count);
+ }
+
+ /*
+  * Take the timestamp of a probe. With PTP_FLAG_USE_MARKER the marker is
+  * written to the trace marker file and its timestamp is collected later from
+  * the trace buffer; otherwise CLOCK_MONOTONIC_RAW is read, converted to
+  * nanoseconds and stored right away.
+  */
+ static inline void ptp_track_clock(struct ptp_markers_context *ctx,
+ 				   struct ptp_marker *marker)
+ {
+ 	struct timespec now;
+ 	unsigned long long ns;
+
+ 	if (ctx->ptp->flags & PTP_FLAG_USE_MARKER) {
+ 		write(ctx->ptp->marker_fd, marker, sizeof(struct ptp_marker));
+ 		return;
+ 	}
+
+ 	clock_gettime(CLOCK_MONOTONIC_RAW, &now);
+ 	ns = now.tv_sec * 1000000000LL;
+ 	ns += now.tv_nsec;
+ 	ptp_probe_store(ctx, marker, ns);
+ }
+
+ /*
+  * ptp_clock_client - client side of one PTP synchronization round
+  *
+  * Sequence:
+  *  - receive PTP_SYNC_PKT_START with the series id and flags, echo it back;
+  *  - for every received PTP_SYNC_PKT_PROBE take a timestamp and echo the
+  *    probe, until a different packet arrives;
+  *  - that packet must be PTP_SYNC_PKT_END; collect the marker timestamps from
+  *    the trace buffer if markers are in use;
+  *  - send the collected samples as PTP_SYNC_PKT_PROBES;
+  *  - receive PTP_SYNC_PKT_OFFSET with the offset calculated by the server.
+  *
+  * @offset and @timestamp are set from the server's answer.
+  * Returns 0 on success, -1 on any protocol or transport error.
+  */
+ static int ptp_clock_client(struct tracecmd_time_sync *tsync,
+ 			    long long *offset, long long *timestamp)
+ {
+ 	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
+ 	struct clock_sync_context *clock_context;
+ 	struct ptp_clock_offset_msg res_offset;
+ 	struct ptp_clock_start_msg start;
+ 	struct ptp_markers_context ctx;
+ 	struct ptp_clock_sync *ptp;
+ 	struct ptp_marker marker;
+ 	unsigned int sync_msg;
+ 	unsigned int size;
+ 	char *msg;
+ 	int count;
+ 	int ret;
+
+ 	if (!tsync || !tsync->context || !tsync->msg_handle)
+ 		return -1;
+
+ 	clock_context = (struct clock_sync_context *)tsync->context;
+ 	if (clock_context->proto_data == NULL)
+ 		return -1;
+
+ 	ptp = (struct ptp_clock_sync *)clock_context->proto_data;
+ 	size = sizeof(start);
+ 	msg = (char *)&start;
+ 	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ 					  sync_proto, &sync_msg,
+ 					  &size, &msg);
+ 	if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ 	    sync_msg != PTP_SYNC_PKT_START)
+ 		return -1;
+ 	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME,
+ 					  PTP_SYNC_PKT_START, sizeof(start),
+ 					  (char *)&start);
+ 	if (ret)
+ 		return -1;
+
+ 	marker.data.local_id = clock_context->local_id;
+ 	marker.data.remote_id = clock_context->remote_id;
+ 	marker.series_id = ntohl(start.series_id);
+ 	marker.data.packet_id = 'r';
+ 	ptp->series_id = marker.series_id;
+ 	ptp->flags = ntohl(start.flags);
+ 	msg = (char *)&count;
+ 	size = sizeof(count);
+ 	/*
+ 	 * The whole message is sent to the server, which treats a zero
+ 	 * timestamp as a missing probe: slots that never get a sample must
+ 	 * be zero and not leftovers from the stack.
+ 	 */
+ 	memset(&ctx, 0, sizeof(ctx));
+ 	ctx.size = PTP_SYNC_LOOP;
+ 	ctx.ptp = ptp;
+ 	ctx.clock = clock_context;
+ 	ctx.msg.series_id = ptp->series_id;
+ 	while (true) {
+ 		count = 0;
+ 		ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ 						  sync_proto, &sync_msg,
+ 						  &size, &msg);
+ 		if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ 		    sync_msg != PTP_SYNC_PKT_PROBE || !ntohl(count))
+ 			break;
+ 		marker.data.count = ntohl(count);
+ 		ptp_track_clock(&ctx, &marker);
+ 		ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME,
+ 						  PTP_SYNC_PKT_PROBE,
+ 						  sizeof(count), (char *)&count);
+ 		if (ret)
+ 			break;
+ 	}
+
+ 	/* The only valid way out of the probe loop is the END packet */
+ 	if (strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ 	    sync_msg != PTP_SYNC_PKT_END)
+ 		return -1;
+
+ 	if (ptp->flags & PTP_FLAG_USE_MARKER)
+ 		tracefs_iterate_raw_events(ptp->tep, clock_context->instance,
+ 					   NULL, 0, ptp_marker_find, &ctx);
+
+ 	hton_ptp_results(&ctx.msg);
+ 	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME,
+ 					  PTP_SYNC_PKT_PROBES,
+ 					  sizeof(ctx.msg), (char *)&ctx.msg);
+ 	if (ret)
+ 		return -1;
+
+ 	msg = (char *)&res_offset;
+ 	size = sizeof(res_offset);
+ 	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ 					  sync_proto, &sync_msg,
+ 					  &size, &msg);
+ 	if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ 	    sync_msg != PTP_SYNC_PKT_OFFSET)
+ 		return -1;
+
+ 	*offset = ntohll(res_offset.offset);
+ 	*timestamp = ntohll(res_offset.ts);
+
+ 	return 0;
+ }
+
+
+ /*
+  * ptp_clock_server - server side of one PTP synchronization round
+  *
+  * Sequence:
+  *  - send PTP_SYNC_PKT_START with a new series id and the flags, wait for
+  *    the echo and reset the trace buffer of the sync instance;
+  *  - send PTP_SYNC_LOOP probes, taking a timestamp before sending ('s') and
+  *    after the echo is received ('r');
+  *  - send PTP_SYNC_PKT_END and receive the samples of the client;
+  *  - calculate the offset and send it as PTP_SYNC_PKT_OFFSET.
+  *
+  * @offset and @timestamp are set by the selected calculation; they are left
+  * untouched if no probe is usable.
+  * Returns 0 on success, -1 on any protocol or transport error.
+  */
+ static int ptp_clock_server(struct tracecmd_time_sync *tsync,
+ 			    long long *offset, long long *timestamp)
+ {
+ 	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
+ 	struct ptp_clock_result_msg *results = NULL;
+ 	struct clock_sync_context *clock_context;
+ 	struct ptp_clock_offset_msg res_offset;
+ 	struct ptp_clock_start_msg start;
+ 	struct ptp_markers_context ctx;
+ 	int sync_loop = PTP_SYNC_LOOP;
+ 	struct ptp_clock_sync *ptp;
+ 	struct ptp_marker marker;
+ 	unsigned int sync_msg;
+ 	unsigned int size;
+ 	int bad_probes;
+ 	int count = 1;
+ 	int msg_count;
+ 	int msg_ret;
+ 	char *msg;
+ 	int ret;
+
+ 	if (!tsync || !tsync->context || !tsync->msg_handle)
+ 		return -1;
+
+ 	clock_context = (struct clock_sync_context *)tsync->context;
+ 	if (clock_context->proto_data == NULL)
+ 		return -1;
+
+ 	ptp = (struct ptp_clock_sync *)clock_context->proto_data;
+ 	ptp->flags = ptp_flags;
+ 	memset(&start, 0, sizeof(start));
+ 	start.series_id = htonl(ptp->series_id + 1);
+ 	start.flags = htonl(ptp->flags);
+ 	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME,
+ 					  PTP_SYNC_PKT_START, sizeof(start),
+ 					  (char *)&start);
+ 	if (!ret)
+ 		ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ 						  sync_proto, &sync_msg,
+ 						  NULL, NULL);
+ 	if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ 	    sync_msg != PTP_SYNC_PKT_START)
+ 		return -1;
+
+ 	/* Start the series with an empty trace buffer */
+ 	tracefs_instance_file_write(clock_context->instance, "trace", "\0");
+
+ 	ptp->series_id++;
+ 	marker.data.local_id = clock_context->local_id;
+ 	marker.data.remote_id = clock_context->remote_id;
+ 	marker.series_id = ptp->series_id;
+ 	msg = (char *)&msg_ret;
+ 	size = sizeof(msg_ret);
+ 	/*
+ 	 * good_probe() treats a zero timestamp as a missing probe: slots that
+ 	 * never get a sample must be zero and not leftovers from the stack.
+ 	 */
+ 	memset(&ctx, 0, sizeof(ctx));
+ 	ctx.size = 2*PTP_SYNC_LOOP;
+ 	ctx.ptp = ptp;
+ 	ctx.clock = clock_context;
+ 	ctx.msg.series_id = ptp->series_id;
+ 	do {
+ 		marker.data.count = count++;
+ 		marker.data.packet_id = 's';
+ 		msg_count = htonl(marker.data.count);
+ 		ptp_track_clock(&ctx, &marker);
+ 		ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME,
+ 						  PTP_SYNC_PKT_PROBE,
+ 						  sizeof(msg_count),
+ 						  (char *)&msg_count);
+ 		if (!ret)
+ 			ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ 							  sync_proto, &sync_msg,
+ 							  &size, &msg);
+
+ 		marker.data.packet_id = 'r';
+ 		ptp_track_clock(&ctx, &marker);
+ 		if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ 		    sync_msg != PTP_SYNC_PKT_PROBE ||
+ 		    ntohl(msg_ret) != marker.data.count)
+ 			break;
+ 	} while (--sync_loop);
+
+ 	/* The series is valid only if all probes were exchanged */
+ 	if (sync_loop)
+ 		return -1;
+
+ 	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME,
+ 					  PTP_SYNC_PKT_END, 0, NULL);
+ 	if (ret)
+ 		return -1;
+
+ 	/* A zero size and a NULL buffer: the buffer is returned in results */
+ 	size = 0;
+ 	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+ 					  sync_proto, &sync_msg,
+ 					  &size, (char **)&results);
+ 	/*
+ 	 * The buffer is going to be accessed as a complete results message,
+ 	 * so anything shorter than that is rejected.
+ 	 */
+ 	if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
+ 	    sync_msg != PTP_SYNC_PKT_PROBES || results == NULL ||
+ 	    size < sizeof(*results)) {
+ 		free(results);
+ 		return -1;
+ 	}
+
+ 	ntoh_ptp_results(results);
+ 	if (ptp->flags & PTP_FLAG_USE_MARKER)
+ 		tracefs_iterate_raw_events(ptp->tep, clock_context->instance,
+ 					   NULL, 0, ptp_marker_find, &ctx);
+ 	if (ptp->flags & PTP_FLAG_FASTEST_RESPONSE)
+ 		ptp_calc_offset_fastest(clock_context, &ctx.msg, results, offset,
+ 					timestamp, &bad_probes);
+ 	else
+ 		ptp_calc_offset_hist(clock_context, &ctx.msg, results, offset,
+ 				     timestamp, &bad_probes);
+ #ifdef TSYNC_DEBUG
+ 	{
+ 		char buff[256];
+ 		int res_fd;
+
+ 		sprintf(buff, "res-id%d.txt", clock_context->remote_id);
+
+ 		res_fd = open(buff, O_WRONLY|O_APPEND, 0644);
+ 		if (res_fd > 0) {
+ 			if (*offset && *timestamp) {
+ 				sprintf(buff, "%d %lld %lld\n",
+ 					ptp->series_id, *offset, *timestamp);
+ 				write(res_fd, buff, strlen(buff));
+ 			}
+ 			close(res_fd);
+ 		}
+
+ 		printf("\n calculated offset %d: %lld, %d probes, filtered out %d, PTP flags 0x%X\n\r",
+ 			ptp->series_id, *offset, results->count, bad_probes, ptp->flags);
+ 		if (ptp && ptp->debug_fd > 0) {
+ 			sprintf(buff, "%lld %lld 0\n", *offset, *timestamp);
+ 			write(ptp->debug_fd, buff, strlen(buff));
+ 			close(ptp->debug_fd);
+ 			ptp->debug_fd = -1;
+ 		}
+
+ 	}
+ #endif
+
+ 	res_offset.offset = htonll(*offset);
+ 	res_offset.ts = htonll(*timestamp);
+ 	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME,
+ 					  PTP_SYNC_PKT_OFFSET,
+ 					  sizeof(res_offset),
+ 					  (char *)&res_offset);
+
+ 	free(results);
+ 	return 0;
+ }
+
+ /*
+ * Time sync "calc" callback of the PTP protocol: run one synchronization
+ * round as a server or as a client, depending on the role stored in the time
+ * sync context. The protocol calculates an offset only, so the scaling is
+ * reported as 1 and the fraction as 0; @cpu is not used.
+ *
+ * Returns -1 if @tsync or its context is missing, otherwise the return value
+ * of ptp_clock_server() or ptp_clock_client().
+ */
+ static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
+ long long *offset, long long *scaling, long long *frac,
+ long long *timestamp, unsigned int cpu)
+ {
+ struct clock_sync_context *clock_context;
+ int ret;
+
+ if (!tsync || !tsync->context)
+ return -1;
+ clock_context = (struct clock_sync_context *)tsync->context;
+
+ #ifdef TSYNC_DEBUG
+ /* open a fresh debug file for the series that is about to start */
+ if (clock_context->is_server) {
+ struct ptp_clock_sync *ptp;
+ char buff[256];
+
+ ptp = (struct ptp_clock_sync *)clock_context->proto_data;
+ if (ptp->debug_fd > 0)
+ close(ptp->debug_fd);
+ sprintf(buff, "s-id%d_%d.txt",
+ clock_context->remote_id, ptp->series_id+1);
+ ptp->debug_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+ }
+ #endif
+
+ if (scaling)
+ *scaling = 1;
+ if (frac)
+ *frac = 0;
+ if (clock_context->is_server)
+ ret = ptp_clock_server(tsync, offset, timestamp);
+ else
+ ret = ptp_clock_client(tsync, offset, timestamp);
+
+ return ret;
+ }
+
+ /*
+ * Register the PTP time sync protocol for all four roles (guest, host, client
+ * and server), with no restriction on the trace clock (supported clocks is 0)
+ * and with the TRACECMD_TSYNC_FLAG_INTERPOLATE flag.
+ *
+ * Returns the result of tracecmd_tsync_proto_register().
+ */
+ int ptp_clock_sync_register(void)
+ {
+ return tracecmd_tsync_proto_register(PTP_NAME, PTP_ACCURACY,
+ TRACECMD_TIME_SYNC_ROLE_GUEST |
+ TRACECMD_TIME_SYNC_ROLE_HOST |
+ TRACECMD_TIME_SYNC_ROLE_CLIENT |
+ TRACECMD_TIME_SYNC_ROLE_SERVER,
+ 0, TRACECMD_TSYNC_FLAG_INTERPOLATE,
+ ptp_clock_sync_init,
+ ptp_clock_sync_free,
+ ptp_clock_sync_calc);
+
+ }
+
+ /*
+ * Remove the PTP time sync protocol from the list of registered protocols.
+ * Returns the result of tracecmd_tsync_proto_unregister().
+ */
+ int ptp_clock_sync_unregister(void)
+ {
+ return tracecmd_tsync_proto_unregister(PTP_NAME);
+ }
diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c
new file mode 100644
index 00000000..bbefda20
--- /dev/null
+++ b/lib/trace-cmd/trace-timesync.c
@@ -0,0 +1,1079 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <linux/limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <dirent.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "trace-cmd-private.h"
+#include "trace-cmd-local.h"
+#include "tracefs.h"
+#include "event-utils.h"
+#include "trace-tsync-local.h"
+
+ /* Descriptor of a registered time sync protocol; an entry of tsync_proto_list */
+ struct tsync_proto {
+ struct tsync_proto *next;
+ char proto_name[TRACECMD_TSYNC_PNAME_LENGTH];
+ /* roles in which the protocol can be used, tested as a bit mask */
+ enum tracecmd_time_sync_role roles;
+ /* when several protocols match, the one with the lowest value is selected */
+ int accuracy;
+ /* bit mask of supported trace clock ids, 0 means no restriction */
+ int supported_clocks;
+ /* protocol flags, as reported by tsync_get_proto_flags() */
+ unsigned int flags;
+
+ /* protocol callbacks: set up, tear down and run one synchronization */
+ int (*clock_sync_init)(struct tracecmd_time_sync *clock_context);
+ int (*clock_sync_free)(struct tracecmd_time_sync *clock_context);
+ int (*clock_sync_calc)(struct tracecmd_time_sync *clock_context,
+ long long *offset, long long *scaling, long long *frac,
+ long long *timestamp, unsigned int cpu);
+ };
+
+ /* Payload of a TRACECMD_TIME_SYNC_CMD_PROBE request: the CPU to synchronize */
+ struct tsync_probe_request_msg {
+ unsigned short cpu;
+ } __packed;
+
+ #ifdef __ANDROID__
+ /*
+ * Local replacements for pthread_setaffinity_np() and pthread_getaffinity_np(),
+ * built directly on the affinity system calls.
+ * NOTE(review): the syscall numbers are hard-coded, presumably for one
+ * specific architecture - confirm against the targeted ABI.
+ * NOTE(review): the pthread_t handle is passed as the first syscall argument -
+ * confirm this identifies the intended thread.
+ */
+ #define __NR_sched_setaffinity 122
+ #define __NR_sched_getaffinity 123
+
+ /* Returns the negated result of the system call */
+ static int pthread_setaffinity_np(pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset)
+ {
+ return -syscall(__NR_sched_setaffinity, thread, cpusetsize, cpuset);
+ }
+
+ /*
+ * Returns 0 on success, or the negative result of the system call.
+ * NOTE(review): @cpuset is declared const, but it is the output of the call
+ * and is written through a cast below.
+ */
+ static int pthread_getaffinity_np(pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset)
+ {
+ long ret = syscall(__NR_sched_getaffinity, thread, cpusetsize, cpuset);
+
+ if (ret < 0)
+ return ret;
+ /* clear the part of the mask beyond the number of bytes reported by the call */
+ if (ret < cpusetsize)
+ memset((char *)cpuset+ret, 0, cpusetsize-ret);
+
+ return 0;
+ }
+ #endif /* __ANDROID__ */
+
+static struct tsync_proto *tsync_proto_list;
+
+ /*
+  * Look up a registered protocol by its exact name.
+  * Returns the protocol descriptor, or NULL if @proto_name is NULL or there
+  * is no protocol with that name.
+  */
+ static struct tsync_proto *tsync_proto_find(const char *proto_name)
+ {
+ 	struct tsync_proto *it = tsync_proto_list;
+ 	size_t wanted;
+
+ 	if (!proto_name)
+ 		return NULL;
+
+ 	wanted = strlen(proto_name);
+ 	while (it) {
+ 		if (strlen(it->proto_name) == wanted &&
+ 		    strncmp(it->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH) == 0)
+ 			return it;
+ 		it = it->next;
+ 	}
+
+ 	return NULL;
+ }
+
+/**
+ * tracecmd_tsync_init - Initialize the global, per task, time sync data.
+ */
+void tracecmd_tsync_init(void)
+{
+ ptp_clock_sync_register();
+ kvm_clock_sync_register();
+}
+
+ /*
+  * tracecmd_tsync_proto_register - add a protocol to the list of time sync
+  *				    protocols
+  *
+  * @proto_name: unique name of the protocol; together with its terminating
+  *		 null byte it must fit in TRACECMD_TSYNC_PNAME_LENGTH bytes
+  * @accuracy: accuracy of the protocol, lower values are preferred when a
+  *	       protocol is selected
+  * @roles: bit mask of the roles supported by the protocol
+  * @supported_clocks: bit mask of the supported trace clocks, 0 for any
+  * @flags: protocol flags
+  * @init: callback that initializes the protocol specific data
+  * @free: callback that releases the protocol specific data
+  * @calc: callback that runs a single time synchronization
+  *
+  * Returns 0 on success, -1 if the name is missing, too long or already
+  * registered, or if the allocation fails.
+  */
+ int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int roles,
+ 				  int supported_clocks, unsigned int flags,
+ 				  int (*init)(struct tracecmd_time_sync *),
+ 				  int (*free)(struct tracecmd_time_sync *),
+ 				  int (*calc)(struct tracecmd_time_sync *,
+ 					      long long *, long long *, long long *,
+ 					      long long *, unsigned int))
+ {
+ 	struct tsync_proto *proto = NULL;
+
+ 	/*
+ 	 * The stored name is used with strlen(), so it has to be terminated:
+ 	 * a name that fills the whole array cannot be accepted.
+ 	 */
+ 	if (!proto_name || strlen(proto_name) >= TRACECMD_TSYNC_PNAME_LENGTH)
+ 		return -1;
+ 	if (tsync_proto_find(proto_name))
+ 		return -1;
+ 	proto = calloc(1, sizeof(struct tsync_proto));
+ 	if (!proto)
+ 		return -1;
+ 	/* The array is zeroed by calloc(), the last byte stays a terminator */
+ 	strncpy(proto->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH - 1);
+ 	proto->accuracy = accuracy;
+ 	proto->roles = roles;
+ 	proto->flags = flags;
+ 	proto->supported_clocks = supported_clocks;
+ 	proto->clock_sync_init = init;
+ 	proto->clock_sync_free = free;
+ 	proto->clock_sync_calc = calc;
+
+ 	/* New protocols go to the head of the list */
+ 	proto->next = tsync_proto_list;
+ 	tsync_proto_list = proto;
+ 	return 0;
+ }
+
+ /*
+  * tracecmd_tsync_proto_unregister - remove a protocol from the list of time
+  *				      sync protocols and free its descriptor
+  *
+  * @proto_name: exact name of the protocol
+  *
+  * Returns 0 if the protocol was removed, -1 if @proto_name is NULL or there
+  * is no protocol with that name.
+  */
+ int tracecmd_tsync_proto_unregister(char *proto_name)
+ {
+ 	struct tsync_proto *prev = NULL;
+ 	struct tsync_proto *cur;
+ 	size_t wanted;
+
+ 	if (!proto_name)
+ 		return -1;
+
+ 	wanted = strlen(proto_name);
+ 	for (cur = tsync_proto_list; cur; prev = cur, cur = cur->next) {
+ 		if (strlen(cur->proto_name) != wanted)
+ 			continue;
+ 		if (strncmp(cur->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH))
+ 			continue;
+
+ 		/* Unlink the entry, it may be the head of the list */
+ 		if (prev)
+ 			prev->next = cur->next;
+ 		else
+ 			tsync_proto_list = cur->next;
+ 		free(cur);
+ 		return 0;
+ 	}
+
+ 	return -1;
+ }
+
+ /* Check if a protocol with the given name is registered */
+ bool __hidden tsync_proto_is_supported(const char *proto_name)
+ {
+ 	return tsync_proto_find(proto_name) != NULL;
+ }
+
+/**
+ * tracecmd_tsync_get_offsets - Return the calculated time offsets
+ *
+ * @tsync: Pointer to time sync context
+ * @cpu: CPU for which to get the calculated offsets
+ * @count: Returns the number of calculated time offsets
+ * @ts: Array of size @count containing timestamps of callculated offsets
+ * @offsets: array of size @count, containing offsets for each timestamp
+ * @scalings: array of size @count, containing scaling ratios for each timestamp
+ * @frac: array of size @count, containing fraction bits for each timestamp
+ *
+ * Retuns -1 in case of an error, or 0 otherwise
+ */
+int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
+ int *count, long long **ts,
+ long long **offsets, long long **scalings, long long **frac)
+{
+ struct clock_sync_context *tsync_context;
+
+ if (!tsync || !tsync->context)
+ return -1;
+ tsync_context = (struct clock_sync_context *)tsync->context;
+ if (cpu >= tsync_context->cpu_count || !tsync_context->offsets)
+ return -1;
+ if (count)
+ *count = tsync_context->offsets[cpu].sync_count;
+ if (ts)
+ *ts = tsync_context->offsets[cpu].sync_ts;
+ if (offsets)
+ *offsets = tsync_context->offsets[cpu].sync_offsets;
+ if (scalings)
+ *scalings = tsync_context->offsets[cpu].sync_scalings;
+ if (frac)
+ *frac = tsync_context->offsets[cpu].sync_frac;
+
+ return 0;
+}
+
+/**
+ * tsync_get_proto_flags - Get protocol flags
+ *
+ * @tsync: Pointer to time sync context
+ * @flags: Returns the protocol flags, a combination of TRACECMD_TSYNC_FLAG_...
+ *
+ * Retuns -1 in case of an error, or 0 otherwise
+ */
+static int tsync_get_proto_flags(struct tracecmd_time_sync *tsync,
+ unsigned int *flags)
+{
+ struct tsync_proto *protocol;
+
+ if (!tsync)
+ return -1;
+ protocol = tsync_proto_find(tsync->proto_name);
+ if (!protocol)
+ return -1;
+
+ if (flags)
+ *flags = protocol->flags;
+
+ return 0;
+}
+
+
+#define PROTO_MASK_SIZE (sizeof(char))
+#define PROTO_MASK_BITS (PROTO_MASK_SIZE * 8)
+/**
+ * tsync_proto_select - Select time sync protocol, to be used for
+ * timestamp synchronization with a peer
+ *
+ * @protos: list of tsync protocol names
+ * @clock : trace clock
+ * @role : local time sync role
+ *
+ * Retuns pointer to a protocol name, that can be used with the peer, or NULL
+ * in case there is no match with supported protocols.
+ * The returned string MUST NOT be freed by the caller
+ */
+static const char *
+tsync_proto_select(const struct tracecmd_tsync_protos *protos,
+ const char *clock, enum tracecmd_time_sync_role role)
+{
+ struct tsync_proto *selected = NULL;
+ struct tsync_proto *proto;
+ char **pname;
+ int clock_id = 0;
+
+ if (!protos)
+ return NULL;
+
+ clock_id = tracecmd_clock_str2id(clock);
+ pname = protos->names;
+ while (*pname) {
+ for (proto = tsync_proto_list; proto; proto = proto->next) {
+ if (!(proto->roles & role))
+ continue;
+ if (proto->supported_clocks && clock_id &&
+ !(proto->supported_clocks & clock_id))
+ continue;
+ if (strncmp(proto->proto_name, *pname, TRACECMD_TSYNC_PNAME_LENGTH))
+ continue;
+ if (selected) {
+ if (selected->accuracy > proto->accuracy)
+ selected = proto;
+ } else
+ selected = proto;
+ }
+ pname++;
+ }
+
+ if (selected)
+ return selected->proto_name;
+
+ return NULL;
+}
+
+/**
+ * tracecmd_tsync_proto_getall - Returns list of all supported
+ * time sync protocols
+ * @protos: return, allocated list of time sync protocol names,
+ * supported by the peer. Must be freed by free()
+ * @clock: selected trace clock
+ * @role: supported protocol role
+ *
+ * If completed successfully 0 is returned and allocated list of strings in @protos.
+ * The last list entry is NULL. In case of an error, -1 is returned.
+ * @protos must be freed with free()
+ */
+int tracecmd_tsync_proto_getall(struct tracecmd_tsync_protos **protos, const char *clock, int role)
+{
+ struct tracecmd_tsync_protos *plist = NULL;
+ struct tsync_proto *proto;
+ int clock_id = 0;
+ int count = 1;
+ int i;
+
+ if (clock)
+ clock_id = tracecmd_clock_str2id(clock);
+ for (proto = tsync_proto_list; proto; proto = proto->next) {
+ if (!(proto->roles & role))
+ continue;
+ if (proto->supported_clocks && clock_id &&
+ !(proto->supported_clocks & clock_id))
+ continue;
+ count++;
+ }
+ plist = calloc(1, sizeof(struct tracecmd_tsync_protos));
+ if (!plist)
+ goto error;
+ plist->names = calloc(count, sizeof(char *));
+ if (!plist->names)
+ return -1;
+
+ for (i = 0, proto = tsync_proto_list; proto && i < (count - 1); proto = proto->next) {
+ if (!(proto->roles & role))
+ continue;
+ if (proto->supported_clocks && clock_id &&
+ !(proto->supported_clocks & clock_id))
+ continue;
+ plist->names[i++] = proto->proto_name;
+ }
+
+ *protos = plist;
+ return 0;
+
+error:
+ if (plist) {
+ free(plist->names);
+ free(plist);
+ }
+ return -1;
+}
+
+ /*
+ * get_first_cpu - build a CPU mask holding only the first CPU of the current
+ * affinity mask
+ *
+ * @pin_mask: returns the allocated mask, or NULL on failure
+ * @m_size: returns the size of the mask in bytes, or 0 on failure
+ *
+ * Returns 0 on success, -1 on failure.
+ * NOTE(review): the "|| 1" in the first check makes every call take the error
+ * path, so the function currently always returns -1 and the code below that
+ * check is unreachable. This looks like a deliberate way to disable the
+ * function - confirm before relying on it.
+ */
+ static int get_first_cpu(cpu_set_t **pin_mask, size_t *m_size)
+ {
+ int cpus = tracecmd_count_cpus();
+ cpu_set_t *cpu_mask;
+ int mask_size;
+ int i;
+
+ cpu_mask = CPU_ALLOC(cpus);
+ *pin_mask = CPU_ALLOC(cpus);
+ if (!cpu_mask || !*pin_mask || 1)
+ goto error;
+
+ mask_size = CPU_ALLOC_SIZE(cpus);
+ CPU_ZERO_S(mask_size, cpu_mask);
+ CPU_ZERO_S(mask_size, *pin_mask);
+
+ if (sched_getaffinity(0, mask_size, cpu_mask) == -1)
+ goto error;
+
+ /* copy only the first CPU that is set in the current affinity mask */
+ for (i = 0; i < cpus; i++) {
+ if (CPU_ISSET_S(i, mask_size, cpu_mask)) {
+ CPU_SET_S(i, mask_size, *pin_mask);
+ break;
+ }
+ }
+
+ if (CPU_COUNT_S(mask_size, *pin_mask) < 1)
+ goto error;
+
+ CPU_FREE(cpu_mask);
+ *m_size = mask_size;
+ return 0;
+
+ error:
+ if (cpu_mask)
+ CPU_FREE(cpu_mask);
+ if (*pin_mask)
+ CPU_FREE(*pin_mask);
+ *pin_mask = NULL;
+ *m_size = 0;
+ return -1;
+ }
+
+ /*
+  * Create the ftrace instance used for time synchronization with the peer
+  * identified by @cid. The trace buffer of the instance is reset and, if
+  * @clock is given, it is set as the trace clock of the instance.
+  *
+  * Returns the new instance, or NULL if it cannot be created.
+  */
+ static struct tracefs_instance *
+ clock_synch_create_instance(const char *clock, unsigned int cid)
+ {
+ 	struct tracefs_instance *inst;
+ 	char name[256];
+
+ 	snprintf(name, sizeof(name), "clock_synch-%d", cid);
+
+ 	inst = tracefs_instance_create(name);
+ 	if (inst) {
+ 		tracefs_instance_file_write(inst, "trace", "\0");
+ 		if (clock)
+ 			tracefs_instance_file_write(inst, "trace_clock", clock);
+ 	}
+
+ 	return inst;
+ }
+
+ /* Destroy the time synchronization instance and free its handle */
+ static void
+ clock_synch_delete_instance(struct tracefs_instance *inst)
+ {
+ 	if (inst) {
+ 		tracefs_instance_destroy(inst);
+ 		tracefs_instance_free(inst);
+ 	}
+ }
+
+ /*
+  * clock_context_init - allocate and initialize the time sync context
+  *
+  * @tsync: time sync handle, its proto_name selects the protocol
+  * @proto: returns the descriptor of the protocol
+  * @guest: true if the context is for the guest side; the guest side is also
+  *	    the server of the protocol
+  *
+  * Creates the sync ftrace instance, allocates the per CPU offsets and calls
+  * the init callback of the protocol. If the context already exists, only
+  * the protocol is looked up and returned in @proto.
+  *
+  * Returns 0 on success, -1 on failure. On failure tsync->context is NULL,
+  * unless it was already set on entry.
+  */
+ static int clock_context_init(struct tracecmd_time_sync *tsync,
+ 			      struct tsync_proto **proto, bool guest)
+ {
+ 	struct clock_sync_context *clock = NULL;
+ 	struct tsync_proto *protocol;
+
+ 	protocol = tsync_proto_find(tsync->proto_name);
+
+ 	if (tsync->context) {
+ 		/*
+ 		 * Nothing to initialize, but the caller still needs the
+ 		 * protocol: do not leave @proto unset on this path.
+ 		 */
+ 		*proto = protocol;
+ 		return protocol ? 0 : -1;
+ 	}
+
+ 	if (!protocol || !protocol->clock_sync_calc)
+ 		return -1;
+
+ 	clock = calloc(1, sizeof(struct clock_sync_context));
+ 	if (!clock)
+ 		return -1;
+ 	clock->is_guest = guest;
+ 	clock->is_server = clock->is_guest;
+
+ 	clock->instance = clock_synch_create_instance(tsync->clock_str,
+ 						      tsync->remote_id);
+ 	if (!clock->instance)
+ 		goto error;
+
+ 	clock->cpu_count = tsync->vcpu_count;
+ 	if (clock->cpu_count) {
+ 		clock->offsets = calloc(clock->cpu_count, sizeof(struct clock_sync_offsets));
+ 		if (!clock->offsets)
+ 			goto error;
+ 	}
+
+ 	/* The init callback of the protocol reads the context from tsync */
+ 	tsync->context = clock;
+ 	if (protocol->clock_sync_init && protocol->clock_sync_init(tsync) < 0)
+ 		goto error;
+
+ 	*proto = protocol;
+
+ 	return 0;
+ error:
+ 	tsync->context = NULL;
+ 	if (clock->instance)
+ 		clock_synch_delete_instance(clock->instance);
+ 	free(clock->offsets);
+ 	free(clock);
+ 	return -1;
+ }
+
+/**
+ * tracecmd_tsync_free - Free time sync context, allocated by
+ * tracecmd_tsync_with_host() or tracecmd_tsync_with_guest() APIs
+ *
+ * @tsync: Pointer to time sync context
+ *
+ */
+void tracecmd_tsync_free(struct tracecmd_time_sync *tsync)
+{
+ struct clock_sync_context *tsync_context;
+ struct tsync_proto *proto;
+ int i;
+
+ if (!tsync)
+ return;
+
+ tsync_context = (struct clock_sync_context *)tsync->context;
+
+ proto = tsync_proto_find(tsync->proto_name);
+ if (proto && proto->clock_sync_free)
+ proto->clock_sync_free(tsync);
+
+
+ if (tsync_context) {
+ clock_synch_delete_instance(tsync_context->instance);
+ tsync_context->instance = NULL;
+
+ if (tsync_context->cpu_count && tsync_context->offsets) {
+ for (i = 0; i < tsync_context->cpu_count; i++) {
+ free(tsync_context->offsets[i].sync_ts);
+ free(tsync_context->offsets[i].sync_offsets);
+ free(tsync_context->offsets[i].sync_scalings);
+ free(tsync_context->offsets[i].sync_frac);
+ tsync_context->offsets[i].sync_ts = NULL;
+ tsync_context->offsets[i].sync_offsets = NULL;
+ tsync_context->offsets[i].sync_scalings = NULL;
+ tsync_context->offsets[i].sync_frac = NULL;
+ tsync_context->offsets[i].sync_count = 0;
+ tsync_context->offsets[i].sync_size = 0;
+ }
+ free(tsync_context->offsets);
+ tsync_context->offsets = NULL;
+ }
+ }
+
+ if (tsync->msg_handle)
+ tracecmd_msg_handle_close(tsync->msg_handle);
+
+ /* These are only created from the host */
+ if (tsync->guest_pid) {
+ pthread_mutex_destroy(&tsync->lock);
+ pthread_cond_destroy(&tsync->cond);
+ pthread_barrier_destroy(&tsync->first_sync);
+ }
+
+ free(tsync->clock_str);
+ free(tsync->proto_name);
+ free(tsync);
+}
+
+ /*
+  * Pin the calling thread to @cpu.
+  *
+  * Returns the previous affinity mask of the thread, to be passed to
+  * restore_pin_to_cpu(), or NULL if @cpu is not below the number of CPUs, an
+  * allocation fails or the affinity cannot be read or changed.
+  * The number of CPUs and the mask size are computed on the first call only.
+  */
+ static cpu_set_t *pin_to_cpu(int cpu)
+ {
+ 	static size_t size;
+ 	static int cpus;
+ 	cpu_set_t *wanted = NULL;
+ 	cpu_set_t *previous = NULL;
+
+ 	if (!cpus) {
+ 		cpus = tracecmd_count_cpus();
+ 		size = CPU_ALLOC_SIZE(cpus);
+ 	}
+ 	if (cpu >= cpus)
+ 		return NULL;
+
+ 	wanted = CPU_ALLOC(cpus);
+ 	if (!wanted)
+ 		return NULL;
+ 	previous = CPU_ALLOC(cpus);
+ 	if (!previous)
+ 		goto fail;
+
+ 	CPU_ZERO_S(size, wanted);
+ 	CPU_SET_S(cpu, size, wanted);
+ 	/* Save the current mask before replacing it */
+ 	if (pthread_getaffinity_np(pthread_self(), size, previous) ||
+ 	    pthread_setaffinity_np(pthread_self(), size, wanted))
+ 		goto fail;
+
+ 	CPU_FREE(wanted);
+ 	return previous;
+
+ fail:
+ 	if (wanted)
+ 		CPU_FREE(wanted);
+ 	if (previous)
+ 		CPU_FREE(previous);
+ 	return NULL;
+ }
+
+/*
+ * Apply the affinity @mask, as returned by pin_to_cpu(), to the calling
+ * thread and free the mask.
+ */
+static void restore_pin_to_cpu(cpu_set_t *mask)
+{
+	static size_t mask_bytes;
+	pthread_t self = pthread_self();
+
+	/* The size of the CPU set is computed on first use and then cached */
+	if (mask_bytes == 0)
+		mask_bytes = CPU_ALLOC_SIZE(tracecmd_count_cpus());
+
+	pthread_setaffinity_np(self, mask_bytes, mask);
+	CPU_FREE(mask);
+}
+
+/*
+ * Run one clock_sync_calc() round of @proto for @cpu, with the calling
+ * thread pinned to that CPU for the duration of the call. If pinning fails
+ * the calculation still runs, just without the affinity change.
+ *
+ * The values computed by the protocol are discarded here; only its return
+ * code is passed back.
+ */
+static int tsync_send(struct tracecmd_time_sync *tsync,
+		      struct tsync_proto *proto, unsigned int cpu)
+{
+	long long ts = 0, scale = 0, off = 0, fraction = 0;
+	cpu_set_t *saved_affinity;
+	int rc;
+
+	saved_affinity = pin_to_cpu(cpu);
+	rc = proto->clock_sync_calc(tsync, &off, &scale, &fraction, &ts, cpu);
+	if (saved_affinity != NULL)
+		restore_pin_to_cpu(saved_affinity);
+
+	return rc;
+}
+
+/*
+ * Guest side loop: wait for probe requests from the host and answer each
+ * one by running the protocol calculation on the requested CPU.
+ *
+ * The loop ends when receiving fails, when a message carries a protocol
+ * name other than TRACECMD_TSYNC_PROTO_NONE or a command other than
+ * TRACECMD_TIME_SYNC_CMD_PROBE, or when the calculation fails.
+ */
+static void tsync_with_host(struct tracecmd_time_sync *tsync)
+{
+	char protocol[TRACECMD_TSYNC_PNAME_LENGTH];
+	struct tsync_probe_request_msg probe;
+	struct tsync_proto *proto;
+	unsigned int command;
+	unsigned int size;
+	char *msg;
+	int ret;
+
+	clock_context_init(tsync, &proto, true);
+	if (!tsync->context)
+		return;
+
+	while (true) {
+		/*
+		 * @size and @msg are handed over by pointer, so the receive
+		 * helper may overwrite them. Re-arm both on every iteration
+		 * and never use a value written by the helper as the length
+		 * of the local buffer.
+		 */
+		msg = (char *)&probe;
+		size = sizeof(probe);
+		memset(&probe, 0, sizeof(probe));
+		ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
+						  protocol, &command,
+						  &size, &msg);
+
+		if (ret || strncmp(protocol, TRACECMD_TSYNC_PROTO_NONE, TRACECMD_TSYNC_PNAME_LENGTH) ||
+		    command != TRACECMD_TIME_SYNC_CMD_PROBE)
+			break;
+		ret = tsync_send(tsync, proto, probe.cpu);
+		if (ret)
+			break;
+	}
+}
+
+/*
+ * Resize one sample array to @nmemb elements. On failure the array is left
+ * untouched and still owned by the caller.
+ */
+static int grow_sync_array(long long **array, int nmemb)
+{
+	long long *tmp;
+
+	tmp = realloc(*array, (size_t)nmemb * sizeof(long long));
+	if (!tmp)
+		return -1;
+	*array = tmp;
+	return 0;
+}
+
+/*
+ * Append one (timestamp, offset, scaling, fraction) sample to the arrays
+ * of a single CPU, growing all four arrays by @array_step elements when
+ * they are full.
+ *
+ * Returns 0 on success, -1 if the arrays could not be grown. In the error
+ * case the stored samples are preserved and every array pointer in
+ * @offsets stays valid.
+ */
+static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step,
+			      long long offset, long long scaling, long long frac, long long ts)
+{
+	int new_size;
+
+	if (offsets->sync_count >= offsets->sync_size) {
+		if (array_step <= 0)
+			return -1;
+		new_size = offsets->sync_size + array_step;
+
+		/*
+		 * Each successful realloc() is stored right away: realloc()
+		 * releases the old block when it succeeds, so keeping the old
+		 * pointer around (or freeing the new one) after a later
+		 * failure would leave a dangling pointer in @offsets.
+		 * sync_size is updated only once all four arrays have grown.
+		 */
+		if (grow_sync_array(&offsets->sync_ts, new_size) ||
+		    grow_sync_array(&offsets->sync_offsets, new_size) ||
+		    grow_sync_array(&offsets->sync_scalings, new_size) ||
+		    grow_sync_array(&offsets->sync_frac, new_size))
+			return -1;
+
+		offsets->sync_size = new_size;
+	}
+
+	offsets->sync_ts[offsets->sync_count] = ts;
+	offsets->sync_offsets[offsets->sync_count] = offset;
+	offsets->sync_scalings[offsets->sync_count] = scaling;
+	offsets->sync_frac[offsets->sync_count] = frac;
+	offsets->sync_count++;
+
+	return 0;
+}
+
+/*
+ * Ask @proto for one synchronization sample of @cpu and store it in the
+ * per CPU arrays of the sync context.
+ *
+ * Returns 0 when the sample was stored, or when the protocol reported a
+ * zero offset, timestamp or scaling (such a sample is skipped), and -1 on
+ * a protocol, context or allocation failure.
+ */
+static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
+			    struct tsync_proto *proto, int array_step)
+{
+	long long timestamp = 0, scaling = 0, offset = 0, frac = 0;
+	struct clock_sync_context *ctx;
+
+	if (proto->clock_sync_calc(tsync, &offset, &scaling, &frac, &timestamp, cpu) != 0) {
+		tracecmd_warning("Failed to synchronize timestamps with guest");
+		return -1;
+	}
+
+	/* Nothing usable was measured in this round */
+	if (offset == 0 || timestamp == 0 || scaling == 0)
+		return 0;
+
+	ctx = tsync->context;
+	if (ctx == NULL || ctx->offsets == NULL || cpu >= ctx->cpu_count)
+		return -1;
+
+	return record_sync_sample(&ctx->offsets[cpu], array_step,
+				  offset, scaling, frac, timestamp);
+}
+
+#define TIMER_SEC_NANO 1000000000LL
+/*
+ * Fill @timeout with the absolute CLOCK_REALTIME time that lies @delay_ms
+ * milliseconds in the future, in the form expected by
+ * pthread_cond_timedwait(). A delay of zero or less yields the current
+ * time.
+ */
+static inline void get_ts_loop_delay(struct timespec *timeout, int delay_ms)
+{
+	long long nsec;
+
+	memset(timeout, 0, sizeof(struct timespec));
+	clock_gettime(CLOCK_REALTIME, timeout);
+
+	if (delay_ms <= 0)
+		return;
+
+	/*
+	 * Do the arithmetic in 64 bits: tv_nsec is a long, which is only
+	 * 32 bits wide on some platforms and would overflow for delays of
+	 * about two seconds and more.
+	 */
+	nsec = (long long)timeout->tv_nsec + (long long)delay_ms * 1000000LL;
+	timeout->tv_sec += nsec / TIMER_SEC_NANO;
+	timeout->tv_nsec = nsec % TIMER_SEC_NANO;
+}
+
+#define CLOCK_TS_ARRAY 5
+/*
+ * Host side loop: periodically probe every guest VCPU and record the
+ * returned synchronization samples.
+ *
+ * The first_sync barrier is passed exactly once, either after the first
+ * round over all VCPUs or right away when the clock context could not be
+ * created, so the thread waiting in tracecmd_tsync_with_guest() is always
+ * released.
+ *
+ * With a positive loop_interval a round is repeated every loop_interval
+ * milliseconds; otherwise the loop sleeps until tsync->cond is signalled.
+ * In both cases a signal on tsync->cond triggers one final round before
+ * the loop ends. A TRACECMD_TIME_SYNC_CMD_STOP message is sent to the
+ * guest when the loop is left.
+ *
+ * Returns -1 if the clock context could not be created, 0 otherwise.
+ */
+static int tsync_with_guest(struct tracecmd_time_sync *tsync)
+{
+	struct tsync_probe_request_msg probe;
+	int ts_array_size = CLOCK_TS_ARRAY;
+	struct tsync_proto *proto;
+	struct timespec timeout;
+	bool first = true;
+	bool end = false;
+	int ret;
+	int i;
+
+	clock_context_init(tsync, &proto, false);
+	if (!tsync->context) {
+		pthread_barrier_wait(&tsync->first_sync);
+		return -1;
+	}
+
+	/* For short intervals, grow the sample arrays in bigger steps */
+	if (tsync->loop_interval > 0 &&
+	    tsync->loop_interval < (CLOCK_TS_ARRAY * 1000))
+		ts_array_size = (CLOCK_TS_ARRAY * 1000) / tsync->loop_interval;
+
+	/* Do not send uninitialized stack bytes to the guest */
+	memset(&probe, 0, sizeof(probe));
+
+	while (true) {
+		pthread_mutex_lock(&tsync->lock);
+		for (i = 0; i < tsync->vcpu_count; i++) {
+			probe.cpu = i;
+			ret = tracecmd_msg_send_time_sync(tsync->msg_handle,
+							  TRACECMD_TSYNC_PROTO_NONE,
+							  TRACECMD_TIME_SYNC_CMD_PROBE,
+							  sizeof(probe), (char *)&probe);
+			/* No point in sampling if the probe was not sent */
+			if (ret)
+				break;
+			ret = tsync_get_sample(tsync, i, proto, ts_array_size);
+			if (ret)
+				break;
+		}
+		if (first) {
+			first = false;
+			pthread_barrier_wait(&tsync->first_sync);
+		}
+		/* Stop after the final round, or when a VCPU round failed */
+		if (end || i < tsync->vcpu_count) {
+			pthread_mutex_unlock(&tsync->lock);
+			break;
+		}
+		if (tsync->loop_interval > 0) {
+			get_ts_loop_delay(&timeout, tsync->loop_interval);
+			ret = pthread_cond_timedwait(&tsync->cond, &tsync->lock, &timeout);
+			pthread_mutex_unlock(&tsync->lock);
+			if (ret && ret != ETIMEDOUT)
+				break;
+			else if (!ret)
+				end = true;	/* signalled: do one last round */
+		} else {
+			pthread_cond_wait(&tsync->cond, &tsync->lock);
+			end = true;
+			pthread_mutex_unlock(&tsync->lock);
+		}
+	}
+
+	tracecmd_msg_send_time_sync(tsync->msg_handle,
+				    TRACECMD_TSYNC_PROTO_NONE,
+				    TRACECMD_TIME_SYNC_CMD_STOP,
+				    0, NULL);
+	return 0;
+}
+
+/*
+ * Entry point of the host side time sync thread. @data is the time sync
+ * context; the thread always exits with a NULL result.
+ */
+static void *tsync_host_thread(void *data)
+{
+	tsync_with_guest((struct tracecmd_time_sync *)data);
+	pthread_exit(NULL);
+}
+
+/**
+ * tracecmd_tsync_with_guest - Synchronize timestamps with guest
+ *
+ * @trace_id: Local ID for the current trace session
+ * @loop_interval: Interval between two synchronization rounds, in
+ *		   milliseconds. With a value of 0 or less a round is done
+ *		   only at start and at stop.
+ * @fd: file descriptor of guest
+ * @guest_pid: PID of the host OS process, running the guest
+ * @guest_cpus: Number of the guest VCPUs
+ * @proto_name: Name of the negotiated time synchronization protocol
+ * @clock: Trace clock, used for that session
+ *
+ * On success, a pointer to time sync context is returned, or NULL in
+ * case of an error. The context must be freed with tracecmd_tsync_free().
+ * Once the context memory has been allocated, @fd is closed on every
+ * later error path.
+ *
+ * This API spawns a pthread, which performs time stamps synchronization
+ * until tracecmd_tsync_with_guest_stop() is called. The call returns after
+ * the thread has passed its first synchronization round.
+ */
+struct tracecmd_time_sync *
+tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval,
+			  unsigned int fd, int guest_pid,
+			  int guest_cpus, const char *proto_name, const char *clock)
+{
+	struct tracecmd_time_sync *tsync;
+	cpu_set_t *pin_mask = NULL;
+	pthread_attr_t attrib;
+	size_t mask_size = 0;
+	bool sync_init = false;
+	int ret;
+
+	if (!proto_name)
+		return NULL;
+
+	tsync = calloc(1, sizeof(*tsync));
+	if (!tsync)
+		return NULL;
+
+	tsync->trace_id = trace_id;
+	tsync->loop_interval = loop_interval;
+	tsync->proto_name = strdup(proto_name);
+	if (!tsync->proto_name)
+		goto error;
+
+	tsync->msg_handle = tracecmd_msg_handle_alloc(fd, 0);
+	if (!tsync->msg_handle)
+		goto error;
+	tsync->guest_pid = guest_pid;
+	tsync->vcpu_count = guest_cpus;
+
+	if (clock)
+		tsync->clock_str = strdup(clock);
+	pthread_mutex_init(&tsync->lock, NULL);
+	pthread_cond_init(&tsync->cond, NULL);
+	/* Two parties: this thread and the sync thread created below */
+	pthread_barrier_init(&tsync->first_sync, NULL, 2);
+	pthread_attr_init(&attrib);
+	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
+	sync_init = true;
+
+	ret = pthread_create(&tsync->thread, &attrib, tsync_host_thread, tsync);
+	if (ret)
+		goto error;
+	tsync->thread_running = true;
+
+	if (!get_first_cpu(&pin_mask, &mask_size))
+		pthread_setaffinity_np(tsync->thread, mask_size, pin_mask);
+	/* Wait until the thread is done with its first round */
+	pthread_barrier_wait(&tsync->first_sync);
+
+	if (pin_mask)
+		CPU_FREE(pin_mask);
+	pthread_attr_destroy(&attrib);
+
+	return tsync;
+
+error:
+	if (sync_init) {
+		pthread_attr_destroy(&attrib);
+		pthread_barrier_destroy(&tsync->first_sync);
+		pthread_cond_destroy(&tsync->cond);
+		pthread_mutex_destroy(&tsync->lock);
+	}
+	if (tsync->msg_handle)
+		tracecmd_msg_handle_close(tsync->msg_handle);
+	else if ((int)fd >= 0)
+		close((int)fd);
+	free(tsync->clock_str);
+	free(tsync->proto_name);
+	free(tsync);
+
+	return NULL;
+}
+
+/**
+ * tracecmd_write_guest_time_shift - Write collected timestamp corrections in a file
+ *
+ * @handle: Handle to a trace file, where timestamp corrections will be saved
+ * @tsync: Time sync context with collected timestamp corrections
+ *
+ * Returns 0 on success, or -1 in case of an error.
+ *
+ * This API writes collected timestamp corrections in the metadata of the
+ * trace file, as TRACECMD_OPTION_TIME_SHIFT option.
+ *
+ * The option consists of the trace ID (8 bytes), the protocol flags
+ * (4 bytes) and the VCPU count (4 bytes), followed for each VCPU by its
+ * sample count (4 bytes) and the arrays of timestamps, offsets and
+ * scalings (8 bytes per sample each), followed for each VCPU by its array
+ * of fractions (8 bytes per sample).
+ */
+int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
+				    struct tracecmd_time_sync *tsync)
+{
+	struct iovec *vector = NULL;
+	unsigned int flags;
+	long long *scalings = NULL;
+	long long *offsets = NULL;
+	long long *frac = NULL;
+	long long *ts = NULL;
+	int *counts = NULL;
+	int vcount;
+	int i, j;
+	int ret = -1;
+
+	if (!tsync || tsync->vcpu_count <= 0)
+		return -1;
+	vcount = 3 + (5 * tsync->vcpu_count);
+	vector = calloc(vcount, sizeof(struct iovec));
+	/*
+	 * Every VCPU needs its own sample count: the vector only stores
+	 * pointers, which are read when the option is added, and the
+	 * fraction arrays below must be sized with the count of the
+	 * matching VCPU.
+	 */
+	counts = calloc(tsync->vcpu_count, sizeof(*counts));
+	if (!vector || !counts)
+		goto out;
+	ret = tsync_get_proto_flags(tsync, &flags);
+	if (ret < 0)
+		goto out;
+
+	j = 0;
+	vector[j].iov_len = 8;
+	vector[j++].iov_base = &tsync->trace_id;
+	vector[j].iov_len = 4;
+	vector[j++].iov_base = &flags;
+	vector[j].iov_len = 4;
+	vector[j++].iov_base = &tsync->vcpu_count;
+	for (i = 0; i < tsync->vcpu_count; i++) {
+		if (j >= vcount)
+			break;
+		ret = tracecmd_tsync_get_offsets(tsync, i, &counts[i],
+						 &ts, &offsets, &scalings, NULL);
+		if (ret < 0 || !counts[i] || !ts || !offsets || !scalings)
+			break;
+		vector[j].iov_len = 4;
+		vector[j++].iov_base = &counts[i];
+		vector[j].iov_len = 8 * counts[i];
+		vector[j++].iov_base = ts;
+		vector[j].iov_len = 8 * counts[i];
+		vector[j++].iov_base = offsets;
+		vector[j].iov_len = 8 * counts[i];
+		vector[j++].iov_base = scalings;
+	}
+	if (i < tsync->vcpu_count) {
+		ret = -1;
+		goto out;
+	}
+	/*
+	 * Writing fraction bits into the option is implemented in a separate loop for
+	 * backward compatibility. In the trace-cmd 2.9 release, this option has only offset
+	 * and scaling. That legacy code must work with the new extended option.
+	 *
+	 */
+	for (i = 0; i < tsync->vcpu_count; i++) {
+		if (j >= vcount)
+			break;
+		ret = tracecmd_tsync_get_offsets(tsync, i, NULL,
+						 NULL, NULL, NULL, &frac);
+		if (ret < 0 || !frac)
+			break;
+		vector[j].iov_len = 8 * counts[i];
+		vector[j++].iov_base = frac;
+	}
+	if (i < tsync->vcpu_count) {
+		ret = -1;
+		goto out;
+	}
+
+	tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount);
+#ifdef TSYNC_DEBUG
+	/* Reports the samples of the last VCPU only */
+	if (counts[tsync->vcpu_count - 1] > 1)
+		printf("Got %d timestamp synch samples in %lld ns trace\n\r",
+		       counts[tsync->vcpu_count - 1],
+		       ts[counts[tsync->vcpu_count - 1] - 1] - ts[0]);
+#endif
+	ret = 0;
+out:
+	free(counts);
+	free(vector);
+	return ret;
+}
+
+/**
+ * tracecmd_tsync_with_guest_stop - Stop the time sync session with a guest
+ *
+ * @tsync: Time sync context, representing a running time sync session
+ *
+ * Wakes up the time sync thread through its condition variable and waits
+ * for the thread to terminate.
+ *
+ * Returns 0 on success, or -1 if @tsync is NULL or no thread was started
+ * for it.
+ */
+int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync)
+{
+	if (tsync == NULL)
+		return -1;
+	if (!tsync->thread_running)
+		return -1;
+
+	/* Signal under the lock, which the thread holds whenever it is not waiting */
+	pthread_mutex_lock(&tsync->lock);
+	pthread_cond_signal(&tsync->cond);
+	pthread_mutex_unlock(&tsync->lock);
+
+	pthread_join(tsync->thread, NULL);
+
+	return 0;
+}
+
+/*
+ * Entry point of the guest side time sync thread.
+ *
+ * Accepts one connection on the listening descriptor stored in the message
+ * handle, replaces that descriptor with the accepted one and then serves
+ * probe requests until tsync_with_host() returns.
+ *
+ * The thread exits with (void *)-1 if accept() fails with anything but
+ * EINTR, and with (void *)0 otherwise.
+ */
+static void *tsync_agent_thread(void *data)
+{
+	struct tracecmd_time_sync *tsync = data;
+	int accept_errno;
+	long ret = 0;
+	int sd;
+
+	while (true) {
+		tracecmd_debug("Listening on fd:%d\n", tsync->msg_handle->fd);
+		sd = accept(tsync->msg_handle->fd, NULL, NULL);
+		/* The debug print below may change errno, so save it first */
+		accept_errno = errno;
+		tracecmd_debug("Accepted fd:%d\n", sd);
+		if (sd < 0) {
+			if (accept_errno == EINTR)
+				continue;
+			ret = -1;
+			goto out;
+		}
+		break;
+	}
+	close(tsync->msg_handle->fd);
+	tsync->msg_handle->fd = sd;
+
+	tsync_with_host(tsync);
+
+out:
+	pthread_exit((void *)ret);
+}
+
+/**
+ * tracecmd_tsync_with_host - Synchronize timestamps with host
+ * @fd: File descriptor connecting with the host
+ * @tsync_protos: List of tsync protocols, supported by the host
+ * @clock: Trace clock, used for that session
+ * @remote_id: Identifier to uniquely identify the remote host
+ * @local_id: Identifier to uniquely identify the local machine
+ *
+ * On success, a pointer to time sync context is returned, or NULL in
+ * case of an error. The context must be freed with tracecmd_tsync_free().
+ * On error @fd is left open; it stays owned by the caller.
+ *
+ * This API spawns a pthread, which performs time stamps synchronization.
+ * The thread is joined with tracecmd_tsync_with_host_stop().
+ */
+struct tracecmd_time_sync *
+tracecmd_tsync_with_host(int fd,
+			 const struct tracecmd_tsync_protos *tsync_protos,
+			 const char *clock, int remote_id, int local_id)
+{
+	struct tracecmd_time_sync *tsync;
+	cpu_set_t *pin_mask = NULL;
+	pthread_attr_t attrib;
+	size_t mask_size = 0;
+	const char *proto;
+	int ret;
+
+	tsync = calloc(1, sizeof(*tsync));
+	if (!tsync)
+		return NULL;
+
+	proto = tsync_proto_select(tsync_protos, clock,
+				   TRACECMD_TIME_SYNC_ROLE_GUEST);
+	if (!proto)
+		goto error;
+	tsync->proto_name = strdup(proto);
+	if (!tsync->proto_name)
+		goto error;
+	/* The sync thread dereferences the handle, so it must exist */
+	tsync->msg_handle = tracecmd_msg_handle_alloc(fd, 0);
+	if (!tsync->msg_handle)
+		goto error;
+	if (clock)
+		tsync->clock_str = strdup(clock);
+
+	tsync->remote_id = remote_id;
+	tsync->local_id = local_id;
+
+	pthread_attr_init(&attrib);
+	tsync->vcpu_count = tracecmd_count_cpus();
+	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
+
+	ret = pthread_create(&tsync->thread, &attrib, tsync_agent_thread, tsync);
+	if (ret) {
+		pthread_attr_destroy(&attrib);
+		goto error;
+	}
+	tsync->thread_running = true;
+	if (!get_first_cpu(&pin_mask, &mask_size))
+		pthread_setaffinity_np(tsync->thread, mask_size, pin_mask);
+
+	if (pin_mask)
+		CPU_FREE(pin_mask);
+	pthread_attr_destroy(&attrib);
+	return tsync;
+
+error:
+	if (tsync->msg_handle) {
+		/* Do not close the fd that was passed in */
+		tsync->msg_handle->fd = -1;
+		tracecmd_msg_handle_close(tsync->msg_handle);
+	}
+	free(tsync->proto_name);
+	free(tsync->clock_str);
+	free(tsync);
+
+	return NULL;
+
+}
+
+/**
+ * tracecmd_tsync_with_host_stop - Stop the time sync session with a host
+ *
+ * @tsync: Time sync context, representing a running time sync session
+ *
+ * Waits for the time sync thread of @tsync to terminate.
+ *
+ * Returns 0 on success, or the error number reported by pthread_join().
+ */
+int tracecmd_tsync_with_host_stop(struct tracecmd_time_sync *tsync)
+{
+	int err;
+
+	err = pthread_join(tsync->thread, NULL);
+
+	return err;
+}
+
+/**
+ * tracecmd_tsync_get_selected_proto - Return the selected time sync protocol
+ * @tsync: Time sync context, representing a running time sync session
+ * @selected_proto: return, name of the selected time sync protocol for this
+ *		    session. The string is allocated and must be freed with
+ *		    free(). May be NULL, in which case nothing is returned.
+ *
+ * Returns 0 on success, or -1 in case of an error: @tsync is NULL, no
+ * protocol is selected, or the name could not be duplicated.
+ *
+ */
+int tracecmd_tsync_get_selected_proto(struct tracecmd_time_sync *tsync,
+				      char **selected_proto)
+{
+	char *name;
+
+	if (!tsync)
+		return -1;
+
+	if (selected_proto) {
+		if (!tsync->proto_name)
+			return -1;
+		name = strdup(tsync->proto_name);
+		/* Do not report success with a NULL name */
+		if (!name)
+			return -1;
+		(*selected_proto) = name;
+	}
+	return 0;
+}
diff --git a/lib/trace-cmd/trace-util.c b/lib/trace-cmd/trace-util.c
new file mode 100644
index 00000000..9564c81a
--- /dev/null
+++ b/lib/trace-cmd/trace-util.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <errno.h>
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <limits.h>
+#include <libgen.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <time.h>
+#include <event-parse.h>
+#include <event-utils.h>
+
+#include "trace-cmd-private.h"
+#include "trace-cmd-local.h"
+
+/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed */
+#define LOCAL_PLUGIN_DIR ".trace-cmd/plugins"
+/* Procfs file read by tracecmd_stack_tracer_status() */
+#define PROC_STACK_FILE "/proc/sys/kernel/stack_tracer_enabled"
+
+/* Library "debug" mode, see tracecmd_set_debug() / tracecmd_get_debug() */
+static bool debug;
+/* Threshold checked by tracecmd_warning(), tracecmd_info() and tracecmd_critical() */
+static int log_level = TEP_LOG_INFO;
+/* Log file set by tracecmd_set_logfile(); NULL means no log file is in use */
+static FILE *logfp;
+
+/*
+ * Mapping between ftrace clock names and library clock IDs.
+ * The entry with a NULL clock_str terminates the table.
+ */
+static const struct {
+	const char *clock_str;
+	enum tracecmd_clocks clock_id;
+} trace_clocks[] = {
+	{ "local",	TRACECMD_CLOCK_LOCAL },
+	{ "global",	TRACECMD_CLOCK_GLOBAL },
+	{ "counter",	TRACECMD_CLOCK_COUNTER },
+	{ "uptime",	TRACECMD_CLOCK_UPTIME },
+	{ "perf",	TRACECMD_CLOCK_PERF },
+	{ "mono",	TRACECMD_CLOCK_MONO },
+	{ "mono_raw",	TRACECMD_CLOCK_MONO_RAW },
+	{ "boot",	TRACECMD_CLOCK_BOOT },
+	{ "x86-tsc",	TRACECMD_CLOCK_X86_TSC },
+	{ NULL,		-1 }
+};
+
+/**
+ * tracecmd_clock_str2id - Convert ftrace clock name to clock ID
+ * @clock: Ftrace clock name
+ *
+ * @clock matches a known clock if it starts with the name of that clock.
+ * When several names match, the longest one wins, so that "mono_raw" is
+ * not taken for "mono".
+ *
+ * Returns ID of the ftrace clock, or TRACECMD_CLOCK_UNKNOWN if @clock is
+ * NULL or does not match any known clock.
+ */
+enum tracecmd_clocks tracecmd_clock_str2id(const char *clock)
+{
+	enum tracecmd_clocks id = TRACECMD_CLOCK_UNKNOWN;
+	size_t best = 0;
+	size_t len;
+	int i;
+
+	if (!clock)
+		return TRACECMD_CLOCK_UNKNOWN;
+
+	for (i = 0; trace_clocks[i].clock_str; i++) {
+		len = strlen(trace_clocks[i].clock_str);
+		/*
+		 * Prefix matching is kept from the original code (callers
+		 * may pass names with trailing characters - not verified
+		 * here), but a longer matching name takes precedence.
+		 */
+		if (len > best && !strncmp(clock, trace_clocks[i].clock_str, len)) {
+			best = len;
+			id = trace_clocks[i].clock_id;
+		}
+	}
+	return id;
+}
+
+/**
+ * tracecmd_clock_id2str - Convert clock ID to ftrace clock name
+ * @clock: Clock ID
+ *
+ * Returns the name of the ftrace clock, or NULL if the ID is not known.
+ * The returned string must not be freed.
+ */
+const char *tracecmd_clock_id2str(enum tracecmd_clocks clock)
+{
+	int idx = 0;
+
+	while (trace_clocks[idx].clock_str) {
+		if (clock == trace_clocks[idx].clock_id)
+			return trace_clocks[idx].clock_str;
+		idx++;
+	}
+
+	return NULL;
+}
+
+/**
+ * tracecmd_set_debug - Set debug mode of the tracecmd library
+ * @set_debug: The new "debug" mode. If true, the tracecmd library is
+ *	       in "debug" mode
+ *
+ * The log level follows the mode: TEP_LOG_DEBUG when debug mode is
+ * switched on, TEP_LOG_CRITICAL when it is switched off.
+ */
+void tracecmd_set_debug(bool set_debug)
+{
+	debug = set_debug;
+	tracecmd_set_loglevel(set_debug ? TEP_LOG_DEBUG : TEP_LOG_CRITICAL);
+}
+
+/**
+ * tracecmd_get_debug - Get debug mode of tracecmd library
+ *
+ * Returns true if the library was put in debug mode with
+ * tracecmd_set_debug(), false otherwise.
+ */
+bool tracecmd_get_debug(void)
+{
+	return debug;
+}
+
+/*
+ * Register the PID to command name mappings found in @file.
+ *
+ * @file holds one "<pid> <comm>" pair per line; it is modified in place
+ * while being split into lines. Lines that do not contain both a PID and
+ * a command name are skipped. @size is not used.
+ */
+void tracecmd_parse_cmdlines(struct tep_handle *pevent,
+			     char *file, int size __maybe_unused)
+{
+	char *comm;
+	char *line;
+	char *next = NULL;
+	int pid;
+
+	line = strtok_r(file, "\n", &next);
+	while (line) {
+		/*
+		 * %m allocates the name only when it is matched; @comm must
+		 * not be used, or freed, with an indeterminate value.
+		 */
+		comm = NULL;
+		if (sscanf(line, "%d %m[^\n]s", &pid, &comm) == 2)
+			tep_register_comm(pevent, comm, pid);
+		free(comm);
+		line = strtok_r(NULL, "\n", &next);
+	}
+}
+
+/*
+ * Register the functions listed in @file, a buffer in /proc/kallsyms
+ * format ("<address> <type> <function>" followed by an optional
+ * "<tab>[<module>]"), one symbol per line.
+ *
+ * The buffer is modified in place: it is split into lines, and function
+ * and module names are NUL terminated inside it. Parsing stops silently at
+ * the first line that can not be parsed. @size is not used.
+ */
+void tracecmd_parse_proc_kallsyms(struct tep_handle *pevent,
+ char *file, unsigned int size __maybe_unused)
+{
+ unsigned long long addr;
+ int sav_errno;
+ char *func;
+ char *line;
+ char *next = NULL;
+ char *mod;
+ char ch;
+
+ line = strtok_r(file, "\n", &next);
+ while (line) {
+ int func_start, func_end = 0;
+ int mod_start, mod_end = 0;
+ int n;
+
+ mod = NULL;
+ sav_errno = errno;
+ errno = 0;
+ /*
+ * Only the address and the type character are stored (n == 2 on
+ * success). The %n directives record the offsets in the line where
+ * the function name starts and ends, and where the module name
+ * starts and ends. func_end and mod_end stay 0 if the scan stops
+ * before reaching them.
+ */
+ n = sscanf(line, "%16llx %c %n%*s%n%*1[\t][%n%*s%n",
+ &addr, &ch, &func_start, &func_end, &mod_start, &mod_end);
+ /* NOTE(review): the saved errno is not restored on this early return */
+ if (errno)
+ return;
+ errno = sav_errno;
+
+ if (n != 2 || !func_end)
+ return;
+
+ func = line + func_start;
+ /*
+ * Hacks for
+ * - arm arch that adds a lot of bogus '$a' functions
+ * - x86-64 that reports per-cpu variable offsets as absolute
+ */
+ if (func[0] != '$' && ch != 'A' && ch != 'a') {
+ line[func_end] = 0;
+ if (mod_end) {
+ mod = line + mod_start;
+ /* truncate the extra ']' */
+ line[mod_end - 1] = 0;
+ }
+ tep_register_function(pevent, func, addr, mod);
+ }
+
+ line = strtok_r(NULL, "\n", &next);
+ }
+}
+
+/*
+ * Register the printk format strings listed in @file.
+ *
+ * Each line of @file has the form "<hex address> : <format>". The buffer
+ * is modified in place while being split. Parsing stops, with a warning,
+ * at the first line that has no address or no format after the ':'.
+ * @size is not used.
+ */
+void tracecmd_parse_ftrace_printk(struct tep_handle *pevent,
+				  char *file, unsigned int size __maybe_unused)
+{
+	unsigned long long addr;
+	char *printk;
+	char *line;
+	char *next = NULL;
+	char *addr_str;
+	char *fmt;
+
+	line = strtok_r(file, "\n", &next);
+	while (line) {
+		fmt = NULL;
+		addr_str = strtok_r(line, ":", &fmt);
+		if (!addr_str) {
+			tracecmd_warning("printk format with empty entry");
+			break;
+		}
+		/*
+		 * Without a ':' in the line, or with nothing behind it,
+		 * there is no format string, and "fmt + 1" would point
+		 * outside of this line.
+		 */
+		if (!fmt || !*fmt) {
+			tracecmd_warning("printk format without a format string");
+			break;
+		}
+		addr = strtoull(addr_str, NULL, 16);
+		/* fmt still has a space, skip it */
+		printk = strdup(fmt+1);
+		if (!printk)
+			break;
+		line = strtok_r(NULL, "\n", &next);
+		tep_register_print_string(pevent, printk, addr);
+		free(printk);
+	}
+}
+
+/**
+ * tracecmd_add_id - add an int to the event id list
+ * @list: list to add the id to, or NULL to start a new list
+ * @id: id to add
+ * @len: current length of list of ids.
+ *
+ * The typical usage is:
+ *
+ *    events = tracecmd_add_id(events, id, len++);
+ *
+ * Returns the new allocated list with the id stored at index @len and
+ * a '-1' right after it, or NULL if the memory could not be allocated.
+ *
+ * The returned list should be freed with free().
+ */
+int *tracecmd_add_id(int *list, int id, int len)
+{
+	int *grown;
+
+	if (list == NULL)
+		grown = malloc(2 * sizeof(*grown));
+	else
+		grown = realloc(list, (len + 2) * sizeof(*grown));
+
+	if (grown == NULL)
+		return NULL;
+
+	grown[len] = id;
+	/* The list is always terminated with -1 */
+	grown[len + 1] = -1;
+
+	return grown;
+}
+
+/* State shared between trace_util_find_plugin_files() and add_plugin_file() */
+struct add_plugin_data {
+ /* 0, or the errno value saved by add_plugin_file() when it failed */
+ int ret;
+ /* Number of names stored in @files */
+ int index;
+ /* NULL terminated array of allocated plugin file names */
+ char **files;
+};
+
+/*
+ * Callback for tep_load_plugins_hook(): append a copy of @name to the
+ * NULL terminated list kept in @data (a struct add_plugin_data).
+ *
+ * On an allocation failure the whole list is freed and the error is
+ * recorded in pdata->ret, which turns all later calls into no-ops.
+ * @pevent and @path are not used.
+ */
+static void add_plugin_file(struct tep_handle *pevent, const char *path,
+			    const char *name, void *data)
+{
+	struct add_plugin_data *pdata = data;
+	char **ptr;
+	int size;
+	int err;
+	int i;
+
+	if (pdata->ret)
+		return;
+
+	size = pdata->index + 2;
+	ptr = realloc(pdata->files, sizeof(char *) * size);
+	if (!ptr)
+		goto out_free;
+	/*
+	 * realloc() has released the old array; store the new one right
+	 * away so that the error path below never touches a stale pointer.
+	 */
+	pdata->files = ptr;
+
+	ptr[pdata->index] = strdup(name);
+	if (!ptr[pdata->index])
+		goto out_free;
+
+	pdata->index++;
+	pdata->files[pdata->index] = NULL;
+	return;
+
+ out_free:
+	err = errno;
+	for (i = 0; i < pdata->index; i++)
+		free(pdata->files[i]);
+	free(pdata->files);
+	pdata->files = NULL;
+	pdata->index = 0;
+	/* The failure must be recorded even if errno was left at 0 */
+	pdata->ret = err ? err : ENOMEM;
+}
+
+/**
+ * trace_util_find_plugin_files - find list of possible plugin files
+ * @suffix: The suffix of the plugin files to find
+ *
+ * Searches the plugin directories for files that end in @suffix and
+ * returns a NULL terminated, allocated array with their names, or NULL
+ * if no file was found.
+ *
+ * The result must be checked with TRACECMD_ISERR(ret): if an error
+ * happens, the errno is returned wrapped with TRACECMD_ERROR() instead
+ * of a list.
+ *
+ * Use trace_util_free_plugin_files() to free the result.
+ */
+__hidden char **trace_util_find_plugin_files(const char *suffix)
+{
+	struct add_plugin_data pdata;
+
+	memset(&pdata, 0, sizeof(pdata));
+
+	/* add_plugin_file() is called once for every matching file */
+	tep_load_plugins_hook(NULL, suffix, add_plugin_file, &pdata);
+
+	if (!pdata.ret)
+		return pdata.files;
+
+	return TRACECMD_ERROR(pdata.ret);
+}
+
+/**
+ * trace_util_free_plugin_files - free the result of trace_util_find_plugin_files()
+ * @files: The result from trace_util_find_plugin_files()
+ *
+ * Frees every name in the list and the list itself. A NULL list and an
+ * error value (see TRACECMD_ISERR()) are ignored.
+ */
+void __hidden trace_util_free_plugin_files(char **files)
+{
+	char **entry;
+
+	if (!files || TRACECMD_ISERR(files))
+		return;
+
+	for (entry = files; *entry; entry++)
+		free(*entry);
+
+	free(files);
+}
+
+/*
+ * If the running binary lives in a directory named "tracecmd" (which is
+ * taken as a sign of running from the source tree), return the path of
+ * the "lib/traceevent/plugins" directory next to it.
+ *
+ * Returns an allocated string that must be freed with free(), or NULL if
+ * the binary is not in such a directory or on any failure.
+ */
+static char *get_source_plugins_dir(void)
+{
+	static const char src_dir[] = "/tracecmd";
+	static const char plugins_dir[] = "/lib/traceevent/plugins";
+	char path[PATH_MAX+1];
+	char *plugins;
+	size_t base;
+	ssize_t ret;
+	char *dir;
+	char *p;
+
+	ret = readlink("/proc/self/exe", path, PATH_MAX);
+	if (ret > PATH_MAX || ret < 0)
+		return NULL;
+
+	path[ret] = 0;
+	/*
+	 * dirname() may modify its argument or return a pointer to other
+	 * storage, so only its return value can be relied upon.
+	 */
+	dir = dirname(path);
+	p = strrchr(dir, '/');
+	if (!p)
+		return NULL;
+	/* Check if we are in the source tree */
+	if (strcmp(p, src_dir) != 0)
+		return NULL;
+
+	/*
+	 * The plugin suffix is longer than the "/tracecmd" it replaces, so
+	 * the result is built in a buffer of the right size instead of
+	 * being written over the end of @path.
+	 */
+	base = p - dir;
+	plugins = malloc(base + sizeof(plugins_dir));
+	if (!plugins)
+		return NULL;
+	memcpy(plugins, dir, base);
+	memcpy(plugins + base, plugins_dir, sizeof(plugins_dir));
+
+	return plugins;
+}
+
+/*
+ * Load the libtraceevent plugins for @tep.
+ *
+ * @flags may hold TRACECMD_FL_LOAD_NO_PLUGINS and
+ * TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS, which are translated into the
+ * matching tep flags. When running from the source tree, the plugin
+ * directory of the tree is added as the last plugin path.
+ *
+ * Returns the list produced by tep_load_plugins().
+ */
+__hidden struct tep_plugin_list *
+trace_load_plugins(struct tep_handle *tep, int flags)
+{
+	char *src_plugins;
+
+	if (flags & TRACECMD_FL_LOAD_NO_PLUGINS)
+		tep_set_flag(tep, TEP_DISABLE_PLUGINS);
+	if (flags & TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS)
+		tep_set_flag(tep, TEP_DISABLE_SYS_PLUGINS);
+
+	src_plugins = get_source_plugins_dir();
+	if (src_plugins != NULL)
+		tep_add_plugin_path(tep, src_plugins, TEP_PLUGIN_LAST);
+	free(src_plugins);
+
+	return tep_load_plugins(tep);
+}
+
+/**
+ * tracecmd_set_loglevel - set log level of the library
+ * @level: desired level of the library messages
+ *
+ * The same level is also passed on to the tracefs and traceevent
+ * libraries.
+ */
+void tracecmd_set_loglevel(enum tep_loglevel level)
+{
+	log_level = level;
+
+	tracefs_set_loglevel(level);
+	tep_set_loglevel(level);
+}
+
+/*
+ * Print a printf() style warning through tep_vprint(), unless the log
+ * level of the library is below TEP_LOG_WARNING. Weak symbol, so that an
+ * application can supply its own implementation.
+ */
+void __weak tracecmd_warning(const char *fmt, ...)
+{
+	va_list args;
+
+	if (log_level >= TEP_LOG_WARNING) {
+		va_start(args, fmt);
+		tep_vprint("libtracecmd", TEP_LOG_WARNING, true, fmt, args);
+		va_end(args);
+	}
+}
+
+/*
+ * Print a printf() style informational message through tep_vprint(),
+ * unless the log level of the library is below TEP_LOG_INFO. Weak symbol,
+ * so that an application can supply its own implementation.
+ */
+void __weak tracecmd_info(const char *fmt, ...)
+{
+	va_list args;
+
+	if (log_level >= TEP_LOG_INFO) {
+		va_start(args, fmt);
+		tep_vprint("libtracecmd", TEP_LOG_INFO, false, fmt, args);
+		va_end(args);
+	}
+}
+
+/*
+ * Print a printf() style critical message through tep_vprint(), unless
+ * the log level of the library is below TEP_LOG_CRITICAL.
+ *
+ * In debug mode the process is terminated after the message is printed:
+ * the exit status is the value returned by tep_vprint(), or -1 if that
+ * value is 0. Weak symbol, so that an application can supply its own
+ * implementation.
+ */
+void __weak tracecmd_critical(const char *fmt, ...)
+{
+	va_list args;
+	int status;
+
+	if (log_level < TEP_LOG_CRITICAL)
+		return;
+
+	va_start(args, fmt);
+	status = tep_vprint("libtracecmd", TEP_LOG_CRITICAL, true, fmt, args);
+	va_end(args);
+
+	if (!debug)
+		return;
+
+	exit(status ? status : -1);
+}
+
+/*
+ * Print a printf() style message on stdout, but only while the library is
+ * in debug mode. Weak symbol, so that an application can supply its own
+ * implementation.
+ */
+void __weak tracecmd_debug(const char *fmt, ...)
+{
+	va_list args;
+
+	if (tracecmd_get_debug()) {
+		va_start(args, fmt);
+		vprintf(fmt, args);
+		va_end(args);
+	}
+}
+
+#define LOG_BUF_SIZE 1024
+/*
+ * Format a message of at most LOG_BUF_SIZE - 1 characters (longer ones
+ * are truncated) and write it out.
+ *
+ * If a log file is set, the message goes there, preceded by "[<pid>]" and
+ * @prefix, and the file is flushed; @fp is ignored. Otherwise the bare
+ * message is written to @fp.
+ *
+ * NOTE(review): the original code tracked whether the previous message
+ * ended in a newline, but printed the "[<pid>]<prefix>" header in both
+ * cases. The header is still printed unconditionally - confirm whether it
+ * was meant to be left out when continuing a line.
+ */
+static void __plog(const char *prefix, const char *fmt, va_list ap, FILE *fp)
+{
+	char buf[LOG_BUF_SIZE];
+	int r;
+
+	r = vsnprintf(buf, LOG_BUF_SIZE, fmt, ap);
+	/* Nothing sensible is in the buffer on an output error */
+	if (r < 0)
+		return;
+	/* On truncation the buffer holds LOG_BUF_SIZE - 1 characters */
+	if (r >= LOG_BUF_SIZE)
+		r = LOG_BUF_SIZE - 1;
+
+	if (logfp) {
+		fprintf(logfp, "[%d]%s%.*s", getpid(), prefix, r, buf);
+		fflush(logfp);
+		return;
+	}
+
+	fprintf(fp, "%.*s", r, buf);
+}
+
+/*
+ * Write a printf() style message to the log file if one is set, or to
+ * stdout otherwise.
+ */
+void tracecmd_plog(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	__plog("", fmt, args, stdout);
+	va_end(args);
+
+	/* Make sure it gets to the screen, in case we crash afterward */
+	fflush(stdout);
+}
+
+/*
+ * Write a printf() style error message, prefixed with "Error: " when a
+ * log file is in use, followed on a separate line by the description of
+ * the errno value that was set when the function was called (or by an
+ * empty line if errno was 0). The output goes to the log file if one is
+ * set, or to stderr otherwise.
+ */
+void tracecmd_plog_error(const char *fmt, ...)
+{
+	/* Printing the message may change errno, so take a copy first */
+	int saved_errno = errno;
+	const char *str = "";
+	va_list ap;
+
+	va_start(ap, fmt);
+	__plog("Error: ", fmt, ap, stderr);
+	va_end(ap);
+	if (saved_errno)
+		str = strerror(saved_errno);
+	if (logfp)
+		fprintf(logfp, "\n%s\n", str);
+	else
+		fprintf(stderr, "\n%s\n", str);
+}
+
+/**
+ * tracecmd_set_logfile - Set file for logging
+ * @logfile: Name of the log file
+ *
+ * A log file that is already in use is closed first. The new file is
+ * opened for writing and truncated.
+ *
+ * Returns 0 on successful completion or -1 if the file could not be
+ * opened, in which case no log file is in use anymore.
+ */
+int tracecmd_set_logfile(char *logfile)
+{
+	if (logfp != NULL)
+		fclose(logfp);
+
+	logfp = fopen(logfile, "w");
+
+	return logfp ? 0 : -1;
+}
+
+/**
+ * tracecmd_stack_tracer_status - Check stack trace status
+ * @status: Returned stack trace status:
+ *             0 - not configured, disabled
+ *             non 0 - enabled
+ *
+ * Returns -1 in case of an error, 0 if file does not exist
+ * (stack tracer not configured in kernel) or 1 on successful completion.
+ * @status is not written in the error case.
+ */
+int tracecmd_stack_tracer_status(int *status)
+{
+	struct stat stat_buf;
+	char buf[64];
+	char *end;
+	long num;
+	ssize_t n;
+	int fd;
+
+	if (stat(PROC_STACK_FILE, &stat_buf) < 0) {
+		/* stack tracer not configured on running kernel */
+		*status = 0; /* not configured means disabled */
+		return 0;
+	}
+
+	fd = open(PROC_STACK_FILE, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+
+	n = read(fd, buf, sizeof(buf));
+	close(fd);
+
+	if (n <= 0)
+		return -1;
+
+	/* One byte must be left for the terminating NUL */
+	if ((size_t)n >= sizeof(buf))
+		return -1;
+
+	buf[n] = 0;
+
+	/*
+	 * errno is only meaningful if it is cleared before the call; a
+	 * stale value would otherwise turn a valid "0" into an error.
+	 */
+	errno = 0;
+	num = strtol(buf, &end, 10);
+
+	/* Check for various possible errors */
+	if (end == buf || errno || num > INT_MAX || num < INT_MIN)
+		return -1;
+
+	*status = num;
+	return 1; /* full success */
+}
+
+/**
+ * tracecmd_count_cpus - Get the number of CPUs in the system
+ *
+ * The number of configured processors reported by sysconf() is used. If
+ * sysconf() fails, a warning is printed (once) and the "processor" lines
+ * of /proc/cpuinfo are counted instead.
+ *
+ * Returns the number of CPUs in the system, or 0 in case of an error
+ */
+int tracecmd_count_cpus(void)
+{
+	static int once;
+	char *line = NULL;
+	size_t len = 0;
+	int cpus = 0;
+	FILE *fp;
+
+	cpus = sysconf(_SC_NPROCESSORS_CONF);
+	if (cpus > 0)
+		return cpus;
+
+	if (!once) {
+		once++;
+		tracecmd_warning("sysconf could not determine number of CPUS");
+	}
+
+	/* Do the hack to figure out # of CPUS */
+
+	/* sysconf() reports errors as -1: do not start counting from there */
+	cpus = 0;
+
+	fp = fopen("/proc/cpuinfo", "r");
+	if (!fp) {
+		tracecmd_critical("Can not read cpuinfo");
+		return 0;
+	}
+
+	/*
+	 * getline() resizes its buffer with realloc() for long lines (the
+	 * "flags" line easily exceeds 1K), so the buffer must come from the
+	 * heap and be owned by getline().
+	 */
+	while (getline(&line, &len, fp) >= 0) {
+		char *p;
+
+		if (strncmp(line, "processor", 9) != 0)
+			continue;
+		for (p = line + 9; isspace((unsigned char)*p); p++)
+			;
+		if (*p == ':')
+			cpus++;
+	}
+	free(line);
+	fclose(fp);
+
+	return cpus;
+}
+
+#define FNV_64_PRIME 0x100000001b3ULL
+/*
+ * tracecmd_generate_traceid - Generate a unique ID, used to identify
+ *				the current tracing session
+ *
+ * The ID is a hash (xor, then multiply by FNV_64_PRIME, for every byte)
+ * over a string made of the raw monotonic time and a few sysinfo() values
+ * (load averages, free and shared RAM, free swap, number of processes).
+ *
+ * Returns unique ID, or 0 if the string could not be allocated
+ */
+unsigned long long tracecmd_generate_traceid(void)
+{
+	unsigned long long hash = 0;
+	unsigned char *ustr;
+	struct sysinfo sinfo;
+	struct timespec ts;
+	char *str = NULL;
+
+	/* Keep the input defined even if one of the two calls below fails */
+	memset(&ts, 0, sizeof(ts));
+	memset(&sinfo, 0, sizeof(sinfo));
+
+	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+	sysinfo(&sinfo);
+	/* The content of @str is undefined if asprintf() fails */
+	if (asprintf(&str, "%lld %ld %lu %lu %lu %lu %lu %lu %u",
+		     (long long)ts.tv_sec, (long)ts.tv_nsec,
+		     (unsigned long)sinfo.loads[0],
+		     (unsigned long)sinfo.loads[1],
+		     (unsigned long)sinfo.loads[2],
+		     (unsigned long)sinfo.freeram,
+		     (unsigned long)sinfo.sharedram,
+		     (unsigned long)sinfo.freeswap,
+		     (unsigned int)sinfo.procs) < 0)
+		return 0;
+
+	ustr = (unsigned char *)str;
+	while (*ustr) {
+		hash ^= (unsigned long long)*ustr++;
+		hash *= FNV_64_PRIME;
+	}
+
+	free(str);
+	return hash;
+}
+
+/*
+ * tracecmd_default_file_version - Get default trace file version of the library
+ *
+ * Returns FILE_VERSION_DEFAULT, the default trace file version
+ */
+int tracecmd_default_file_version(void)
+{
+	return FILE_VERSION_DEFAULT;
+}
+
+/*
+ * tracecmd_is_version_supported - Check a trace file version
+ *
+ * Returns true if @version is not newer than FILE_VERSION_MAX, false
+ * otherwise
+ */
+bool tracecmd_is_version_supported(unsigned int version)
+{
+	return version <= FILE_VERSION_MAX;
+}
+
+/* Library constructor: initializes the compression support */
+__attribute__((constructor))
+static void tracecmd_lib_init(void)
+{
+	tracecmd_compress_init();
+}
+
+/* Library destructor: releases the compression support */
+__attribute__((destructor))
+static void tracecmd_lib_free(void)
+{
+	tracecmd_compress_free();
+}
+
+/*
+ * Check whether a trace file of version @file_version, which is in state
+ * @current_state, may move on to @new_state.
+ *
+ * Files of version FILE_VERSION_SECTIONS and above only need to have
+ * reached TRACECMD_FILE_INIT. Older files must walk through the states in
+ * a fixed order.
+ */
+__hidden bool check_file_state(unsigned long file_version, int current_state, int new_state)
+{
+	if (file_version >= FILE_VERSION_SECTIONS)
+		return current_state >= TRACECMD_FILE_INIT;
+
+	switch (new_state) {
+	case TRACECMD_FILE_HEADERS:
+	case TRACECMD_FILE_FTRACE_EVENTS:
+	case TRACECMD_FILE_ALL_EVENTS:
+	case TRACECMD_FILE_KALLSYMS:
+	case TRACECMD_FILE_PRINTK:
+	case TRACECMD_FILE_CMD_LINES:
+	case TRACECMD_FILE_CPU_COUNT:
+		/* Each of these states must directly follow the previous one */
+		return current_state == (new_state - 1);
+	case TRACECMD_FILE_OPTIONS:
+		return current_state == TRACECMD_FILE_CPU_COUNT;
+	case TRACECMD_FILE_CPU_LATENCY:
+	case TRACECMD_FILE_CPU_FLYRECORD:
+		return current_state == TRACECMD_FILE_OPTIONS;
+	default:
+		return false;
+	}
+}
diff --git a/libtracecmd.pc.template b/libtracecmd.pc.template
new file mode 100644
index 00000000..bcf4e39d
--- /dev/null
+++ b/libtracecmd.pc.template
@@ -0,0 +1,11 @@
+prefix=INSTALL_PREFIX
+libdir=LIB_DIR
+includedir=HEADER_DIR
+
+Name: libtracecmd
+URL: https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/
+Description: Library for creating and reading trace-cmd data files
+Version: LIB_VERSION
+Requires: libtracefs >= LIBTRACEFS_MIN_VERSION
+Cflags: -I${includedir}
+Libs: -L${libdir} -ltracecmd
diff --git a/make-trace-cmd.sh b/make-trace-cmd.sh
new file mode 100755
index 00000000..31f32594
--- /dev/null
+++ b/make-trace-cmd.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+if [ -z "$INSTALL_PATH" ]; then
+ echo
+ echo 'Error: No $INSTALL_PATH defined'
+ echo
+ echo " usage: [PREFIX=prefix][BUILD_PATH=/path/to/build] INSTALL_PATH=/path/to/install make-trace-cmd.sh install|install_libs|clean|uninstall"
+ echo
+ echo " Used to create a self contained directory to copy to other machines."
+ echo
+ echo " Please read PACKAGING for more information."
+ echo
+ exit
+fi
+
+if [ ! -d $INSTALL_PATH ]; then
+ mkdir $INSTALL_PATH
+fi
+
+if [ ! -z "$BUILD_PATH" ]; then
+ if [ ! -d $BUILD_PATH ]; then
+ mkdir $BUILD_PATH
+ fi
+ O_PATH="O=$BUILD_PATH"
+fi
+
+if [ -z "$PREFIX" ]; then
+ PREFIX="/usr"
+fi
+
+PKG_PATH=`pkg-config --variable pc_path pkg-config | tr ":" " " | cut -d' ' -f1`
+
+WITH_PATH=""
+# If pkg-config supports --with-path, use that as well
+if pkg-config --with-path=/tmp --variable pc_path pkg-config &> /dev/null ; then
+ WITH_PATH="--with-path=$INSTALL_PATH$PKG_PATH"
+fi
+
+PKG_CONFIG_PATH="$INSTALL_PATH/$PKG_PATH" PKG_CONFIG="pkg-config $WITH_PATH --define-variable=prefix=$INSTALL_PATH/$PREFIX" CFLAGS="-g -Wall -I$INSTALL_PATH/$PREFIX/include" make DESTDIR=$INSTALL_PATH $O_PATH prefix=$PREFIX $@
diff --git a/python/Makefile b/python/Makefile
new file mode 100644
index 00000000..63f5736d
--- /dev/null
+++ b/python/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+
+include $(src)/scripts/utils.mk
+
+ifdef BUILD_PYTHON_WORKS
+PYTHON_SO_INSTALL := ctracecmd.install
+PYTHON_PY_PROGS := event-viewer.install
+PYTHON_PY_LIBS := tracecmd.install
+endif
+
+# Build the Python extension module in three steps: let swig generate
+# ctracecmd_wrap.c from the interface file, compile the wrapper as
+# position independent code, then link it into ctracecmd.so.
+ctracecmd.so: ctracecmd.i $(LIBTRACECMD_STATIC)
+ swig -Wall -python -noproxy -I$(src)/include/trace-cmd $(LIBTRACEEVENT_CFLAGS) ctracecmd.i
+ $(CC) -fpic -c $(CPPFLAGS) $(CFLAGS) $(PYTHON_INCLUDES) ctracecmd_wrap.c
+ $(CC) --shared $(LIBTRACECMD_STATIC) $(LDFLAGS) ctracecmd_wrap.o -o ctracecmd.so $(TRACE_LIBS)
+
+# Static pattern rules: every "<name>.install" target installs the matching
+# .so or .py file into $(python_dir_SQ). The lists are only set when
+# BUILD_PYTHON_WORKS is defined, so install_python is a no-op otherwise.
+$(PYTHON_SO_INSTALL): %.install : %.so force
+ $(Q)$(call do_install_data,$<,$(python_dir_SQ))
+
+# Programs go through do_install, libraries and the module through
+# do_install_data.
+$(PYTHON_PY_PROGS): %.install : %.py force
+ $(Q)$(call do_install,$<,$(python_dir_SQ))
+
+$(PYTHON_PY_LIBS): %.install : %.py force
+ $(Q)$(call do_install_data,$<,$(python_dir_SQ))
+
+install_python: $(PYTHON_SO_INSTALL) $(PYTHON_PY_PROGS) $(PYTHON_PY_LIBS)
+
+
+clean:
+ $(RM) *.a *.so *.o .*.d ctracecmd_wrap.*
+
+force:
+.PHONY: clean force
diff --git a/python/ctracecmd.i b/python/ctracecmd.i
new file mode 100644
index 00000000..6d0179e3
--- /dev/null
+++ b/python/ctracecmd.i
@@ -0,0 +1,250 @@
+// tracecmd.i
+%module ctracecmd
+%include "typemaps.i"
+%include "constraints.i"
+
+%nodefaultctor record;
+%nodefaultdtor record;
+
+%apply Pointer NONNULL { struct tracecmd_input *handle };
+%apply Pointer NONNULL { struct tep_handle *pevent };
+%apply Pointer NONNULL { struct tep_format_field * };
+%apply unsigned long long *OUTPUT {unsigned long long *}
+%apply int *OUTPUT {int *}
+
+
+%{
+#include "trace-cmd.h"
+#include "event-parse.h"
+#include "event-utils.h"
+#include <Python.h>
+%}
+
+
+/*
+ * Input typemap for arguments declared as "PyObject *pyfunc": anything that
+ * is not callable is rejected with a TypeError, a callable is passed to the
+ * wrapped function as is.
+ */
+%typemap(in) PyObject *pyfunc {
+ if (!PyCallable_Check($input)) {
+ PyErr_SetString(PyExc_TypeError, "Need a callable object!");
+ return NULL;
+ }
+ $1 = $input;
+}
+
+%ignore python_callback;
+
+%inline %{
+/*
+ * Forward declaration; the definition lives in the %wrapper section so that
+ * it can use the SWIG type descriptors.
+ */
+static int python_callback(struct trace_seq *s,
+ struct tep_record *record,
+ struct tep_event *event,
+ void *context);
+
+/* Non-zero once py_supress_trace_output() was called; checked by warning() */
+static int skip_output = 0;
+
+/* Turn off the messages printed by warning() for the rest of the process. */
+static void py_supress_trace_output(void)
+{
+ skip_output = 1;
+}
+
+/*
+ * Print a printf-style warning through tep_vprint() under the "tracecmd"
+ * name, unless output was switched off with py_supress_trace_output().
+ */
+void warning(const char *fmt, ...)
+{
+	va_list args;
+
+	if (!skip_output) {
+		va_start(args, fmt);
+		tep_vprint("tracecmd", TEP_LOG_WARNING, true, fmt, args);
+		va_end(args);
+	}
+}
+
+/*
+ * Wrap a raw address, given as an integer, into a SWIG "struct tep_handle *"
+ * pointer object. The new object does not own the handle (own flag is 0).
+ */
+PyObject *convert_pevent(unsigned long pevent)
+{
+	return SWIG_NewPointerObj(SWIG_as_voidptr((void *)pevent),
+				  SWIGTYPE_p_tep_handle, 0);
+}
+
+/*
+ * Register the Python callable pyfunc as handler for an event. The event is
+ * selected by id or by subsys/evname, exactly as tep_register_event_handler()
+ * does. python_callback() is installed as the C handler and gets pyfunc as
+ * its context.
+ */
+void py_pevent_register_event_handler(struct tep_handle *pevent, int id,
+				      char *subsys, char *evname,
+				      PyObject *pyfunc)
+{
+	/* The handler keeps pyfunc as context, so it needs its own reference */
+	Py_INCREF(pyfunc);
+	if (tep_register_event_handler(pevent, id, subsys, evname,
+				       python_callback, pyfunc) < 0) {
+		/* Nothing was registered: give the reference back */
+		Py_DECREF(pyfunc);
+	}
+}
+
+/*
+ * Build a Python list with one entry per address stored from the "caller"
+ * field to the end of the record. Each address is read as a long_size byte
+ * number and translated with tep_find_function(); addresses without a known
+ * function are reported as a hexadecimal string instead. Reading stops at
+ * the first all-ones address.
+ *
+ * Returns a new list, or NULL with a Python exception set.
+ */
+static PyObject *py_field_get_stack(struct tep_handle *pevent,
+				    struct tep_record *record,
+				    struct tep_event *event,
+				    int long_size)
+{
+	PyObject *list;
+	PyObject *name;
+	struct tep_format_field *field;
+	void *data = record->data;
+	const char *func = NULL;
+	unsigned long addr;
+	int ret;
+
+	/* A non-positive step would never reach the end of the record */
+	if (long_size <= 0) {
+		PyErr_SetString(PyExc_ValueError,
+				"long_size must be positive");
+		return NULL;
+	}
+
+	field = tep_find_any_field(event, "caller");
+	if (!field) {
+		PyErr_SetString(PyExc_TypeError,
+				"Event doesn't have caller field");
+		return NULL;
+	}
+
+	list = PyList_New(0);
+	if (!list)
+		return NULL;
+
+	for (data += field->offset; data < record->data + record->size;
+	     data += long_size) {
+		addr = tep_read_number(event->tep, data, long_size);
+
+		if ((long_size == 8 && addr == (unsigned long long)-1) ||
+		    ((int)addr == -1))
+			break;
+
+		func = tep_find_function(event->tep, addr);
+		if (func)
+			name = PyUnicode_FromString(func);
+		else
+			name = PyUnicode_FromFormat("%p", (void *)addr);
+		if (!name) {
+			Py_DECREF(list);
+			return NULL;
+		}
+
+		/* PyList_Append() takes its own reference; drop ours */
+		ret = PyList_Append(list, name);
+		Py_DECREF(name);
+		if (ret) {
+			Py_DECREF(list);
+			return NULL;
+		}
+	}
+
+	return list;
+}
+
+/*
+ * fromMemory() wraps len bytes at buf in a Python object without copying:
+ * a read-only memoryview on Python 3, a buffer object on Python 2.
+ * PY_INT_AS_LONG names the integer-to-long conversion of the same version.
+ */
+#if PY_MAJOR_VERSION >= 3
+static PyObject *fromMemory(void *buf, size_t len)
+{
+ return PyMemoryView_FromMemory(buf, len, PyBUF_READ);
+}
+#define PY_INT_AS_LONG PyLong_AsLong
+#else
+static PyObject *fromMemory(void *buf, size_t len)
+{
+ return PyBuffer_FromMemory(buf, len);
+}
+#define PY_INT_AS_LONG PyInt_AS_LONG
+#endif
+
+
+
+/*
+ * Return the raw bytes of field f in record r as a Python memory object
+ * (see fromMemory()). For "__data_loc" fields the location of the data is
+ * read from the record itself.
+ *
+ * Returns NULL with a Python exception set if the field cannot be read or
+ * if a dynamic field points outside of the record.
+ */
+static PyObject *py_field_get_data(struct tep_format_field *f, struct tep_record *r)
+{
+	if (!strncmp(f->type, "__data_loc ", 11)) {
+		unsigned long long val;
+		int len, offset;
+
+		if (tep_read_number_field(f, r->data, &val)) {
+			PyErr_SetString(PyExc_TypeError,
+					"Field is not a valid number");
+			return NULL;
+		}
+
+		/*
+		 * The actual length of the dynamic array is stored
+		 * in the top half of the field, and the offset
+		 * is in the bottom half of the 32 bit field.
+		 */
+		offset = val & 0xffff;
+		len = (val >> 16) & 0xffff;
+
+		/* Both values come from the trace file: do not trust them */
+		if (offset + len > r->size) {
+			PyErr_SetString(PyExc_ValueError,
+					"Dynamic field lies outside of the record");
+			return NULL;
+		}
+
+		return fromMemory(r->data + offset, len);
+	}
+
+	return fromMemory(r->data + f->offset, f->size);
+}
+
+/*
+ * Return the content of field f in record r as a Python string. The string
+ * ends at the first NUL byte or at the end of the field, whichever comes
+ * first. For "__data_loc" fields the location and length of the data are
+ * read from the record itself.
+ *
+ * Returns NULL with a Python exception set if the field cannot be read or
+ * if a dynamic field points outside of the record.
+ */
+static PyObject *py_field_get_str(struct tep_format_field *f, struct tep_record *r)
+{
+	if (!strncmp(f->type, "__data_loc ", 11)) {
+		unsigned long long val;
+		int len, offset;
+
+		if (tep_read_number_field(f, r->data, &val)) {
+			PyErr_SetString(PyExc_TypeError,
+					"Field is not a valid number");
+			return NULL;
+		}
+
+		/*
+		 * The actual length of the dynamic array is stored
+		 * in the top half of the field, and the offset
+		 * is in the bottom half of the 32 bit field.
+		 */
+		offset = val & 0xffff;
+		len = (val >> 16) & 0xffff;
+
+		/* Both values come from the trace file: do not trust them */
+		if (offset + len > r->size) {
+			PyErr_SetString(PyExc_ValueError,
+					"Dynamic field lies outside of the record");
+			return NULL;
+		}
+
+		/* Never scan for the terminator beyond the field itself */
+		return PyUnicode_FromStringAndSize((char *)r->data + offset,
+				strnlen((char *)r->data + offset, len));
+	}
+
+	return PyUnicode_FromStringAndSize((char *)r->data + f->offset,
+				strnlen((char *)r->data + f->offset, f->size));
+}
+
+/*
+ * Return a new Python list with the names of all fields on the
+ * format.fields list of event ef, in list order.
+ *
+ * Returns NULL with a Python exception set on failure.
+ */
+static PyObject *py_format_get_keys(struct tep_event *ef)
+{
+	PyObject *list;
+	PyObject *name;
+	struct tep_format_field *f;
+	int ret;
+
+	list = PyList_New(0);
+	if (!list)
+		return NULL;
+
+	for (f = ef->format.fields; f; f = f->next) {
+		name = PyUnicode_FromString(f->name);
+		if (!name) {
+			Py_DECREF(list);
+			return NULL;
+		}
+
+		/* PyList_Append() takes its own reference; drop ours */
+		ret = PyList_Append(list, name);
+		Py_DECREF(name);
+		if (ret) {
+			Py_DECREF(list);
+			return NULL;
+		}
+	}
+
+	return list;
+}
+%}
+
+
+%wrapper %{
+/*
+ * C side of every handler registered through
+ * py_pevent_register_event_handler(): wraps s, record and event into SWIG
+ * pointer objects and calls the Python callable passed as context with them.
+ *
+ * The callable may return None (treated as 0) or an integer, which becomes
+ * the return value. Any Python error is printed and 0 is returned.
+ */
+static int python_callback(struct trace_seq *s,
+			   struct tep_record *record,
+			   struct tep_event *event,
+			   void *context)
+{
+	PyObject *arglist, *result;
+	int r = 0;
+
+	/* The Python side releases the record with free_record() later on */
+	record->ref_count++;
+
+	/*
+	 * "N" hands the freshly created wrapper objects over to the tuple.
+	 * "O" would add a second reference that nobody ever drops.
+	 */
+	arglist = Py_BuildValue("(NNN)",
+				SWIG_NewPointerObj(SWIG_as_voidptr(s),
+						   SWIGTYPE_p_trace_seq, 0),
+				SWIG_NewPointerObj(SWIG_as_voidptr(record),
+						   SWIGTYPE_p_tep_record, 0),
+				SWIG_NewPointerObj(SWIG_as_voidptr(event),
+						   SWIGTYPE_p_tep_event, 0));
+	if (!arglist) {
+		/* The callable never saw the record: undo our reference */
+		record->ref_count--;
+		PyErr_Print();
+		return 0;
+	}
+
+	result = PyObject_CallObject(context, arglist);
+	Py_DECREF(arglist);
+	if (!result) {
+		PyErr_Print();
+		return 0;
+	}
+
+	if (result != Py_None) {
+		if (!PyInt_Check(result)) {
+			PyErr_SetString(PyExc_TypeError,
+					"callback must return int");
+			PyErr_Print();
+			Py_DECREF(result);
+			return 0;
+		}
+		r = PY_INT_AS_LONG(result);
+	}
+
+	Py_DECREF(result);
+
+	return r;
+}
+%}
+
+
+%ignore trace_seq_vprintf;
+%ignore vpr_stat;
+
+/* SWIG can't grok these, define them to nothing */
+#define __trace
+#define __attribute__(x)
+#define __thread
+
+%include "trace-cmd.h"
+%include <trace-seq.h>
+%include <event-parse.h>
diff --git a/python/event-viewer.py b/python/event-viewer.py
new file mode 100755
index 00000000..e3b2edd4
--- /dev/null
+++ b/python/event-viewer.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python2
+
+import getopt
+from gobject import *
+import gtk
+from tracecmd import *
+import time
+
+app = None
+data_func_cnt = 0
+
+# In a "real" app these widths should be determined at runtime testing max length
+# strings in the current font.
+TS_COL_W = 150
+CPU_COL_W = 35
+EVENT_COL_W = 150
+PID_COL_W = 75
+COMM_COL_W = 250
+
+
+def timing(func):
+ def wrapper(*arg):
+ start = time.time()
+ ret = func(*arg)
+ end = time.time()
+ print('@%s took %0.3f s' % (func.func_name, (end-start)))
+ return ret
+ return wrapper
+
+
+class EventStore(gtk.GenericTreeModel):
+ class EventRef(object):
+ '''Inner class to build the trace event index'''
+ def __init__(self, index, timestamp, offset, cpu):
+ self.index = index
+ self.offset = offset
+ self.ts = timestamp
+ self.cpu = cpu
+
+ def __cmp__(self, other):
+ if self.ts < other.ts:
+ return -1
+ if self.ts > other.ts:
+ return 1
+ if self.offset < other.offset:
+ return -1
+ if self.offset > other.offset:
+ return 1
+ return 0
+
+ # The store only returns the record offset into the trace
+ # The view is responsible for looking up the Event with the offset
+ column_types = (long,)
+
+ @timing
+ def __init__(self, trace):
+ gtk.GenericTreeModel.__init__(self)
+ self.trace = trace
+ self.refs = []
+ self._load_trace()
+ self._sort()
+ self._reindex()
+
+ @timing
+ def _load_trace(self):
+ print("Building trace index...")
+ index = 0
+ for cpu in range(0, trace.cpus):
+ rec = tracecmd_read_data(self.trace._handle, cpu)
+ while rec:
+ offset = tep_record_offset_get(rec)
+ ts = tep_record_ts_get(rec)
+ self.refs.append(self.EventRef(index, ts, offset, cpu))
+ index = index + 1
+ rec = tracecmd_read_data(self.trace._handle, cpu)
+ print("Loaded %d events from trace" % (index))
+
+ @timing
+ def _sort(self):
+ self.refs.sort()
+
+ @timing
+ def _reindex(self):
+ for i in range(0, len(self.refs)):
+ self.refs[i].index = i
+
+ def on_get_flags(self):
+ return gtk.TREE_MODEL_LIST_ONLY | gtk.TREE_MODEL_ITERS_PERSIST
+
+ def on_get_n_columns(self):
+ return len(self.column_types)
+
+ def on_get_column_type(self, col):
+ return self.column_types[col]
+
+ def on_get_iter(self, path):
+ return self.refs[path[0]]
+
+ def on_get_path(self, ref):
+ return ref.index
+
+ def on_get_value(self, ref, col):
+ '''
+ The Event record was getting deleted when passed back via this
+ method, now it just returns the ref itself. Use get_event() instead.
+ '''
+ if col == 0:
+ #return self.trace.read_event_at(ref.offset)
+ return ref
+ return None
+
+ def on_iter_next(self, ref):
+ try:
+ return self.refs[ref.index+1]
+ except IndexError:
+ return None
+
+ def on_iter_children(self, ref):
+ if ref:
+ return None
+ return self.refs[0]
+
+ def on_iter_has_child(self, ref):
+ return False
+
+ def on_iter_n_children(self, ref):
+ if ref:
+ return 0
+ return len(self.refs)
+
+ def on_iter_nth_child(self, ref, n):
+ if ref:
+ return None
+ try:
+ return self.refs[n]
+ except IndexError:
+ return None
+
+ def on_iter_parent(self, child):
+ return None
+
+ def get_event(self, iter):
+ '''This allocates a record which must be freed by the caller'''
+ try:
+ ref = self.refs[self.get_path(iter)[0]]
+ ev = self.trace.read_event_at(ref.offset)
+ return ev
+ except IndexError:
+ return None
+
+
+class EventView(gtk.TreeView):
+ def __init__(self, model):
+ gtk.TreeView.__init__(self, model)
+ self.set_fixed_height_mode(True)
+
+ ts_col = gtk.TreeViewColumn("Time (s)")
+ ts_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ ts_col.set_fixed_width(TS_COL_W)
+ ts_cell = gtk.CellRendererText()
+ ts_col.pack_start(ts_cell, False)
+ ts_col.set_cell_data_func(ts_cell, self.data_func, "ts")
+ self.append_column(ts_col)
+
+ cpu_col = gtk.TreeViewColumn("CPU")
+ cpu_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ cpu_col.set_fixed_width(CPU_COL_W)
+ cpu_cell = gtk.CellRendererText()
+ cpu_col.pack_start(cpu_cell, False)
+ cpu_col.set_cell_data_func(cpu_cell, self.data_func, "cpu")
+ self.append_column(cpu_col)
+
+ event_col = gtk.TreeViewColumn("Event")
+ event_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ event_col.set_fixed_width(EVENT_COL_W)
+ event_cell = gtk.CellRendererText()
+ event_col.pack_start(event_cell, False)
+ event_col.set_cell_data_func(event_cell, self.data_func, "event")
+ self.append_column(event_col)
+
+ pid_col = gtk.TreeViewColumn("PID")
+ pid_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ pid_col.set_fixed_width(PID_COL_W)
+ pid_cell = gtk.CellRendererText()
+ pid_col.pack_start(pid_cell, False)
+ pid_col.set_cell_data_func(pid_cell, self.data_func, "pid")
+ self.append_column(pid_col)
+
+ comm_col = gtk.TreeViewColumn("Comm")
+ comm_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ comm_col.set_fixed_width(COMM_COL_W)
+ comm_cell = gtk.CellRendererText()
+ comm_col.pack_start(comm_cell, False)
+ comm_col.set_cell_data_func(comm_cell, self.data_func, "comm")
+ self.append_column(comm_col)
+
+ def data_func(self, col, cell, model, iter, data):
+ global app, data_func_cnt
+
+ ev = model.get_event(iter)
+ #ev = model.get_value(iter, 0)
+ if not ev:
+ return False
+
+ if data == "ts":
+ cell.set_property("markup", "%d.%09d" % (ev.ts/1000000000,
+ ev.ts%1000000000))
+ data_func_cnt = data_func_cnt + 1
+ if app:
+ app.inc_data_func()
+ elif data == "cpu":
+ cell.set_property("markup", ev.cpu)
+ elif data == "event":
+ cell.set_property("markup", ev.name)
+ elif data == "pid":
+ cell.set_property("markup", ev.pid)
+ elif data == "comm":
+ cell.set_property("markup", ev.comm)
+ else:
+ print("Unknown Column:", data)
+ return False
+
+ return True
+
+
+class EventViewerApp(gtk.Window):
+ def __init__(self, trace):
+ gtk.Window.__init__(self)
+
+ self.set_size_request(650, 400)
+ self.set_position(gtk.WIN_POS_CENTER)
+
+ self.connect("destroy", gtk.main_quit)
+ self.set_title("Event Viewer")
+
+ store = EventStore(trace)
+ view = EventView(store)
+
+ sw = gtk.ScrolledWindow()
+ sw.set_policy(gtk.POLICY_NEVER, gtk.POLICY_ALWAYS)
+ sw.add(view)
+
+ # track how often the treeview data_func is called
+ self.data_func_label = gtk.Label("0")
+ hbox = gtk.HBox()
+ hbox.pack_start(gtk.Label("TS Data Func Calls:"), False, False)
+ hbox.pack_start(self.data_func_label, False, False)
+
+ vbox = gtk.VBox()
+ vbox.pack_start(hbox, False)
+ vbox.pack_end(sw)
+
+ self.add(vbox)
+ self.show_all()
+
+ def inc_data_func(self):
+ global data_func_cnt
+ self.data_func_label.set_text(str(data_func_cnt))
+
+
+if __name__ == "__main__":
+ if len(sys.argv) >=2:
+ filename = sys.argv[1]
+ else:
+ filename = "trace.dat"
+
+ print("Initializing trace...")
+ trace = Trace(filename)
+ print("Initializing app...")
+ app = EventViewerApp(trace)
+ print("Go!")
+ gtk.main()
diff --git a/python/tracecmd.py b/python/tracecmd.py
new file mode 100644
index 00000000..4d481576
--- /dev/null
+++ b/python/tracecmd.py
@@ -0,0 +1,255 @@
+#
+# Copyright (C) International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# 2009-Dec-17: Initial version by Darren Hart <dvhltc@us.ibm.com>
+#
+
+from functools import update_wrapper
+from ctracecmd import *
+from UserDict import DictMixin
+
+"""
+Python interface to the tracecmd library for parsing ftrace traces
+
+Python tracecmd applications should be written to this interface. It will be
+updated as the tracecmd C API changes and try to minimize the impact to python
+applications. The ctracecmd Python module is automatically generated using SWIG
+and it is recommended applications not use it directly.
+
+TODO: consider a complete class hierarchy of ftrace events...
+"""
+
+def cached_property(func, name=None):
+ if name is None:
+ name = func.__name__
+ def _get(self):
+ try:
+ return self.__cached_properties[name]
+ except AttributeError:
+ self.__cached_properties = {}
+ except KeyError:
+ pass
+ value = func(self)
+ self.__cached_properties[name] = value
+ return value
+ update_wrapper(_get, func)
+ def _del(self):
+ self.__cached_properties.pop(name, None)
+ return property(_get, None, _del)
+
+class Event(object, DictMixin):
+ """
+ This class can be used to access event data
+ according to an event's record and format.
+ """
+ def __init__(self, pevent, record, format):
+ self._pevent = pevent
+ self._record = record
+ self._format = format
+
+ def __str__(self):
+ return "%d.%09d CPU%d %s: pid=%d comm=%s type=%d" % \
+ (self.ts/1000000000, self.ts%1000000000, self.cpu, self.name,
+ self.num_field("common_pid"), self.comm, self.type)
+
+ def __del__(self):
+ free_record(self._record)
+
+ def __getitem__(self, n):
+ f = tep_find_field(self._format, n)
+ if f is None:
+ raise KeyError("no field '%s'" % n)
+ return Field(self._record, f)
+
+ def keys(self):
+ return py_format_get_keys(self._format)
+
+ @cached_property
+ def comm(self):
+ return tep_data_comm_from_pid(self._pevent, self.pid)
+
+ @cached_property
+ def cpu(self):
+ return tep_record_cpu_get(self._record)
+
+ @cached_property
+ def name(self):
+ return event_format_name_get(self._format)
+
+ @cached_property
+ def pid(self):
+ return tep_data_pid(self._pevent, self._record)
+
+ @cached_property
+ def ts(self):
+ return tep_record_ts_get(self._record)
+
+ @cached_property
+ def type(self):
+ return tep_data_type(self._pevent, self._record)
+
+ def num_field(self, name):
+ f = tep_find_any_field(self._format, name)
+ if f is None:
+ return None
+ ret, val = tep_read_number_field(f, tep_record_data_get(self._record))
+ if ret:
+ return None
+ return val
+
+ def str_field(self, name):
+ f = tep_find_any_field(self._format, name)
+ if f is None:
+ return None
+ return py_field_get_str(f, self._record)
+
+ def stack_field(self, long_size):
+ return py_field_get_stack(self._pevent, self._record, self._format,
+ long_size)
+
+class TraceSeq(object):
+    """Thin wrapper around the trace_seq handed to event handlers."""
+    def __init__(self, trace_seq):
+        self._trace_seq = trace_seq
+
+    def puts(self, s):
+        """Pass the string s to trace_seq_puts() and return its result."""
+        return trace_seq_puts(self._trace_seq, s)
+
+class FieldError(Exception):
+    """Raised when a Field is converted to a number but is not numeric."""
+    pass
+
+class Field(object):
+ def __init__(self, record, field):
+ self._record = record
+ self._field = field
+
+ @cached_property
+ def data(self):
+ return py_field_get_data(self._field, self._record)
+
+ def __long__(self):
+ ret, val = tep_read_number_field(self._field,
+ tep_record_data_get(self._record))
+ if ret:
+ raise FieldError("Not a number field")
+ return val
+ __int__ = __long__
+
+ def __str__(self):
+ return py_field_get_str(self._field, self._record)
+
+class PEvent(object):
+ def __init__(self, pevent):
+ self._pevent = pevent
+
+ def _handler(self, cb, s, record, event_fmt):
+ return cb(TraceSeq(s), Event(self._pevent, record, event_fmt))
+
+ def register_event_handler(self, subsys, event_name, callback):
+ l = lambda s, r, e: self._handler(callback, s, r, e)
+
+ py_pevent_register_event_handler(
+ self._pevent, -1, subsys, event_name, l)
+
+ @cached_property
+ def file_endian(self):
+ if tep_is_file_bigendian(self._pevent):
+ return '>'
+ return '<'
+
+
+class FileFormatError(Exception):
+    """Raised by Trace when the headers or the data of a file cannot be read."""
+    pass
+
+class Trace(object):
+ """
+ Trace object represents the trace file it is created with.
+
+ The Trace object aggregates the tracecmd structures and functions that are
+ used to manage the trace and extract events from it.
+ """
+ def __init__(self, filename):
+ self._handle = tracecmd_alloc(filename)
+
+ if tracecmd_read_headers(self._handle):
+ raise FileFormatError("Invalid headers")
+
+ if tracecmd_init_data(self._handle):
+ raise FileFormatError("Failed to init data")
+
+ self._pevent = tracecmd_get_pevent(self._handle)
+
+ @cached_property
+ def cpus(self):
+ return tracecmd_cpus(self._handle)
+
+ @cached_property
+ def long_size(self):
+ return tracecmd_long_size(self._handle)
+
+ def read_event(self, cpu):
+ rec = tracecmd_read_data(self._handle, cpu)
+ if rec:
+ type = tep_data_type(self._pevent, rec)
+ format = tep_find_event(self._pevent, type)
+ # rec ownership goes over to Event instance
+ return Event(self._pevent, rec, format)
+ return None
+
+ def read_event_at(self, offset):
+ res = tracecmd_read_at(self._handle, offset)
+ # SWIG only returns the CPU if the record is None for some reason
+ if isinstance(res, int):
+ return None
+ rec, cpu = res
+ type = tep_data_type(self._pevent, rec)
+ format = tep_find_event(self._pevent, type)
+ # rec ownership goes over to Event instance
+ return Event(self._pevent, rec, format)
+
+ def read_next_event(self):
+ res = tracecmd_read_next_data(self._handle)
+ if isinstance(res, int):
+ return None
+ rec, cpu = res
+ type = tep_data_type(self._pevent, rec)
+ format = tep_find_event(self._pevent, type)
+ return Event(self._pevent, rec, format)
+
+ def peek_event(self, cpu):
+ rec = tracecmd_peek_data_ref(self._handle, cpu)
+ if rec is None:
+ return None
+ type = tep_data_type(self._pevent, rec)
+ format = tep_find_event(self._pevent, type)
+ # rec ownership goes over to Event instance
+ return Event(self._pevent, rec, format)
+
+
+# Basic builtin test, execute module directly
+if __name__ == "__main__":
+ t = Trace("trace.dat")
+ print("Trace contains data for %d cpus" % (t.cpus))
+
+ for cpu in range(0, t.cpus):
+ print("CPU %d" % (cpu))
+ ev = t.read_event(cpu)
+ while ev:
+ print("\t%s" % (ev))
+ ev = t.read_event(cpu)
+
+
+
diff --git a/python/tracecmdgui.py b/python/tracecmdgui.py
new file mode 100644
index 00000000..01bfd614
--- /dev/null
+++ b/python/tracecmdgui.py
@@ -0,0 +1,239 @@
+#
+# Copyright (C) International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# 2009-Dec-31: Initial version by Darren Hart <dvhltc@us.ibm.com>
+#
+
+import gobject #delete me ?
+import time
+import sys
+import gtk
+from tracecmd import *
+from ctracecmdgui import *
+
+"""
+Python interface for tracecmd GTK widgets
+
+Python tracecmd applications should be written to this interface. It will be
+updated as the tracecmd gui C API changes and try to minimize the impact to
+python applications. The ctracecmdgui Python module is automatically generated
+using SWIG and it is recommended applications not use it directly.
+"""
+
+# In a "real" app these widths should be determined at runtime testing max length
+# strings in the current font.
+TS_COL_W = 150
+CPU_COL_W = 35
+EVENT_COL_W = 150
+PID_COL_W = 75
+COMM_COL_W = 250
+
+
+def timing(func):
+ def wrapper(*arg):
+ start = time.time()
+ ret = func(*arg)
+ end = time.time()
+ print('@%s took %0.3f s' % (func.func_name, (end-start)))
+ return ret
+ return wrapper
+
+
+class EventStore(gtk.GenericTreeModel):
+ # FIXME: get these from the C code: trace_view_store->column_types ...
+ @timing
+ def __init__(self, trace):
+ gtk.GenericTreeModel.__init__(self)
+ self.trace = trace
+ self.cstore = trace_view_store_new(trace.handle)
+ self.gtk_cstore = trace_view_store_as_gtk_tree_model(self.cstore)
+ num_rows = trace_view_store_num_rows_get(self.cstore)
+ print("Loaded %d events from trace" % (num_rows))
+
+ def on_get_flags(self):
+ return trace_view_store_get_flags(self.gtk_cstore)
+
+ def on_get_n_columns(self):
+ return trace_view_store_get_n_columns(self.gtk_cstore)
+
+ def on_get_column_type(self, col):
+ # I couldn't figure out how to convert the C GType into the python
+ # GType. The current typemap converts the C GType into the python type,
+ # which is what this function is supposed to return anyway.
+ pytype = trace_view_store_get_column_type(self.gtk_cstore, col)
+ return pytype
+
+ def on_get_iter(self, path):
+ if len(path) > 1 and path[1] != 1:
+ return None
+ n = path[0]
+ rec = trace_view_store_get_row(self.cstore, n)
+ return rec
+
+ def on_get_path(self, rec):
+ if not rec:
+ return None
+ start_row = trace_view_store_start_row_get(self.cstore)
+ return (trace_view_record_pos_get(rec) - start_row,)
+
+ def on_get_value(self, rec, col):
+ # FIXME: write SWIG wrapper to marshal the Gvalue and wrap the rec in an
+ # Iter
+ pass
+ #return trace_view_store_get_value_py(self.cstore, rec, col)
+
+ def on_iter_next(self, rec):
+ pos = trace_view_record_pos_get(rec)
+ start_row = trace_view_store_start_row_get(self.cstore)
+ return trace_view_store_get_row(self.cstore, pos - start_row + 1)
+
+ def on_iter_children(self, rec):
+ if rec:
+ return None
+ return trace_view_store_get_row(self.cstore, 0)
+
+ def on_iter_has_child(self, rec):
+ return False
+
+ def on_iter_n_children(self, rec):
+ if rec:
+ return 0
+ return trace_view_store_num_rows_get(self.cstore)
+
+ def on_iter_nth_child(self, rec, n):
+ if rec:
+ return None
+ return trace_view_store_get_row(self.cstore, n)
+
+ def on_iter_parent(self, child):
+ return None
+
+ def get_event(self, iter):
+ path = self.get_path(iter)
+ if not path:
+ return None
+ rec = trace_view_store_get_row(self.cstore, path[0])
+ if not rec:
+ return None
+ ev = self.trace.read_event_at(trace_view_record_offset_get(rec))
+ return ev
+
+
+class EventView(gtk.TreeView):
+ def __init__(self, model):
+ gtk.TreeView.__init__(self, model)
+ self.set_fixed_height_mode(True)
+
+ ts_col = gtk.TreeViewColumn("Time (s)")
+ ts_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ ts_col.set_fixed_width(TS_COL_W)
+ ts_cell = gtk.CellRendererText()
+ ts_col.pack_start(ts_cell, False)
+ ts_col.set_cell_data_func(ts_cell, self.data_func, "ts")
+ self.append_column(ts_col)
+
+ cpu_col = gtk.TreeViewColumn("CPU")
+ cpu_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ cpu_col.set_fixed_width(CPU_COL_W)
+ cpu_cell = gtk.CellRendererText()
+ cpu_col.pack_start(cpu_cell, False)
+ cpu_col.set_cell_data_func(cpu_cell, self.data_func, "cpu")
+ self.append_column(cpu_col)
+
+ event_col = gtk.TreeViewColumn("Event")
+ event_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ event_col.set_fixed_width(EVENT_COL_W)
+ event_cell = gtk.CellRendererText()
+ event_col.pack_start(event_cell, False)
+ event_col.set_cell_data_func(event_cell, self.data_func, "event")
+ self.append_column(event_col)
+
+ pid_col = gtk.TreeViewColumn("PID")
+ pid_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ pid_col.set_fixed_width(PID_COL_W)
+ pid_cell = gtk.CellRendererText()
+ pid_col.pack_start(pid_cell, False)
+ pid_col.set_cell_data_func(pid_cell, self.data_func, "pid")
+ self.append_column(pid_col)
+
+ comm_col = gtk.TreeViewColumn("Comm")
+ comm_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
+ comm_col.set_fixed_width(COMM_COL_W)
+ comm_cell = gtk.CellRendererText()
+ comm_col.pack_start(comm_cell, False)
+ comm_col.set_cell_data_func(comm_cell, self.data_func, "comm")
+ self.append_column(comm_col)
+
+ def data_func(self, col, cell, model, iter, data):
+ ev = model.get_event(iter)
+ #ev = model.get_value(iter, 0)
+ if not ev:
+ return False
+
+ if data == "ts":
+ cell.set_property("markup", "%d.%d" % (ev.ts/1000000000,
+ ev.ts%1000000000))
+ elif data == "cpu":
+ cell.set_property("markup", ev.cpu)
+ elif data == "event":
+ cell.set_property("markup", ev.name)
+ elif data == "pid":
+ cell.set_property("markup", ev.pid)
+ elif data == "comm":
+ cell.set_property("markup", ev.comm)
+ else:
+ print("Unknown Column:", data)
+ return False
+
+ return True
+
+
+class EventViewerApp(gtk.Window):
+ def __init__(self, trace):
+ gtk.Window.__init__(self)
+
+ self.set_size_request(650, 400)
+ self.set_position(gtk.WIN_POS_CENTER)
+
+ self.connect("destroy", gtk.main_quit)
+ self.set_title("Event Viewer")
+
+ store = EventStore(trace)
+ view = EventView(store)
+
+ sw = gtk.ScrolledWindow()
+ sw.set_policy(gtk.POLICY_NEVER, gtk.POLICY_ALWAYS)
+ sw.add(view)
+
+ # track how often the treeview data_func is called
+ self.add(sw)
+ self.show_all()
+
+
+# Basic builtin test, execute module directly
+if __name__ == "__main__":
+ if len(sys.argv) >=2:
+ filename = sys.argv[1]
+ else:
+ filename = "trace.dat"
+
+ print("Initializing trace...")
+ trace = Trace(filename)
+ print("Initializing app...")
+ app = EventViewerApp(trace)
+ print("Go!")
+ gtk.main()
diff --git a/scripts/debug/tsync_hist.py b/scripts/debug/tsync_hist.py
new file mode 100644
index 00000000..819d1e8f
--- /dev/null
+++ b/scripts/debug/tsync_hist.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2019, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+# Copyright (C) 2019, VMware Inc, Yordan Karadzhov <ykaradzhov@vmware.com>
+
+
+import matplotlib.pyplot as plt
+import matplotlib.lines as mlines
+import numpy as np
+import sys
+
+def newline(p1, p2):
+ ax = plt.gca()
+ xmin, xmax = ax.get_xbound()
+
+ if(p2[0] == p1[0]):
+ xmin = xmax = p1[0]
+ ymin, ymax = ax.get_ybound()
+ else:
+ ymax = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmax-p1[0])
+ ymin = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmin-p1[0])
+
+ l = mlines.Line2D([xmin,xmax], [ymin,ymax], color='red')
+ ax.add_line(l)
+ return l
+
+
+data = np.loadtxt(fname = sys.argv[1])
+selected_ts = data[-1, 1]
+selected_ofs = data[-1, 0]
+data = data[:-1,:]
+
+x = data[:, 1] - data[:, 0]
+
+mean = x.mean()
+std = x.std()
+
+num_bins = 500
+min = x.min() #+ .4 * (x.max() - x.min())
+max = x.max() #- .4 * (x.max() - x.min())
+bins = np.linspace(min, max, num_bins, endpoint = False, dtype=int)
+
+fig, ax = plt.subplots()
+
+# the histogram of the data
+n, bins, patches = ax.hist(x, bins, histtype=u'step');
+
+ax.set_xlabel('clock offset [$\mu$s]')
+ax.set_ylabel('entries')
+ax.set_title("$\sigma$=%i" % std)
+
+x1, y1 = [selected_ofs, min], [selected_ofs, max]
+newline(x1, y1)
+
+# Tweak spacing to prevent clipping of ylabel
+fig.tight_layout()
+plt.show()
diff --git a/scripts/debug/tsync_readme b/scripts/debug/tsync_readme
new file mode 100644
index 00000000..f3ebb25d
--- /dev/null
+++ b/scripts/debug/tsync_readme
@@ -0,0 +1,12 @@
+PTP-like algorithm debug
+========================
+
+tsync_*.py scripts can be used to visualise debug files, written when the PTP-like algorithm
+is compiled with TSYNC_DEBUG defined. The files are located in the guest machine:
+ s-cid*.txt - For each offset calculation: host and guest clocks and calculated offset.
+ res-cid*.txt - For each tracing session: all calculated clock offsets.
+
+tsync_hist.py plots a histogram, using data from a s-cid*.txt file:
+ "python tsync_hist.py s-cid2_1.txt"
+tsync_res.py plots a line, using data from a res-cid*.txt file:
+ "python tsync_res.py res-cid2.txt"
diff --git a/scripts/debug/tsync_res.py b/scripts/debug/tsync_res.py
new file mode 100644
index 00000000..7d109863
--- /dev/null
+++ b/scripts/debug/tsync_res.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2019, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+# Copyright (C) 2019, VMware Inc, Yordan Karadzhov <ykaradzhov@vmware.com>
+
+
+import matplotlib.pyplot as plt
+import matplotlib.lines as mlines
+import numpy as np
+import sys
+
+def newline(p1, p2):
+ ax = plt.gca()
+ xmin, xmax = ax.get_xbound()
+
+ if(p2[0] == p1[0]):
+ xmin = xmax = p1[0]
+ ymin, ymax = ax.get_ybound()
+ else:
+ ymax = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmax-p1[0])
+ ymin = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmin-p1[0])
+
+ l = mlines.Line2D([xmin,xmax], [ymin,ymax], color='red')
+ ax.add_line(l)
+ return l
+
+data = np.loadtxt(fname = sys.argv[1])
+x = data[:, 0]
+y = data[:, 1]
+
+fig, ax = plt.subplots()
+
+ax.set_xlabel('samples (t)')
+ax.set_ylabel('clock offset')
+ax.set_title("$\delta$=%i ns" % (max(y) - min(y)))
+
+l = mlines.Line2D(x, y)
+ax.add_line(l)
+ax.set_xlim(min(x), max(x))
+ax.set_ylim(min(y), max(y) )
+
+print(min(y), max(y), max(y) - min(y))
+
+# Tweak spacing to prevent clipping of ylabel
+fig.tight_layout()
+plt.show()
diff --git a/scripts/utils.mk b/scripts/utils.mk
new file mode 100644
index 00000000..3fc2d74f
--- /dev/null
+++ b/scripts/utils.mk
@@ -0,0 +1,210 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Utils
+
+# GUI is the tag prepended to progress messages when BUILDGUI is 1; GSPACE is
+# the padding used in its place otherwise.
+ifeq ($(BUILDGUI), 1)
+ GUI = 'GUI '
+ GSPACE =
+else
+ GUI =
+ GSPACE = " "
+endif
+
+# Name printed in progress messages: the target's file name without its
+# directory, prefixed by GSPACE.
+ GOBJ = $(GSPACE)$(notdir $(strip $@))
+
+
+# With VERBOSE=1 recipes are echoed in full; otherwise Q prefixes them with
+# '@' so make does not echo them. S is set to -s in the quiet case
+# (NOTE(review): presumably handed to sub-makes; its users are not in this file).
+ifeq ($(VERBOSE),1)
+ Q =
+ S =
+else
+ Q = @
+ S = -s
+endif
+
+# Use empty print_* macros if either SILENT or VERBOSE.
+#
+# Each print_* macro expands to a shell "echo ...;" fragment that is placed in
+# front of the real command by the do_* macros below. Every macro is defined
+# exactly once per branch so the effective definition is unambiguous.
+ifeq ($(findstring 1,$(SILENT)$(VERBOSE)),1)
+ print_compile =
+ print_app_build =
+ print_fpic_compile =
+ print_shared_lib_compile =
+ print_plugin_obj_compile =
+ print_plugin_build =
+ print_static_lib_build =
+ print_uninstall =
+ print_update =
+ print_asciidoc =
+ print_xsltproc =
+ print_install =
+ hide_xsltproc_output =
+else
+ print_compile = echo ' $(GUI)COMPILE '$(GOBJ);
+ print_app_build = echo ' $(GUI)BUILD '$(GOBJ);
+ print_fpic_compile = echo ' $(GUI)COMPILE FPIC '$(GOBJ);
+ print_shared_lib_compile = echo ' $(GUI)COMPILE SHARED LIB '$(GOBJ);
+ print_plugin_obj_compile = echo ' $(GUI)COMPILE PLUGIN OBJ '$(GOBJ);
+ print_plugin_build = echo ' $(GUI)BUILD PLUGIN '$(GOBJ);
+ print_static_lib_build = echo ' $(GUI)BUILD STATIC LIB '$(GOBJ);
+ print_update = echo ' $(GUI)UPDATE '$(GOBJ);
+ print_uninstall = echo ' $(GUI)UNINSTALLING $(DESTDIR_SQ)$1';
+ print_asciidoc = echo ' ASCIIDOC '`basename $@`;
+ print_xsltproc = echo ' XSLTPROC '`basename $@`;
+ # $1 and $2 are the arguments of the enclosing $(call do_install*,...).
+ print_install = echo ' INSTALL '`basename $1`' to $(DESTDIR_SQ)'$2;
+ hide_xsltproc_output = 2> /dev/null
+endif
+
+# Compile the first prerequisite into the target as position-independent code.
+do_fpic_compile = \
+ ($(print_fpic_compile) \
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) $(EXT) -fPIC $< -o $@)
+
+# Compile the first prerequisite into the target; uses do_fpic_compile
+# instead when GENERATE_PIC is non-empty.
+do_compile = \
+ ($(if $(GENERATE_PIC), $(do_fpic_compile), \
+ $(print_compile) \
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) $(EXT) $< -o $@))
+
+# Link all prerequisites into an executable with an rpath of $(libdir).
+do_app_build = \
+ ($(print_app_build) \
+ $(CC) $^ -rdynamic -Wl,-rpath=$(libdir) -o $@ $(LDFLAGS) $(CONFIG_LIBS) $(LIBS))
+
+# Recreate the static archive $@ from all prerequisites.
+do_build_static_lib = \
+ ($(print_static_lib_build) \
+ $(RM) $@; $(AR) rcs $@ $^)
+
+# Link all prerequisites into a shared library; $(1) is the soname.
+do_compile_shared_library = \
+ ($(print_shared_lib_compile) \
+ $(CC) --shared $^ '-Wl,-soname,$(1),-rpath=$$ORIGIN' -o $@ $(LDFLAGS) $(LIBS))
+
+# Compile a plugin source file into a position-independent object.
+do_compile_plugin_obj = \
+ ($(print_plugin_obj_compile) \
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) -fPIC -o $@ $<)
+
+# Link a plugin object into a shared object without the standard start files.
+do_plugin_build = \
+ ($(print_plugin_build) \
+ $(CC) $(CFLAGS) $(LDFLAGS) -shared -nostartfiles -o $@ $<)
+
+# Same as do_compile_plugin_obj, with the Python directory and include flags added.
+do_compile_python_plugin_obj = \
+ ($(print_plugin_obj_compile) \
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) $(PYTHON_DIR_SQ) $(PYTHON_INCLUDES) -fPIC -o $@ $<)
+
+# Link the Python plugin object into a shared object using PYTHON_LDFLAGS.
+do_python_plugin_build = \
+ ($(print_plugin_build) \
+ $(CC) $< -shared $(LDFLAGS) $(PYTHON_LDFLAGS) -o $@)
+
+# make_version.h: write a generated C header to the file named by $1.
+# The header defines VERSION_CODE (VERSION * 256 + PATCHLEVEL), EXTRAVERSION,
+# VERSION_STRING, FILE_VERSION and VERSION_GIT. VERSION_GIT is the current
+# commit hash, with "+" appended when "git diff" prints anything, or
+# "not-a-git-repo" when $(src)/.git is not a directory.
+define make_version.h
+ (echo '/* This file is automatically generated. Do not modify. */'; \
+ echo \#define VERSION_CODE $(shell \
+ expr $(VERSION) \* 256 + $(PATCHLEVEL)); \
+ echo '#define EXTRAVERSION ' $(EXTRAVERSION); \
+ echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \
+ echo '#define FILE_VERSION '$(FILE_VERSION); \
+ if [ -d $(src)/.git ]; then \
+ d=`git diff`; \
+ x=""; \
+ if [ ! -z "$$d" ]; then x="+"; fi; \
+ echo '#define VERSION_GIT "'$(shell \
+ git log -1 --pretty=format:"%H" 2>/dev/null)$$x'"'; \
+ else \
+ echo '#define VERSION_GIT "not-a-git-repo"'; \
+ fi \
+ ) > $1
+endef
+
+# update_version.h: generate the header into $@.tmp and replace $@ only when
+# the content differs, so an unchanged header keeps its timestamp.
+define update_version.h
+ ($(call make_version.h, $@.tmp); \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ $(print_update) \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+# update_dir: store the text $1 in $@, rewriting $@ only when it changed.
+define update_dir
+ (echo $1 > $@.tmp; \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ $(print_update) \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+# build_prefix: same body as update_dir; stores $1 in $@ when it changed.
+define build_prefix
+ (echo $1 > $@.tmp; \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ $(print_update) \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+# do_install: install file(s) $1 into directory $(DESTDIR_SQ)$2, creating the
+# directory with mode 755 first if needed. $3, when given, is the file mode.
+define do_install
+ $(print_install) \
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+# do_install_data: like do_install, always using file mode 644.
+define do_install_data
+ $(print_install) \
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) -m 644 $1 '$(DESTDIR_SQ)$2'
+endef
+
+# do_install_pkgconfig_file: install $(PKG_CONFIG_FILE) into $(pkgconfig_dir)
+# with mode 644, or print an error on stderr when pkgconfig_dir is empty.
+define do_install_pkgconfig_file
+ if [ -n "${pkgconfig_dir}" ]; then \
+ $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \
+ else \
+ (echo Failed to locate pkg-config directory) 1>&2; \
+ fi
+endef
+
+# do_make_pkgconfig_file: copy the .template file to $(PKG_CONFIG_FILE) and
+# substitute the placeholders in place; $1 replaces INSTALL_PREFIX.
+define do_make_pkgconfig_file
+ $(print_app_build)
+ $(Q)cp -f $(srctree)/${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \
+ sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \
+ sed -i "s|LIB_VERSION|${LIBTRACECMD_VERSION}|g" ${PKG_CONFIG_FILE}; \
+ sed -i "s|LIB_DIR|$(libdir)|g" ${PKG_CONFIG_FILE}; \
+ sed -i "s|LIBTRACEFS_MIN_VERSION|$(LIBTRACEFS_MIN_VERSION)|g" ${PKG_CONFIG_FILE}; \
+ sed -i "s|HEADER_DIR|$(includedir)/trace-cmd|g" ${PKG_CONFIG_FILE};
+endef
+
+# Convert the asciidoc source $< into docbook XML $@.
+do_asciidoc_build = \
+ ($(print_asciidoc) \
+ asciidoc -d manpage -b docbook -o $@ $<)
+
+# Run xsltproc with the man page stylesheet on $< to produce $@.
+do_xsltproc_build = \
+ ($(print_xsltproc) \
+ xsltproc --nonet -o $@ ${MANPAGE_DOCBOOK_XSL} $< $(hide_xsltproc_output))
+
+#
+# asciidoc requires a synopsis, but file format man pages (5) do
+# not require them. This removes it from the file in the final step.
+# The deleted range runs from the SYNOPSIS header to the next line
+# containing "ignore"; $1 is rewritten through $1.tmp.
+define remove_synopsis
+ (sed -e '/^\.SH "SYNOPSIS"/,/ignore/d' $1 > $1.tmp;\
+ mv $1.tmp $1)
+endef
+
+# do_install_docs: same body as do_install_data (mode 644 into $(DESTDIR_SQ)$2).
+define do_install_docs
+ $(print_install) \
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) -m 644 $1 '$(DESTDIR_SQ)$2'
+endef
+
+# Define the QUIET_* prefixes unless make runs silently (-s) or V=1 is set.
+# The single-letter options are grouped in the first word of MAKEFLAGS, so
+# look for "s" there. The arguments of findstring are (needle, haystack);
+# the leading "-" keeps firstword from picking up a variable assignment when
+# no single-letter option is present.
+ifneq ($(findstring s,$(firstword -$(MAKEFLAGS))),s)
+ifneq ($(V),1)
+ QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
+ QUIET_XMLTO = @echo ' XMLTO '$@;
+ QUIET_SUBDIR0 = +@subdir=
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
+ echo ' SUBDIR ' $$subdir; \
+ $(MAKE) $(PRINT_DIR) -C $$subdir
+ export V
+endif
+endif
diff --git a/tracecmd/.gitignore b/tracecmd/.gitignore
new file mode 100644
index 00000000..30f93eb4
--- /dev/null
+++ b/tracecmd/.gitignore
@@ -0,0 +1 @@
+trace-cmd
diff --git a/tracecmd/Makefile b/tracecmd/Makefile
new file mode 100644
index 00000000..0114948f
--- /dev/null
+++ b/tracecmd/Makefile
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Version numbers used by make_version.h (scripts/utils.mk). They are expanded
+# immediately (:=), before TC_VERSION is reused below for a file name.
+VERSION := $(TC_VERSION)
+PATCHLEVEL := $(TC_PATCHLEVEL)
+EXTRAVERSION := $(TC_EXTRAVERSION)
+
+# Build output directory for this sub-directory.
+bdir:=$(obj)/tracecmd
+
+# From here on TC_VERSION names the generated version header.
+TC_VERSION := $(bdir)/include/tc_version.h
+TARGETS = $(bdir)/trace-cmd $(TC_VERSION)
+
+BUILDGUI := 0
+include $(src)/scripts/utils.mk
+
+# Make the generated header directory visible to the compiler.
+CFLAGS += -I$(bdir)/include
+
+# Object files linked into the trace-cmd executable. Each object is listed
+# once; trace-vsock.o is only built when VSOCK_DEFINED is 1.
+TRACE_CMD_OBJS =
+TRACE_CMD_OBJS += trace-cmd.o
+TRACE_CMD_OBJS += trace-record.o
+TRACE_CMD_OBJS += trace-read.o
+TRACE_CMD_OBJS += trace-split.o
+TRACE_CMD_OBJS += trace-listen.o
+TRACE_CMD_OBJS += trace-stack.o
+TRACE_CMD_OBJS += trace-hist.o
+TRACE_CMD_OBJS += trace-mem.o
+TRACE_CMD_OBJS += trace-snapshot.o
+TRACE_CMD_OBJS += trace-stat.o
+TRACE_CMD_OBJS += trace-profile.o
+TRACE_CMD_OBJS += trace-stream.o
+TRACE_CMD_OBJS += trace-restore.o
+TRACE_CMD_OBJS += trace-check-events.o
+TRACE_CMD_OBJS += trace-show.o
+TRACE_CMD_OBJS += trace-list.o
+TRACE_CMD_OBJS += trace-usage.o
+TRACE_CMD_OBJS += trace-dump.o
+TRACE_CMD_OBJS += trace-clear.o
+TRACE_CMD_OBJS += trace-vm.o
+TRACE_CMD_OBJS += trace-convert.o
+TRACE_CMD_OBJS += trace-agent.o
+TRACE_CMD_OBJS += trace-setup-guest.o
+ifeq ($(VSOCK_DEFINED), 1)
+TRACE_CMD_OBJS += trace-vsock.o
+endif
+
+# Object paths inside the build directory.
+ALL_OBJS := $(TRACE_CMD_OBJS:%.o=$(bdir)/%.o)
+
+# Sorted object list and the matching hidden dependency files (.name.d).
+all_objs := $(sort $(ALL_OBJS))
+all_deps := $(all_objs:$(bdir)/%.o=$(bdir)/.%.d)
+
+CONFIG_INCLUDES =
+CONFIG_LIBS = -lrt -lpthread $(TRACE_LIBS) $(LIBZSTD_LDLAGS)
+CONFIG_FLAGS =
+
+ifeq ($(ZLIB_INSTALLED), 1)
+CONFIG_LIBS += -lz
+endif
+
+all: $(TARGETS)
+
+$(bdir):
+ @mkdir -p $(bdir)
+
+$(bdir)/include: | $(bdir)
+ @mkdir -p $(bdir)/include
+
+# Run on every build (depends on "force"); update_version.h only replaces the
+# header when its content changed.
+$(TC_VERSION): force | $(bdir)/include
+ $(Q)$(call update_version.h)
+
+# The build directory is an order-only prerequisite: it must exist, but its
+# timestamp does not trigger rebuilds.
+$(all_deps): | $(bdir)
+$(all_objs): | $(bdir)
+
+$(bdir)/trace-cmd: $(ALL_OBJS)
+ $(Q)$(do_app_build)
+
+$(bdir)/trace-cmd: $(LIBTRACECMD_STATIC)
+
+$(bdir)/%.o: %.c
+ $(Q)$(call do_compile)
+
+# Generate the dependency file for each source with the compiler's -M output,
+# naming the object file as the rule target (-MT).
+$(all_deps): $(bdir)/.%.d: %.c
+ $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@
+
+$(all_deps): $(TC_VERSION)
+
+$(all_objs): $(bdir)/%.o : $(bdir)/.%.d
+
+# Include the dependency files that already exist.
+# NOTE(review): DEPS is not assigned anywhere in this Makefile (the generated
+# files are listed in all_deps); unless DEPS is provided by the including
+# Makefile this expands to nothing and no .d file is ever included - confirm.
+dep_includes := $(wildcard $(DEPS))
+
+ifneq ($(dep_includes),)
+ include $(dep_includes)
+endif
+
+# Remove everything this Makefile builds in the build directory.
+clean:
+	$(RM) $(bdir)/*.a $(bdir)/*.so $(bdir)/*.o $(bdir)/.*.d $(TARGETS)
+
+# "force" has no recipe and no prerequisites; depending on it makes a target
+# run on every build. None of these names are files, so declare them phony to
+# keep a stray file called "all", "clean" or "force" from disabling them.
+force:
+.PHONY: all clean force
diff --git a/tracecmd/include/bug.h b/tracecmd/include/bug.h
new file mode 100644
index 00000000..9222f935
--- /dev/null
+++ b/tracecmd/include/bug.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+#ifndef __TRACE_CMD_BUG
+#define __TRACE_CMD_BUG
+
+/* Tell the compiler that @cond is expected to be false. */
+#define unlikely(cond) __builtin_expect(!!(cond), 0)
+
+/*
+ * WARN_ONCE - print a warning the first time a condition is true
+ * @cond: the condition to test (evaluated exactly once per use)
+ * @fmt: printf style format passed to warning(), followed by its arguments
+ *
+ * Each place the macro is used has its own "already warned" flag, so the
+ * warning() call is made at most once per call site. The macro evaluates to
+ * the value of @cond, so it can be used inside an if ().
+ */
+#define WARN_ONCE(cond, fmt, ...)			\
+	({						\
+		static int __warned__;			\
+		int __c__ = (cond);			\
+		if (unlikely(__c__) && !__warned__) {	\
+			__warned__ = 1;			\
+			warning(fmt, ##__VA_ARGS__);	\
+		}					\
+		__c__;					\
+	})
+#endif /* __TRACE_CMD_BUG */
diff --git a/tracecmd/include/list.h b/tracecmd/include/list.h
new file mode 100644
index 00000000..fa0de6df
--- /dev/null
+++ b/tracecmd/include/list.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef __LIST_H
+#define __LIST_H
+
+/* Byte offset of member @field inside @type. */
+#define offset_of(type, field) __builtin_offsetof(type, field)
+
+/*
+ * container_of - get the structure that embeds a member
+ * @p: pointer to the member
+ * @type: type of the embedding structure
+ * @field: name of the member inside @type
+ *
+ * The arithmetic is done on a char pointer, so it does not depend on a
+ * pointer fitting into a long, and the whole expansion is parenthesized so
+ * that the result can be followed directly by "->".
+ */
+#define container_of(p, type, field) \
+	((type *)((char *)(p) - offset_of(type, field)))
+
+/*
+ * Node of a circular doubly linked list. A list head initialized with
+ * list_head_init() points to itself in both directions.
+ */
+struct list_head {
+ struct list_head *next;
+ struct list_head *prev;
+};
+
+/* Make @list an empty list: both links refer back to the head itself. */
+static inline void list_head_init(struct list_head *list)
+{
+	list->prev = list->next = list;
+}
+
+/* Insert @p right after @head, i.e. at the front of the list. */
+static inline void list_add(struct list_head *p, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	head->next = p;
+	first->prev = p;
+	p->next = first;
+	p->prev = head;
+}
+
+/* Insert @p right before @head, i.e. at the end of the list. */
+static inline void list_add_tail(struct list_head *p, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	head->prev = p;
+	last->next = p;
+	p->next = head;
+	p->prev = last;
+}
+
+/*
+ * Unlink @p from its list by joining its neighbours. The links stored in @p
+ * itself are left untouched.
+ */
+static inline void list_del(struct list_head *p)
+{
+	struct list_head *after = p->next;
+	struct list_head *before = p->prev;
+
+	before->next = after;
+	after->prev = before;
+}
+
+/* Return non-zero when @list has no entries (the head points to itself). */
+static inline int list_empty(struct list_head *list)
+{
+	return list == list->next;
+}
+
+/*
+ * list_for_each_entry - iterate over the entries of a list
+ * @p: cursor, a pointer to the structure that embeds the list node
+ * @list: pointer to the list head
+ * @field: name of the struct list_head member inside the structure
+ *
+ * All macro arguments are parenthesized so that expressions such as
+ * "&obj->head" or "cond ? a : b" can be passed for @list.
+ */
+#define list_for_each_entry(p, list, field) \
+	for ((p) = container_of((list)->next, typeof(*(p)), field); \
+	     &(p)->field != (list); \
+	     (p) = container_of((p)->field.next, typeof(*(p)), field))
+
+/*
+ * list_for_each_entry_safe - iterate while allowing removal of the cursor
+ * @n: second cursor of the same type as @p; it always holds the entry that
+ *     follows @p, so @p may be unlinked or freed inside the loop body.
+ */
+#define list_for_each_entry_safe(p, n, list, field) \
+	for ((p) = container_of((list)->next, typeof(*(p)), field), \
+	     (n) = container_of((p)->field.next, typeof(*(p)), field); \
+	     &(p)->field != (list); \
+	     (p) = (n), (n) = container_of((p)->field.next, typeof(*(p)), field))
+
+#endif /* __LIST_H */
diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
new file mode 100644
index 00000000..e3fec131
--- /dev/null
+++ b/tracecmd/include/trace-local.h
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef __TRACE_LOCAL_H
+#define __TRACE_LOCAL_H
+
+#include <sys/types.h>
+#include <dirent.h> /* for DIR */
+#include <ctype.h> /* for isdigit() */
+#include <errno.h>
+#include <limits.h>
+
+#include "trace-cmd-private.h"
+#include "event-utils.h"
+
+/* Port used by trace_agent() when no -p option is given. */
+#define TRACE_AGENT_DEFAULT_PORT 823
+
+#define DEFAULT_INPUT_FILE "trace.dat"
+/* Common name prefix of the per-CPU pipes; the CPU number is appended. */
+#define GUEST_PIPE_NAME "trace-pipe-cpu"
+/* printf formats: %s is filled with a name, %d with a CPU number. */
+#define GUEST_DIR_FMT "/var/lib/trace-cmd/virt/%s"
+#define GUEST_FIFO_FMT GUEST_DIR_FMT "/" GUEST_PIPE_NAME "%d"
+#define VIRTIO_FIFO_FMT "/dev/virtio-ports/" GUEST_PIPE_NAME "%d"
+
+/* fix stupid glib guint64 typecasts and printf formats */
+typedef unsigned long long u64;
+
+struct buffer_instance;
+
+#define __printf(a, b) __attribute__((format(printf,a,b)))
+
+__printf(1,2)
+void warning(const char *fmt, ...);
+
+/* for local shared information with trace-cmd executable */
+
+void usage(char **argv);
+
+extern int silence_warnings;
+extern int show_status;
+
+int trace_set_verbose(char *level);
+
+/* Kind of connection to use; zero-initialized storage selects USE_UDP. */
+enum port_type {
+ USE_UDP = 0, /* Default setting */
+ USE_TCP,
+ USE_VSOCK
+};
+
+/*
+ * Per-recorder bookkeeping, passed as an array to trace_stream_read().
+ * NOTE(review): brass[2] looks like a pipe descriptor pair - confirm against
+ * the code that fills it in.
+ */
+struct pid_record_data {
+ int pid;
+ int brass[2];
+ int cpu;
+ int closed;
+ struct tracecmd_input *stream;
+ struct buffer_instance *instance;
+ struct tep_record *record;
+};
+
+void show_file(const char *name);
+
+struct tracecmd_input *read_trace_header(const char *file, int flags);
+int read_trace_files(void);
+
+void trace_record(int argc, char **argv);
+
+void trace_stop(int argc, char **argv);
+
+void trace_restart(int argc, char **argv);
+
+void trace_reset(int argc, char **argv);
+
+void trace_start(int argc, char **argv);
+
+void trace_set(int argc, char **argv);
+
+void trace_extract(int argc, char **argv);
+
+void trace_stream(int argc, char **argv);
+
+void trace_profile(int argc, char **argv);
+
+void trace_report(int argc, char **argv);
+
+void trace_split(int argc, char **argv);
+
+void trace_listen(int argc, char **argv);
+
+void trace_agent(int argc, char **argv);
+
+void trace_setup_guest(int argc, char **argv);
+
+void trace_restore(int argc, char **argv);
+
+void trace_clear(int argc, char **argv);
+
+void trace_check_events(int argc, char **argv);
+
+void trace_stack(int argc, char **argv);
+
+void trace_option(int argc, char **argv);
+
+void trace_hist(int argc, char **argv);
+
+void trace_snapshot(int argc, char **argv);
+
+void trace_mem(int argc, char **argv);
+
+void trace_stat(int argc, char **argv);
+
+void trace_show(int argc, char **argv);
+
+void trace_list(int argc, char **argv);
+
+void trace_usage(int argc, char **argv);
+
+void trace_dump(int argc, char **argv);
+
+void trace_convert(int argc, char **argv);
+
+int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
+ int cpus, int *fds,
+ int argc, char **argv, bool use_fifos,
+ unsigned long long trace_id, const char *host);
+
+struct hook_list;
+
+void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hooks,
+ int global);
+int do_trace_profile(void);
+void trace_profile_set_merge_like_comms(void);
+
+struct tracecmd_input *
+trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus,
+ struct hook_list *hooks,
+ tracecmd_handle_init_func handle_init, int global);
+int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval *tv);
+
+void trace_show_data(struct tracecmd_input *handle, struct tep_record *record);
+
+/* --- event iteration --- */
+
+/*
+ * Use this to iterate through the event directories
+ */
+
+
+enum event_process {
+ PROCESSED_NONE,
+ PROCESSED_EVENT,
+ PROCESSED_SYSTEM
+};
+
+enum process_type {
+ PROCESS_EVENT,
+ PROCESS_SYSTEM
+};
+
+/*
+ * Iterator state: an open directory stream and the current entry for the
+ * system level and for the event level.
+ */
+struct event_iter {
+ DIR *system_dir;
+ DIR *event_dir;
+ struct dirent *system_dent;
+ struct dirent *event_dent;
+};
+
+/* Result of trace_event_iter_next(). */
+enum event_iter_type {
+ EVENT_ITER_NONE,
+ EVENT_ITER_SYSTEM,
+ EVENT_ITER_EVENT
+};
+
+struct event_iter *trace_event_iter_alloc(const char *path);
+enum event_iter_type trace_event_iter_next(struct event_iter *iter,
+ const char *path, const char *system);
+void trace_event_iter_free(struct event_iter *iter);
+
+char *append_file(const char *dir, const char *name);
+char *get_file_content(const char *file);
+
+char *strstrip(char *str);
+
+/* --- instance manipulation --- */
+
+/* Bit flags stored in buffer_instance.flags (see is_agent() and friends). */
+enum buffer_instance_flags {
+ BUFFER_FL_KEEP = 1 << 0,
+ BUFFER_FL_PROFILE = 1 << 1,
+ BUFFER_FL_GUEST = 1 << 2,
+ BUFFER_FL_AGENT = 1 << 3,
+ BUFFER_FL_HAS_CLOCK = 1 << 4,
+ BUFFER_FL_TSC2NSEC = 1 << 5,
+ BUFFER_FL_NETWORK = 1 << 6,
+};
+
+/* Singly linked list of function names, each with a module name. */
+struct func_list {
+ struct func_list *next;
+ const char *func;
+ const char *mod;
+};
+
+/* Singly linked list of per-process address maps. */
+struct pid_addr_maps {
+ struct pid_addr_maps *next;
+ struct tracecmd_proc_addr_map *lib_maps;
+ unsigned int nr_lib_maps;
+ char *proc_name;
+ int pid;
+};
+
+/* Singly linked list of option strings. */
+struct opt_list {
+ struct opt_list *next;
+ const char *option;
+};
+
+/* Singly linked list of PIDs, each with an exclude marker. */
+struct filter_pids {
+ struct filter_pids *next;
+ int pid;
+ int exclude;
+};
+
+/* NOTE(review): presumably the mult/shift/offset used to convert TSC to ns. */
+struct tsc_nsec {
+ int mult;
+ int shift;
+ unsigned long long offset;
+};
+
+/*
+ * State kept for one tracing buffer instance. Instances are chained through
+ * @next (walked by for_each_instance()/for_all_instances()), and @flags holds
+ * BUFFER_FL_* bits tested by is_agent(), is_guest() and is_network().
+ */
+struct buffer_instance {
+ struct buffer_instance *next;
+ char *name;
+ struct tracefs_instance *tracefs;
+ unsigned long long trace_id;
+ char *cpumask;
+ char *output_file;
+ struct event_list *events;
+ struct event_list **event_next;
+ bool delete;
+
+ struct event_list *sched_switch_event;
+ struct event_list *sched_wakeup_event;
+ struct event_list *sched_wakeup_new_event;
+
+ const char *plugin;
+ char *filter_mod;
+ struct func_list *filter_funcs;
+ struct func_list *notrace_funcs;
+
+ struct opt_list *options;
+ struct filter_pids *filter_pids;
+ struct filter_pids *process_pids;
+ char *common_pid_filter;
+ int nr_filter_pids;
+ int len_filter_pids;
+ int nr_process_pids;
+ bool ptrace_child;
+
+ int have_set_event_pid;
+ int have_event_fork;
+ int have_func_fork;
+ int get_procmap;
+
+ const char *clock;
+ unsigned int *client_ports;
+
+ struct trace_seq *s_save;
+ struct trace_seq *s_print;
+
+ struct tracecmd_input *handle;
+
+ struct tracecmd_msg_handle *msg_handle;
+ struct tracecmd_output *network_handle;
+ const char *host;
+
+ struct pid_addr_maps *pid_maps;
+
+ char *max_graph_depth;
+
+ int flags; /* BUFFER_FL_* bits */
+ int tracing_on_init_val;
+ int tracing_on_fd;
+ int buffer_size;
+ int cpu_count;
+
+ int argc;
+ char **argv;
+
+ struct addrinfo *result;
+ unsigned int cid;
+ unsigned int port;
+ int *fds;
+ bool use_fifos;
+
+ enum port_type port_type; /* Default to USE_UDP (zero) */
+ int tsync_loop_interval;
+ struct tracecmd_time_sync *tsync;
+};
+
+void init_top_instance(void);
+
+extern struct buffer_instance top_instance;
+extern struct buffer_instance *buffer_instances;
+extern struct buffer_instance *first_instance;
+
+/*
+ * for_each_instance - walk the buffer_instances list.
+ * for_all_instances - walk from first_instance; after top_instance the walk
+ * continues with the buffer_instances list.
+ * The cursor argument is parenthesized so any lvalue expression can be used.
+ */
+#define for_each_instance(i) for ((i) = buffer_instances; (i); (i) = (i)->next)
+#define for_all_instances(i) for ((i) = first_instance; (i); \
+	(i) = (i) == &top_instance ? buffer_instances : (i)->next)
+
+/* Flag tests; each evaluates to non-zero when the BUFFER_FL_* bit is set. */
+#define is_agent(instance) ((instance)->flags & BUFFER_FL_AGENT)
+#define is_guest(instance) ((instance)->flags & BUFFER_FL_GUEST)
+#define is_network(instance) ((instance)->flags & BUFFER_FL_NETWORK)
+
+#define START_PORT_SEARCH 1500
+#define MAX_PORT_SEARCH 6000
+
+struct sockaddr_storage;
+
+int trace_net_make(int port, enum port_type type);
+int trace_net_search(int start_port, int *sfd, enum port_type type);
+int trace_net_print_connection(int fd);
+bool trace_net_cmp_connection(struct sockaddr_storage *addr, const char *name);
+bool trace_net_cmp_connection_fd(int fd, const char *name);
+
+struct buffer_instance *allocate_instance(const char *name);
+void add_instance(struct buffer_instance *instance, int cpu_count);
+void update_first_instance(struct buffer_instance *instance, int topt);
+
+void show_instance_file(struct buffer_instance *instance, const char *name);
+void show_options(const char *prefix, struct buffer_instance *buffer);
+
+/*
+ * Description of one guest, as returned by trace_get_guest().
+ * NOTE(review): cpu_pid and task_pids look like arrays of PIDs (cpu_pid
+ * presumably sized by cpu_max) - confirm against the code that fills them.
+ */
+struct trace_guest {
+ struct tracefs_instance *instance;
+ char *name;
+ int cid;
+ int pid;
+ int cpu_max;
+ int *cpu_pid;
+ int *task_pids;
+};
+struct trace_guest *trace_get_guest(unsigned int cid, const char *name);
+bool trace_have_guests_pid(void);
+void read_qemu_guests(void);
+int get_guest_pid(unsigned int guest_cid);
+int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu);
+
+/* moved from trace-cmd.h */
+void tracecmd_remove_instances(void);
+int tracecmd_add_event(const char *event_str, int stack);
+void tracecmd_enable_events(void);
+void tracecmd_disable_all_tracing(int disable_tracer);
+void tracecmd_disable_tracing(void);
+void tracecmd_enable_tracing(void);
+void tracecmd_stat_cpu(struct trace_seq *s, int cpu);
+
+int tracecmd_host_tsync(struct buffer_instance *instance,
+ unsigned int tsync_port);
+void tracecmd_host_tsync_complete(struct buffer_instance *instance);
+const char *tracecmd_guest_tsync(struct tracecmd_tsync_protos *tsync_protos,
+ char *clock, unsigned int *tsync_port,
+ pthread_t *thr_id);
+
+int trace_make_vsock(unsigned int port);
+int trace_get_vsock_port(int sd, unsigned int *port);
+int trace_open_vsock(unsigned int cid, unsigned int port);
+
+int get_local_cid(unsigned int *cid);
+
+char *trace_get_guest_file(const char *file, const char *guest);
+
+/* vsock helpers: real implementations when built with VSOCK ... */
+#ifdef VSOCK
+int trace_vsock_open(unsigned int cid, unsigned int port);
+int trace_vsock_make(unsigned int port);
+int trace_vsock_make_any(void);
+int get_vsocket_params(int fd, unsigned int *lcid, unsigned int *rcid);
+int trace_vsock_get_port(int sd, unsigned int *port);
+bool trace_vsock_can_splice_read(void);
+int trace_vsock_local_cid(void);
+int trace_vsock_print_connection(int fd);
+#else
+/* ... otherwise inline stubs that report the feature as unsupported. */
+static inline int trace_vsock_open(unsigned int cid, unsigned int port)
+{
+ return -ENOTSUP;
+}
+
+static inline int trace_vsock_make(unsigned int port)
+{
+ return -ENOTSUP;
+
+}
+
+static inline int trace_vsock_make_any(void)
+{
+ return -ENOTSUP;
+
+}
+
+static inline int get_vsocket_params(int fd, unsigned int *lcid, unsigned int *rcid)
+{
+ return -ENOTSUP;
+}
+
+static inline int trace_vsock_get_port(int sd, unsigned int *port)
+{
+ return -ENOTSUP;
+}
+
+static inline bool trace_vsock_can_splice_read(void)
+{
+ return false;
+}
+
+static inline int trace_vsock_local_cid(void)
+{
+ return -ENOTSUP;
+}
+static inline int trace_vsock_print_connection(int fd)
+{
+ return -1;
+}
+#endif /* VSOCK */
+
+/* No longer in event-utils.h */
+__printf(1,2)
+void __noreturn die(const char *fmt, ...); /* Can be overridden */
+void *malloc_or_die(unsigned int size); /* Can be overridden */
+__printf(1,2)
+void __noreturn __die(const char *fmt, ...);
+void __noreturn _vdie(const char *fmt, va_list ap);
+
+/*
+ * Return true when every character of @s is a decimal digit. An empty
+ * string has no offending character and therefore also yields true.
+ */
+static inline bool is_digits(const char *s)
+{
+	for (; *s; s++) {
+		/*
+		 * isdigit() is only defined for values representable as
+		 * unsigned char (or EOF); a plain char may be negative.
+		 */
+		if (!isdigit((unsigned char)*s))
+			return false;
+	}
+	return true;
+}
+
+bool trace_tsc2nsec_is_supported(void);
+
+#endif /* __TRACE_LOCAL_H */
diff --git a/tracecmd/trace-agent.c b/tracecmd/trace-agent.c
new file mode 100644
index 00000000..f0723a66
--- /dev/null
+++ b/tracecmd/trace-agent.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 VMware Inc, Slavomir Kaslev <kaslevs@vmware.com>
+ *
+ * based on prior implementation by Yoshihiro Yunomae
+ * Copyright (C) 2013 Hitachi, Ltd.
+ * Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "trace-local.h"
+#include "trace-msg.h"
+
+#define dprint(fmt, ...) tracecmd_debug(fmt, ##__VA_ARGS__)
+
+/*
+ * Open @nr vsock sockets. For each one the descriptor is stored in @fds and
+ * the port reported by trace_vsock_get_port() in @ports. Any failure is fatal.
+ */
+static void make_vsocks(int nr, int *fds, unsigned int *ports)
+{
+	int idx;
+
+	for (idx = 0; idx < nr; idx++) {
+		unsigned int sock_port;
+		int sock = trace_vsock_make_any();
+
+		if (sock < 0)
+			die("Failed to open vsocket");
+
+		if (trace_vsock_get_port(sock, &sock_port) < 0)
+			die("Failed to get vsocket address");
+
+		fds[idx] = sock;
+		ports[idx] = sock_port;
+	}
+}
+
+/*
+ * Open @nr listening TCP sockets. The port search starts at
+ * START_PORT_SEARCH and continues just above the previously found port.
+ * Descriptors go to @fds, port numbers to @ports. Any failure is fatal.
+ */
+static void make_net(int nr, int *fds, unsigned int *ports)
+{
+	int search_from = START_PORT_SEARCH;
+	int cpu;
+
+	for (cpu = 0; cpu < nr; cpu++) {
+		int sock;
+		int found = trace_net_search(search_from, &sock, USE_TCP);
+
+		if (found < 0)
+			die("Failed to open socket");
+		if (listen(sock, 5) < 0)
+			die("Failed to listen on port %d\n", found);
+
+		fds[cpu] = sock;
+		ports[cpu] = found;
+		dprint("CPU[%d]: fd:%d port:%d\n", cpu, sock, found);
+		search_from = found + 1;
+	}
+}
+
+/*
+ * Create the @nr data sockets: network (TCP) sockets when @network is set,
+ * vsock sockets otherwise. See make_net() and make_vsocks().
+ */
+static void make_sockets(int nr, int *fds, unsigned int *ports,
+			 const char *network)
+{
+	/* A void function must not "return" an expression in ISO C. */
+	if (network)
+		make_net(nr, fds, ports);
+	else
+		make_vsocks(nr, fds, ports);
+}
+
+/*
+ * Open one write-only FIFO per CPU, using the VIRTIO_FIFO_FMT path, and store
+ * the descriptors in @fds.
+ *
+ * Returns 0 on success. On failure every FIFO opened so far is closed again
+ * and the negated errno of the failing open() is returned.
+ */
+static int open_agent_fifos(int nr_cpus, int *fds)
+{
+	char fifo[PATH_MAX];
+	int cpu;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		int fd;
+
+		snprintf(fifo, sizeof(fifo), VIRTIO_FIFO_FMT, cpu);
+		fd = open(fifo, O_WRONLY);
+		if (fd < 0) {
+			/* Save the error before close() can overwrite errno */
+			int err = -errno;
+
+			while (cpu-- > 0)
+				close(fds[cpu]);
+			return err;
+		}
+
+		fds[cpu] = fd;
+	}
+
+	return 0;
+}
+
+/*
+ * Look for a "-C" option in @argv and return the argument that follows it,
+ * or NULL when there is no such option (or no arguments at all).
+ */
+static char *get_clock(int argc, char **argv)
+{
+	int pos;
+
+	if (argc == 0 || argv == NULL)
+		return NULL;
+
+	/* Stop one short of the end so that argv[pos + 1] always exists */
+	for (pos = 0; pos + 1 < argc; pos++) {
+		if (strcmp("-C", argv[pos]) == 0)
+			return argv[pos + 1];
+	}
+
+	return NULL;
+}
+
+/*
+ * Print information about connection @fd with the network or the vsock
+ * helper, depending on @network. A failure only produces a debug message.
+ */
+static void trace_print_connection(int fd, const char *network)
+{
+	int rc = network ? trace_net_print_connection(fd)
+			 : trace_vsock_print_connection(fd);
+
+	if (rc < 0)
+		tracecmd_debug("Could not print connection fd:%d\n", fd);
+}
+
+/*
+ * Serve one trace request received on connection @sd, then exit the process.
+ *
+ * @sd: connected socket to the requesting side
+ * @nr_cpus: number of per-CPU data channels to set up
+ * @page_size: page size reported in the trace response
+ * @network: non-NULL when the connection is a network (TCP) one, NULL for vsock
+ *
+ * The function receives the trace request, opens the per-CPU FIFOs or
+ * sockets, optionally sets up time synchronization, sends the response and
+ * hands over to trace_record_agent(). It never returns: it ends in exit(0)
+ * or in die().
+ */
+static void agent_handle(int sd, int nr_cpus, int page_size, const char *network)
+{
+	struct tracecmd_tsync_protos *tsync_protos = NULL;
+	struct tracecmd_time_sync *tsync = NULL;
+	struct tracecmd_msg_handle *msg_handle;
+	char *tsync_proto = NULL;
+	unsigned long long trace_id;
+	unsigned int remote_id;
+	unsigned int local_id;
+	unsigned int tsync_port = 0;
+	unsigned int *ports;
+	char **argv = NULL;
+	int argc = 0;
+	bool use_fifos;
+	int *fds;
+	int ret;
+	int fd = -1;
+
+	fds = calloc(nr_cpus, sizeof(*fds));
+	ports = calloc(nr_cpus, sizeof(*ports));
+	if (!fds || !ports)
+		die("Failed to allocate memory");
+
+	msg_handle = tracecmd_msg_handle_alloc(sd, 0);
+	if (!msg_handle)
+		die("Failed to allocate message handle");
+
+	ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv,
+					  &use_fifos, &trace_id,
+					  &tsync_protos);
+	if (ret < 0)
+		die("Failed to receive trace request");
+
+	/* Fall back to sockets when the FIFOs cannot be opened */
+	if (use_fifos && open_agent_fifos(nr_cpus, fds))
+		use_fifos = false;
+
+	if (!use_fifos)
+		make_sockets(nr_cpus, fds, ports, network);
+	if (tsync_protos && tsync_protos->names) {
+		if (network) {
+			/* For now just use something */
+			remote_id = 2;
+			local_id = 1;
+			ret = trace_net_search(START_PORT_SEARCH, &fd, USE_TCP);
+			if (ret < 0) {
+				/*
+				 * No usable port: do not touch fd, just skip
+				 * the time sync like the vsock path does.
+				 */
+				fd = -1;
+			} else {
+				tsync_port = ret;
+				if (listen(fd, 5) < 0)
+					die("Failed to listen on %d\n", ret);
+			}
+		} else {
+			if (get_vsocket_params(msg_handle->fd, &local_id,
+					       &remote_id)) {
+				warning("Failed to get local and remote ids");
+				/* Just make something up */
+				remote_id = -1;
+				local_id = -2;
+			}
+			fd = trace_vsock_make_any();
+			if (fd >= 0 &&
+			    trace_vsock_get_port(fd, &tsync_port) < 0) {
+				close(fd);
+				fd = -1;
+			}
+		}
+		if (fd >= 0) {
+			tsync = tracecmd_tsync_with_host(fd, tsync_protos,
+							 get_clock(argc, argv),
+							 remote_id, local_id);
+		}
+		if (tsync) {
+			tracecmd_tsync_get_selected_proto(tsync, &tsync_proto);
+		} else {
+			warning("Failed to negotiate timestamps synchronization with the host");
+			if (fd >= 0)
+				close(fd);
+		}
+	}
+	/* The id sent back is generated here, replacing the received one */
+	trace_id = tracecmd_generate_traceid();
+	ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size,
+					   ports, use_fifos, trace_id,
+					   tsync_proto, tsync_port);
+	if (ret < 0)
+		die("Failed to send trace response");
+
+	trace_record_agent(msg_handle, nr_cpus, fds, argc, argv,
+			   use_fifos, trace_id, network);
+
+	if (tsync) {
+		tracecmd_tsync_with_host_stop(tsync);
+		tracecmd_tsync_free(tsync);
+	}
+
+	if (tsync_protos) {
+		free(tsync_protos->names);
+		free(tsync_protos);
+	}
+	/* argv stays NULL when the request carried no arguments */
+	if (argv) {
+		free(argv[0]);
+		free(argv);
+	}
+	free(ports);
+	free(fds);
+	tracecmd_msg_handle_close(msg_handle);
+	exit(0);
+}
+
+static volatile pid_t handler_pid;
+
+/*
+ * SIGCHLD handler: reap every child that has exited and clear handler_pid
+ * when the reaped child is the current request handler.
+ *
+ * waitpid() may modify errno, so the value seen by the interrupted code is
+ * saved and restored; the accept() loop relies on errno to detect EINTR.
+ */
+static void handle_sigchld(int sig)
+{
+	int saved_errno = errno;
+	int wstatus;
+	pid_t pid;
+
+	for (;;) {
+		pid = waitpid(-1, &wstatus, WNOHANG);
+		if (pid <= 0)
+			break;
+
+		if (pid == handler_pid)
+			handler_pid = 0;
+	}
+
+	errno = saved_errno;
+}
+
+/*
+ * fork() wrapper. In debug mode no child is created and 0 is returned, so
+ * the caller runs the child code path in the current process.
+ */
+static pid_t do_fork(void)
+{
+	/* in debug mode, we do not fork off children */
+	if (tracecmd_get_debug())
+		return 0;
+
+	return fork();
+}
+
+/*
+ * Accept connections forever and hand each one to agent_handle() in a child
+ * process. Only one handler runs at a time; connections that arrive while
+ * one is active are closed immediately.
+ *
+ * @port: port to listen on
+ * @do_daemon: detach with daemon() before serving
+ * @network: NULL to listen on a vsock socket; otherwise a TCP socket is used
+ *           and each client is checked with trace_net_cmp_connection()
+ *
+ * NOTE(review): page_size is not declared in this function; it is presumably
+ * a global provided by an included header - confirm.
+ */
+static void agent_serve(unsigned int port, bool do_daemon, const char *network)
+{
+	struct sockaddr_storage net_addr;
+	struct sockaddr *addr = NULL;
+	socklen_t *addr_len_p = NULL;
+	socklen_t addr_len = sizeof(net_addr);
+	int sd, cd, nr_cpus;
+	int cid;
+	pid_t pid;
+
+	signal(SIGCHLD, handle_sigchld);
+
+	if (network) {
+		addr = (struct sockaddr *)&net_addr;
+		addr_len_p = &addr_len;
+	}
+
+	nr_cpus = tracecmd_count_cpus();
+	page_size = getpagesize();
+
+	if (network) {
+		sd = trace_net_make(port, USE_TCP);
+		/* Only listen on a valid socket; failure is reported below */
+		if (sd >= 0 && listen(sd, 5) < 0)
+			die("Failed to listen on %d\n", port);
+	} else
+		sd = trace_vsock_make(port);
+	if (sd < 0)
+		die("Failed to open socket");
+	tracecmd_tsync_init();
+
+	if (!network) {
+		/* Signed on purpose: a negative value is an error code */
+		cid = trace_vsock_local_cid();
+		if (cid >= 0)
+			printf("listening on @%u:%u\n", (unsigned int)cid, port);
+	}
+
+	if (do_daemon && daemon(1, 0))
+		die("daemon");
+
+	for (;;) {
+		/* accept() overwrites the length, so reset it every time */
+		addr_len = sizeof(net_addr);
+		cd = accept(sd, addr, addr_len_p);
+		if (cd < 0) {
+			if (errno == EINTR)
+				continue;
+			die("accept");
+		}
+		if (tracecmd_get_debug())
+			trace_print_connection(cd, network);
+
+		if (network && !trace_net_cmp_connection(&net_addr, network)) {
+			dprint("Client does not match '%s'\n", network);
+			close(cd);
+			continue;
+		}
+
+		if (handler_pid)
+			goto busy;
+
+		pid = do_fork();
+		if (pid == 0) {
+			/* Child (or debug mode): agent_handle() never returns */
+			close(sd);
+			signal(SIGCHLD, SIG_DFL);
+			agent_handle(cd, nr_cpus, page_size, network);
+		}
+		if (pid > 0)
+			handler_pid = pid;
+
+busy:
+		close(cd);
+	}
+}
+
+/* getopt_long() return values for the long options without a short form. */
+enum {
+ OPT_verbose = 254,
+ DO_DEBUG = 255
+};
+
+/*
+ * Entry point of "trace-cmd agent".
+ *
+ * Options: -p/--port <port> selects the listening port (default
+ * TRACE_AGENT_DEFAULT_PORT), -N <name> switches to network mode, -D runs as
+ * a daemon, --debug enables debug mode and --verbose[=level] sets the
+ * verbosity. Anything else, or a trailing non-option argument, prints the
+ * usage. On success the function ends in agent_serve().
+ */
+void trace_agent(int argc, char **argv)
+{
+	bool do_daemon = false;
+	unsigned int port = TRACE_AGENT_DEFAULT_PORT;
+	const char *network = NULL;
+
+	if (argc < 2)
+		usage(argv);
+
+	if (strcmp(argv[1], "agent") != 0)
+		usage(argv);
+
+	for (;;) {
+		int c, option_index = 0;
+		unsigned long val;
+		char *end;
+		static struct option long_options[] = {
+			{"port", required_argument, NULL, 'p'},
+			{"help", no_argument, NULL, '?'},
+			{"debug", no_argument, NULL, DO_DEBUG},
+			{"verbose", optional_argument, NULL, OPT_verbose},
+			{NULL, 0, NULL, 0}
+		};
+
+		c = getopt_long(argc-1, argv+1, "+hp:DN:",
+				long_options, &option_index);
+		if (c == -1)
+			break;
+		switch (c) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'N':
+			network = optarg;
+			break;
+		case 'p':
+			/* Reject empty, non-numeric and out of range ports */
+			errno = 0;
+			val = strtoul(optarg, &end, 10);
+			if (!is_digits(optarg) || end == optarg ||
+			    errno || val > UINT_MAX)
+				die("invalid port %s", optarg);
+			port = val;
+			break;
+		case 'D':
+			do_daemon = true;
+			break;
+		case DO_DEBUG:
+			tracecmd_set_debug(true);
+			break;
+		case OPT_verbose:
+			if (trace_set_verbose(optarg) < 0)
+				die("invalid verbose level %s", optarg);
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	/* getopt_long() was given argv+1, so optind is relative to that */
+	if (optind < argc-1)
+		usage(argv);
+
+	agent_serve(port, do_daemon, network);
+}
diff --git a/tracecmd/trace-check-events.c b/tracecmd/trace-check-events.c
new file mode 100644
index 00000000..46f57e17
--- /dev/null
+++ b/tracecmd/trace-check-events.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdlib.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include "tracefs.h"
+#include "trace-local.h"
+
+/* getopt_long() return value for --verbose, which has no short form. */
+enum {
+ OPT_verbose = 255,
+};
+
+/*
+ * Entry point of "trace-cmd check-events": load the local event formats and
+ * report whether all of them could be parsed.
+ *
+ * Options: -N skips loading plugins, --verbose[=level] sets the verbosity,
+ * -h (or anything unknown) prints the usage.
+ *
+ * The process exits with EINVAL when the tracing directory is not available,
+ * when the event parser cannot be allocated, or when loading or parsing the
+ * events failed. On success the function simply returns.
+ */
+void trace_check_events(int argc, char **argv)
+{
+	const char *tracing;
+	int ret, c;
+	int parsing_failures = 0;
+	struct tep_handle *pevent = NULL;
+	struct tep_plugin_list *list = NULL;
+	int open_flags = 0;
+	int option_index = 0;
+	static struct option long_options[] = {
+		{"verbose", optional_argument, NULL, OPT_verbose},
+		{NULL, 0, NULL, 0}
+	};
+
+
+	while ((c = getopt_long(argc-1, argv+1, "+hN", long_options, &option_index)) >= 0) {
+		switch (c) {
+		case 'h':
+		default:
+			usage(argv);
+			break;
+		case 'N':
+			open_flags |= TRACECMD_FL_LOAD_NO_PLUGINS;
+			break;
+		case OPT_verbose:
+			if (trace_set_verbose(optarg) < 0)
+				die("invalid verbose level %s", optarg);
+			break;
+		}
+	}
+	tracing = tracefs_tracing_dir();
+
+	if (!tracing) {
+		printf("Can not find or mount tracing directory!\n"
+		       "Either tracing is not configured for this "
+		       "kernel\n"
+		       "or you do not have the proper permissions to "
+		       "mount the directory\n");
+		exit(EINVAL);
+	}
+
+	pevent = tep_alloc();
+	if (!pevent)
+		exit(EINVAL);
+
+	list = trace_load_plugins(pevent, open_flags);
+	ret = tracefs_fill_local_events(tracing, pevent, &parsing_failures);
+	if (ret || parsing_failures)
+		ret = EINVAL;
+	tep_unload_plugins(list, pevent);
+	tep_free(pevent);
+
+	/* Report the failure to the caller of the program instead of dropping it */
+	if (ret)
+		exit(ret);
+}
diff --git a/tracecmd/trace-clear.c b/tracecmd/trace-clear.c
new file mode 100644
index 00000000..999e80fe
--- /dev/null
+++ b/tracecmd/trace-clear.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * Updates:
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#include "tracefs.h"
+#include "trace-local.h"
+
+/* Node of a singly linked list of tracefs instances to be cleared. */
+struct instances_list {
+ struct instances_list *next;
+ struct tracefs_instance *instance;
+};
+
+/*
+ * Push the already existing tracefs instance @name on the front of @list.
+ *
+ * Returns 0 on success, -1 when the instance does not exist, the node
+ * cannot be allocated or tracefs_instance_create() fails.
+ */
+static int add_new_instance(struct instances_list **list, char *name)
+{
+	struct instances_list *node;
+
+	if (!tracefs_instance_exists(name))
+		return -1;
+
+	node = calloc(1, sizeof(*node));
+	if (!node)
+		return -1;
+
+	node->instance = tracefs_instance_create(name);
+	if (node->instance) {
+		node->next = *list;
+		*list = node;
+		return 0;
+	}
+
+	free(node);
+	return -1;
+}
+
+/* tracefs_instances_walk() callback; @data is the address of the list head. */
+static int add_instance_walk(const char *name, void *data)
+{
+	struct instances_list **head = (struct instances_list **)data;
+
+	return add_new_instance(head, (char *)name);
+}
+
+/* Release every node of @list together with the instance it holds. */
+static void clear_list(struct instances_list *list)
+{
+	struct instances_list *next;
+
+	for (; list; list = next) {
+		next = list->next;
+		tracefs_instance_free(list->instance);
+		free(list);
+	}
+}
+
+/*
+ * Reset the "trace" file of @instance by writing "0" into it.
+ * Dies when the path of the file cannot be obtained or the file cannot
+ * be opened for writing.
+ */
+static void clear_instance_trace(struct tracefs_instance *instance)
+{
+	FILE *fp;
+	char *path;
+
+	/* reset the trace */
+	path = tracefs_instance_get_file(instance, "trace");
+	if (!path)
+		die("Failed to get the path of the trace file");
+	fp = fopen(path, "w");
+	if (!fp)
+		die("writing to '%s'", path);
+	/* the path is not needed once the file is open */
+	tracefs_put_tracing_file(path);
+	fwrite("0", 1, 1, fp);
+	fclose(fp);
+}
+
+/*
+ * Clear the trace of every instance in @instances; an empty list clears
+ * the instance selected by a NULL handle instead.
+ */
+static void clear_trace(struct instances_list *instances)
+{
+	if (!instances) {
+		clear_instance_trace(NULL);
+		return;
+	}
+
+	for (; instances; instances = instances->next)
+		clear_instance_trace(instances->instance);
+}
+
+/**
+ * trace_clear - implementation of the "clear" command
+ * @argc: argument count, as received by main()
+ * @argv: argument vector, as received by main()
+ *
+ * -B <name> selects an existing instance, -a / --all selects all of them,
+ * anything else prints the usage. The selected traces are cleared and the
+ * process exits with 0.
+ */
+void trace_clear(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"all", no_argument, NULL, 'a'},
+		{"help", no_argument, NULL, '?'},
+		{NULL, 0, NULL, 0}
+	};
+	struct instances_list *instances = NULL;
+	bool all = false;
+	int option_index;
+	int c;
+
+	for (;;) {
+		option_index = 0;
+		c = getopt_long (argc-1, argv+1, "+haB:",
+				 long_options, &option_index);
+		if (c == -1)
+			break;
+
+		if (c == 'B') {
+			if (add_new_instance(&instances, optarg))
+				die("Failed to allocate instance %s", optarg);
+		} else if (c == 'a') {
+			all = true;
+			if (tracefs_instances_walk(add_instance_walk, &instances))
+				die("Failed to add all instances");
+		} else {
+			/* 'h', '?' and everything unknown */
+			usage(argv);
+		}
+	}
+
+	clear_trace(instances);
+	if (all)
+		clear_trace(NULL);
+	clear_list(instances);
+	exit(0);
+}
diff --git a/tracecmd/trace-cmd.bash b/tracecmd/trace-cmd.bash
new file mode 100644
index 00000000..66bd6f4b
--- /dev/null
+++ b/tracecmd/trace-cmd.bash
@@ -0,0 +1,345 @@
+# Complete "$1" against the output of "trace-cmd list -B", unless that
+# output is the "No buffer instances defined" message.
+show_instances()
+{
+	local cur="$1"
+	local bufs
+
+	bufs=$(trace-cmd list -B)
+	if [ "$bufs" != "No buffer instances defined" ]; then
+		COMPREPLY=( $(compgen -W "${bufs}" -- "${cur}") )
+	fi
+	return 0
+}
+
+# Complete "$1" against the second column of the numbered rows printed by
+# "virsh list". Returns 1 when virsh is not found.
+show_virt()
+{
+	local cur="$1"
+
+	which virsh &>/dev/null || return 1
+
+	local virt=$(virsh list | awk '/^ *[0-9]/ { print $2 }')
+	COMPREPLY=( $(compgen -W "${virt}" -- "${cur}") )
+	return 0
+}
+
+# Complete "$1" against the options printed by "trace-cmd list -o";
+# every option is offered both plain and with a "no" prefix.
+show_options()
+{
+	local cur="$1"
+	local options
+
+	options=$(trace-cmd list -o | sed -e 's/^\(no\)*\(.*\)/\2 no\2/')
+	COMPREPLY=( $(compgen -W "${options}" -- "${cur}") )
+	return 0
+}
+
+# Complete file names for the current word. The word is taken from "$1"
+# when given, otherwise from the caller's "cur" variable.
+# A single matching directory is completed with a trailing '/' and no
+# space, so that the completion can continue inside it.
+__show_files()
+{
+	local cur="${1-$cur}"
+	# keep the directory list out of the user's interactive shell
+	local dirs
+
+	COMPREPLY=( $(compgen -f -- "$cur") )
+	if [ ${#COMPREPLY[@]} -gt 1 ]; then
+		return 0;
+	fi
+	# directories get '/' instead of space
+	dirs=( $(compgen -d -- "$cur"))
+	if [ ${#dirs[@]} -eq 1 ]; then
+		compopt -o nospace
+		COMPREPLY="$dirs/"
+		return 0;
+	fi
+	return 0
+}
+
+# Complete "$2" against the options listed by "trace-cmd $1 -h";
+# fall back to file names when no option matches.
+cmd_options()
+{
+	local type="$1"
+	local cur="$2"
+	local cmds
+
+	cmds=$(trace-cmd $type -h 2>/dev/null|grep "^ *-" | \
+		sed -e 's/ *\(-[^ ]*\).*/\1/')
+	COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
+	if [ ${#COMPREPLY[@]} -eq 0 ]; then
+		__show_files "${cur}"
+	fi
+}
+
+# Complete "$1" against the "option:" entries of "trace-cmd list -O".
+plugin_options()
+{
+	local cur="$1"
+	local opts
+
+	opts=$(trace-cmd list -O | sed -ne 's/option://p')
+	COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
+}
+
+# Complete the argument of --compression: the algorithms printed by
+# "trace-cmd list -c" plus "any" and "none".
+# The word being completed is taken from "$1" when given, otherwise from
+# the caller's "cur" variable, and is used to filter the candidates.
+compression_param()
+{
+	local cur="${1-$cur}"
+	local opts=$(trace-cmd list -c | grep -v 'Supported' | cut -d "," -f1)
+	opts+=" any none "
+	COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
+}
+
+# Completion of "trace-cmd list".
+#   $1 - previous word, $2 - current word, remaining arguments - all words
+# Right after "list" the top-level options of the help text are offered;
+# after "-e" the sub-options of -e are offered.
+__trace_cmd_list_complete()
+{
+	local prev=$1
+	local cur=$2
+	shift 2
+	local words=("$@")
+	local size
+
+	case "$prev" in
+	list)
+		# grep -E: egrep is obsolescent and warns on stderr with newer GNU grep
+		local cmds=$(trace-cmd list -h |grep -E "^ {10}-" | \
+			sed -e 's/.*\(-.\).*/\1/')
+		COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
+		;;
+	*)
+		size=${#words[@]}
+		if [ $size -gt 3 ]; then
+			# index of the word that may hold "-e"
+			if [ "$cur" == "-" ]; then
+				let size=$size-3
+			else
+				let size=$size-2
+			fi
+			local w="${words[$size]}"
+			if [ "$w" == "-e" ]; then
+				local cmds=$(trace-cmd list -h |grep -E "^ {12}-" | \
+					sed -e 's/.*\(-.\).*/\1/')
+				COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
+			fi
+		fi
+		;;
+	esac
+}
+
+# Completion of "trace-cmd show": instance names after -B, otherwise the
+# options of the command.
+#   $1 - previous word, $2 - current word, remaining arguments - all words
+__trace_cmd_show_complete()
+{
+	local prev=$1
+	local cur=$2
+	shift 2
+	local words=("$@")
+
+	if [[ "$prev" == "-B" ]]; then
+		show_instances "$cur"
+	else
+		cmd_options show "$cur"
+	fi
+}
+
+# Completion of "trace-cmd extract": options right after the command,
+# instance names after -B, file names everywhere else.
+#   $1 - previous word, $2 - current word, remaining arguments - all words
+__trace_cmd_extract_complete()
+{
+	local prev=$1
+	local cur=$2
+	shift 2
+	local words=("$@")
+
+	if [[ "$prev" == "extract" ]]; then
+		cmd_options "$prev" "$cur"
+	elif [[ "$prev" == "-B" ]]; then
+		show_instances "$cur"
+	else
+		__show_files
+	fi
+}
+
+# Completion shared by the record, stream, start and profile commands.
+#   $1 - previous word, $2 - current word, remaining arguments - all words
+# The candidates depend on the option that precedes the current word.
+__trace_cmd_record_complete()
+{
+ local prev=$1
+ local cur=$2
+ shift 2
+ local words=("$@")
+
+ case "$prev" in
+ -e)
+ local list=$(trace-cmd list -e "$cur")
+ # part of the current word in front of the first ':'
+ local prefix=${cur%%:*}
+ if [ -z "$cur" -o "$cur" != "$prefix" ]; then
+ # nothing typed yet, or a ':' was already typed: offer the listed names as they are
+ COMPREPLY=( $(compgen -W "all ${list}" -- "${cur}") )
+ else
+ # no ':' typed: offer the part after the ':' and the part up to the ':' separately
+ local events=$(for e in $list; do echo ${e/*:/}; done | sort -u)
+ local systems=$(for s in $list; do echo ${s/:*/:}; done | sort -u)
+
+ COMPREPLY=( $(compgen -W "all ${events} ${systems}" -- "${cur}") )
+ fi
+
+ # This is still to handle the "*:*" special case
+ if [[ -n "$prefix" ]]; then
+ # strip a leading "<prefix>:" from every reply
+ local reply_n=${#COMPREPLY[*]}
+ for (( i = 0; i < $reply_n; i++)); do
+ COMPREPLY[$i]=${COMPREPLY[i]##${prefix}:}
+ done
+ fi
+ ;;
+ -p)
+ local plugins=$(trace-cmd list -p)
+ COMPREPLY=( $(compgen -W "${plugins}" -- "${cur}" ) )
+ ;;
+ -l|-n|-g)
+ # This is extremely slow still (may take >1sec).
+ local funcs=$(trace-cmd list -f | sed 's/ .*//')
+ COMPREPLY=( $(compgen -W "${funcs}" -- "${cur}") )
+ ;;
+ -B)
+ show_instances "$cur"
+ ;;
+ -O)
+ show_options "$cur"
+ ;;
+ -A)
+ # fall back to the command options when show_virt fails
+ if ! show_virt "$cur"; then
+ cmd_options record "$cur"
+ fi
+ ;;
+ --compression)
+ compression_param
+ ;;
+ *)
+ # stream start and profile do not show all options
+ cmd_options record "$cur"
+ ;;
+ esac
+}
+
+# Completion of "trace-cmd report": plugin options after -O, otherwise
+# the options of the command.
+#   $1 - previous word, $2 - current word, remaining arguments - all words
+__trace_cmd_report_complete()
+{
+	local prev=$1
+	local cur=$2
+	shift 2
+	local words=("$@")
+
+	if [[ "$prev" == "-O" ]]; then
+		plugin_options "$cur"
+	else
+		cmd_options report "$cur"
+	fi
+}
+
+# Completion of "trace-cmd dump": file names after -i, otherwise the
+# options of the command.
+#   $1 - previous word, $2 - current word, remaining arguments - all words
+__trace_cmd_dump_complete()
+{
+	local prev=$1
+	local cur=$2
+	shift 2
+	local words=("$@")
+
+	if [[ "$prev" == "-i" ]]; then
+		__show_files
+	else
+		cmd_options dump "$cur"
+	fi
+}
+
+# Completion of "trace-cmd convert": file names after -i and -o,
+# compression names after --compression, otherwise the command options.
+#   $1 - previous word, $2 - current word, remaining arguments - all words
+__trace_cmd_convert_complete()
+{
+	local prev=$1
+	local cur=$2
+	shift 2
+	local words=("$@")
+
+	case "$prev" in
+	-i|-o)
+		__show_files
+		;;
+	--compression)
+		compression_param
+		;;
+	*)
+		cmd_options convert "$cur"
+		;;
+	esac
+}
+
+# Generic completion for commands without a dedicated handler.
+#   $1 - command name, $2 - previous word, $3 - current word
+# When $1 is one of the commands listed by "trace-cmd --help", the options
+# from its "-h" output are offered; file names are the fallback.
+__show_command_options()
+{
+ local command="$1"
+ local prev="$2"
+ local cur="$3"
+ local cmds=( $(trace-cmd --help 2>/dev/null | \
+ grep " - " | sed 's/^ *//; s/ -.*//') )
+
+ for cmd in ${cmds[@]}; do
+ if [ $cmd == "$command" ]; then
+ local opts=$(trace-cmd $cmd -h 2>/dev/null|grep "^ *-" | \
+ sed -e 's/ *\(-[^ ]*\).*/\1/')
+ # after -B offer instance names, but only if the command has a -B option
+ if [ "$prev" == "-B" ]; then
+ for opt in ${opts[@]}; do
+ if [ "$opt" == "-B" ]; then
+ show_instances "$cur"
+ return 0
+ fi
+ done
+ fi
+ COMPREPLY=( $(compgen -W "${opts}" -- "$cur"))
+ break
+ fi
+ done
+ # nothing matched: complete file names
+ if [ ${#COMPREPLY[@]} -eq 0 ]; then
+ __show_files "${cur}"
+ fi
+}
+
+# Entry point of the trace-cmd completion. Completes the command name
+# right after "trace-cmd" and otherwise dispatches on the command
+# (second word) to the matching helper.
+_trace_cmd_complete()
+{
+	local cur=""
+	local prev=""
+	local words=()
+
+	# Not to use COMP_WORDS to avoid buggy behavior of Bash when
+	# handling with words including ":", like:
+	#
+	# prev="${COMP_WORDS[COMP_CWORD-1]}"
+	# cur="${COMP_WORDS[COMP_CWORD]}"
+	#
+	# Instead, we use _get_comp_words_by_ref() magic.
+	_get_comp_words_by_ref -n : cur prev words
+
+	if [ "$prev" == "trace-cmd" ]; then
+		local cmds=$(trace-cmd --help 2>/dev/null | \
+			grep " - " | sed 's/^ *//; s/ -.*//')
+		COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
+		return;
+	fi
+
+	local w="${words[1]}"
+
+	# "${words[@]}" stays quoted: unquoted, words such as "*:*" would be
+	# split and glob-expanded, changing the word list seen by the helpers.
+	case "$w" in
+	list)
+		__trace_cmd_list_complete "${prev}" "${cur}" "${words[@]}"
+		return 0
+		;;
+	show)
+		__trace_cmd_show_complete "${prev}" "${cur}" "${words[@]}"
+		return 0
+		;;
+	extract)
+		__trace_cmd_extract_complete "${prev}" "${cur}" "${words[@]}"
+		return 0
+		;;
+	record|stream|start|profile)
+		__trace_cmd_record_complete "${prev}" "${cur}" "${words[@]}"
+		return 0
+		;;
+	report)
+		__trace_cmd_report_complete "${prev}" "${cur}" "${words[@]}"
+		return 0
+		;;
+	dump)
+		__trace_cmd_dump_complete "${prev}" "${cur}" "${words[@]}"
+		return 0
+		;;
+	convert)
+		__trace_cmd_convert_complete "${prev}" "${cur}" "${words[@]}"
+		return 0
+		;;
+	*)
+		__show_command_options "$w" "${prev}" "${cur}"
+		;;
+	esac
+}
+complete -F _trace_cmd_complete trace-cmd
diff --git a/tracecmd/trace-cmd.c b/tracecmd/trace-cmd.c
new file mode 100644
index 00000000..69800d26
--- /dev/null
+++ b/tracecmd/trace-cmd.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+
+#include "trace-local.h"
+
+/* When non-zero, warning() prints nothing. */
+int silence_warnings;
+int show_status;
+
+/* Use the raw system call when gettid is not already defined as a macro. */
+#ifndef gettid
+#define gettid() syscall(__NR_gettid)
+#endif
+
+/**
+ * warning - print a formatted warning on stderr
+ * @fmt: printf style format, followed by its arguments
+ *
+ * Nothing is printed when silence_warnings is set. A pending errno is
+ * reported with perror() first and then cleared.
+ */
+void warning(const char *fmt, ...)
+{
+	va_list args;
+
+	if (!silence_warnings) {
+		if (errno)
+			perror("trace-cmd");
+		errno = 0;
+
+		fprintf(stderr, " ");
+		va_start(args, fmt);
+		vfprintf(stderr, fmt, args);
+		va_end(args);
+
+		fprintf(stderr, "\n");
+	}
+}
+
+/* malloc() @size bytes; die() is called when the allocation fails. */
+void *malloc_or_die(unsigned int size)
+{
+	void *ptr = malloc(size);
+
+	if (ptr == NULL)
+		die("malloc");
+
+	return ptr;
+}
+
+/**
+ * tracecmd_debug - print a debug message prefixed with the thread id
+ * @fmt: printf style format, followed by its arguments
+ *
+ * Prints on stdout, and only when tracecmd_get_debug() is non-zero.
+ */
+void tracecmd_debug(const char *fmt, ...)
+{
+	va_list args;
+
+	if (tracecmd_get_debug()) {
+		va_start(args, fmt);
+		printf("[%d] ", (int)gettid());
+		vprintf(fmt, args);
+		va_end(args);
+	}
+}
+
+/*
+ * Names accepted by trace_set_verbose() and the log level each one
+ * selects. trace_set_verbose() scans the table in order and takes the
+ * first entry whose name is a prefix of the given string.
+ */
+static struct trace_log_severity {
+ int id;
+ const char *name;
+} log_severity[] = {
+ { .id = TEP_LOG_NONE, .name = "none" },
+ { .id = TEP_LOG_CRITICAL, .name = "crit" },
+ { .id = TEP_LOG_ERROR, .name = "err" },
+ { .id = TEP_LOG_WARNING, .name = "warn" },
+ { .id = TEP_LOG_INFO, .name = "info" },
+ { .id = TEP_LOG_DEBUG, .name = "debug" },
+ { .id = TEP_LOG_ALL, .name = "all" },
+};
+
+/**
+ * trace_set_verbose - set the log level from a user supplied string
+ * @level: numeric level or severity name; NULL selects "info"
+ *
+ * A string starting with a digit is converted with atoi() and capped at
+ * TEP_LOG_ALL. Any other string is compared with the names of
+ * log_severity[]; an entry matches when @level starts with its name.
+ *
+ * Returns 0 when the level was set, -1 when @level is not recognized.
+ */
+int trace_set_verbose(char *level)
+{
+	int id;
+
+	/* Default level is info */
+	if (!level)
+		level = "info";
+
+	/* isdigit() is only defined for values representable as unsigned char */
+	if (isdigit((unsigned char)level[0])) {
+		id = atoi(level);
+		if (id >= TEP_LOG_NONE) {
+			if (id > TEP_LOG_ALL)
+				id = TEP_LOG_ALL;
+			tracecmd_set_loglevel(id);
+			return 0;
+		}
+	} else {
+		int size = ARRAY_SIZE(log_severity);
+		int i;
+
+		for (i = 0; i < size; i++) {
+			if (!strncmp(level, log_severity[i].name, strlen(log_severity[i].name))) {
+				tracecmd_set_loglevel(log_severity[i].id);
+				return 0;
+			}
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * struct command - one entry of the command lookup table
+ * @name: command name, compared with argv[1] in main()
+ * @run: function to execute on command `name`; it receives the
+ *       unmodified argc and argv of main()
+ */
+struct command {
+ char *name;
+ void (*run)(int argc, char **argv);
+};
+
+
+/**
+ * Lookup table that maps command names to functions.
+ * main() scans it in order and runs the first entry whose name is equal
+ * to argv[1]; "help" and "-h" both map to trace_usage().
+ */
+struct command commands[] = {
+ {"report", trace_report},
+ {"snapshot", trace_snapshot},
+ {"hist", trace_hist},
+ {"mem", trace_mem},
+ {"listen", trace_listen},
+ {"agent", trace_agent},
+ {"setup-guest", trace_setup_guest},
+ {"split", trace_split},
+ {"restore", trace_restore},
+ {"stack", trace_stack},
+ {"check-events", trace_check_events},
+ {"record", trace_record},
+ {"start", trace_start},
+ {"set", trace_set},
+ {"extract", trace_extract},
+ {"stop", trace_stop},
+ {"stream", trace_stream},
+ {"profile", trace_profile},
+ {"restart", trace_restart},
+ {"clear", trace_clear},
+ {"reset", trace_reset},
+ {"stat", trace_stat},
+ {"options", trace_option},
+ {"show", trace_show},
+ {"list", trace_list},
+ {"help", trace_usage},
+ {"dump", trace_dump},
+ {"convert", trace_convert},
+ {"-h", trace_usage},
+};
+
+/*
+ * Run the command named by argv[1]; show the usage when there is no
+ * command or it is not in the commands[] table.
+ */
+int main (int argc, char **argv)
+{
+	int i;
+
+	errno = 0;
+
+	if (argc < 2)
+		trace_usage(argc, argv);
+
+	for (i = 0; i < ARRAY_SIZE(commands); ++i) {
+		if (strcmp(argv[1], commands[i].name) != 0)
+			continue;
+
+		commands[i].run(argc, argv);
+		exit(0);
+	}
+
+	/* No valid command found, show help */
+	trace_usage(argc, argv);
+	exit(0);
+}
diff --git a/tracecmd/trace-convert.c b/tracecmd/trace-convert.c
new file mode 100644
index 00000000..88935dc7
--- /dev/null
+++ b/tracecmd/trace-convert.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ */
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "trace-local.h"
+#include "trace-cmd.h"
+#include "trace-cmd-private.h"
+
+/*
+ * Copy the trace file @in into @out with tracecmd_copy(), using file
+ * version @file_version and compression @compr. Dies when @in cannot be
+ * opened or the copy fails.
+ */
+static void convert_file(const char *in, const char *out, int file_version, char *compr)
+{
+	struct tracecmd_output *dst;
+	struct tracecmd_input *src;
+
+	src = tracecmd_open_head(in, 0);
+	if (src == NULL)
+		die("error reading %s", in);
+
+	dst = tracecmd_copy(src, out, TRACECMD_FILE_CPU_FLYRECORD, file_version, compr);
+	if (dst == NULL)
+		die("error writing %s", out);
+
+	tracecmd_output_close(dst);
+	tracecmd_close(src);
+}
+
+/*
+ * Values returned by getopt_long() for the long-only options of
+ * trace_convert() (see its long_options[] table).
+ */
+enum {
+ OPT_file_version = 254,
+ OPT_compression = 255,
+};
+
+/**
+ * trace_convert - implementation of the "convert" command
+ * @argc: argument count, as received by main()
+ * @argv: argument vector, as received by main()
+ *
+ * Options: -i <input>, -o <output>, --compression <name>,
+ * --file-version <number>. The output file may also be given as the
+ * first non-option argument. The input defaults to DEFAULT_INPUT_FILE;
+ * a missing output file prints the usage.
+ */
+void trace_convert(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"compression", required_argument, NULL, OPT_compression},
+		{"file-version", required_argument, NULL, OPT_file_version},
+		{"help", no_argument, NULL, '?'},
+		{NULL, 0, NULL, 0}
+	};
+	int file_version = tracecmd_default_file_version();
+	char *compression = NULL;
+	char *output_file = NULL;
+	char *input_file = NULL;
+	int option_index;
+	int c;
+
+	if (argc < 2)
+		usage(argv);
+
+	if (strcmp(argv[1], "convert") != 0)
+		usage(argv);
+
+	for (;;) {
+		option_index = 0;
+		/* argv[0] is "trace-cmd", so parsing starts at the command name */
+		c = getopt_long (argc-1, argv+1, "+hi:o:", long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'i':
+			if (input_file)
+				die("Only one input file is supported, %s already set",
+				    input_file);
+			input_file = optarg;
+			break;
+		case 'o':
+			if (output_file)
+				die("Only one output file is supported, %s already set",
+				    output_file);
+			output_file = optarg;
+			break;
+		case OPT_compression:
+			if (strcmp(optarg, "any") && strcmp(optarg, "none") &&
+			    !tracecmd_compress_is_supported(optarg, NULL))
+				die("Compression algorithm %s is not supported", optarg);
+			compression = optarg;
+			break;
+		case OPT_file_version:
+			file_version = atoi(optarg);
+			if (file_version < FILE_VERSION_MIN || file_version > FILE_VERSION_MAX)
+				die("Unsupported file version %d, "
+				    "supported versions are from %d to %d",
+				    file_version, FILE_VERSION_MIN, FILE_VERSION_MAX);
+			break;
+		case 'h':
+		case '?':
+		default:
+			usage(argv);
+		}
+	}
+
+	/* optind counts from argv+1, hence the "+ 1" when indexing argv */
+	if ((argc - optind) >= 2) {
+		if (output_file)
+			usage(argv);
+		output_file = argv[optind + 1];
+	}
+
+	if (!input_file)
+		input_file = DEFAULT_INPUT_FILE;
+	if (!output_file)
+		usage(argv);
+
+	convert_file(input_file, output_file, file_version, compression);
+}
diff --git a/tracecmd/trace-dump.c b/tracecmd/trace-dump.c
new file mode 100644
index 00000000..22e3d871
--- /dev/null
+++ b/tracecmd/trace-dump.c
@@ -0,0 +1,1355 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * Updates:
+ * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ */
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "trace-local.h"
+
+/* Strings expected in the trace file; compared by the dump_*() readers. */
+#define TRACING_STR "tracing"
+#define HEAD_PAGE_STR "header_page"
+#define HEAD_PAGE_EVENT "header_event"
+#define HEAD_OPTIONS "options "
+#define HEAD_LATENCY "latency "
+#define HEAD_FLYRECORD "flyrecord"
+
+/* Size of the local buffers used to read strings from the file. */
+#define DUMP_SIZE 1024
+
+/* Handle used by tep_read_number() to decode numbers read from the file. */
+static struct tep_handle *tep;
+/* CPU count, read from the file by dump_cpus_count(). */
+static unsigned int trace_cpus;
+static int has_clock;
+/* File version, parsed by dump_initial_format(). */
+static unsigned long file_version;
+/* When true, read_compressed() and do_lseek() work on the compression buffer. */
+static bool read_compress;
+/* Compression context, allocated by dump_compress(). */
+static struct tracecmd_compression *compress;
+/* Buffer and size used by get_metadata_string() to resolve string offsets. */
+static char *meta_strings;
+static int meta_strings_size;
+
+/*
+ * Bit flags naming the parts of the file that can be printed. They are
+ * combined in the global "verbosity" mask and passed as the first
+ * argument of do_print() to mark what a message belongs to.
+ */
+enum dump_items {
+ SUMMARY = (1 << 0),
+ HEAD_PAGE = (1 << 1),
+ HEAD_EVENT = (1 << 2),
+ FTRACE_FORMAT = (1 << 3),
+ EVENT_SYSTEMS = (1 << 4),
+ EVENT_FORMAT = (1 << 5),
+ KALLSYMS = (1 << 6),
+ TRACE_PRINTK = (1 << 7),
+ CMDLINES = (1 << 8),
+ OPTIONS = (1 << 9),
+ FLYRECORD = (1 << 10),
+ CLOCK = (1 << 11),
+ SECTIONS = (1 << 12),
+ STRINGS = (1 << 13),
+};
+
+/*
+ * Node of the singly linked "sections" list.
+ * NOTE(review): presumably one node per section found in the file, with
+ * its id, file offset and the dump_items flag used to print it - confirm
+ * against the code that fills the list.
+ */
+struct file_section {
+ int id;
+ unsigned long long offset;
+ struct file_section *next;
+ enum dump_items verbosity;
+};
+
+/* Head of the list of file sections. */
+static struct file_section *sections;
+
+/* Mask of dump_items selected for printing. */
+enum dump_items verbosity;
+
+/* Non-zero when at least one of the items in X is selected in verbosity. */
+#define DUMP_CHECK(X) ((X) & verbosity)
+
+/*
+ * Print with tracecmd_plog() when ids is 0 or when one of the items in
+ * ids is selected in verbosity.
+ */
+#define do_print(ids, fmt, ...) \
+ do { \
+ if (!(ids) || DUMP_CHECK(ids)) \
+ tracecmd_plog(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+/*
+ * Read exactly @len bytes from @fd into @dst, retrying after short reads.
+ * Returns the number of bytes read, or -1 when fewer than @len bytes
+ * could be read.
+ */
+static int read_fd(int fd, char *dst, int len)
+{
+	size_t total = 0;
+	int got;
+
+	while ((got = read(fd, dst + total, len)) > 0) {
+		total += got;
+		len -= got;
+	}
+
+	if (len != 0)
+		return -1;
+	return total;
+}
+
+/*
+ * Read @len bytes into @dst: from the compression buffer while
+ * read_compress is set, directly from @fd otherwise.
+ */
+static int read_compressed(int fd, char *dst, int len)
+{
+	if (!read_compress)
+		return read_fd(fd, dst, len);
+
+	return tracecmd_compress_buffer_read(compress, dst, len);
+}
+
+/*
+ * Seek in the compression buffer while read_compress is set, in @fd
+ * otherwise. The result of the underlying seek is returned as int.
+ */
+static int do_lseek(int fd, int offset, int whence)
+{
+	if (!read_compress)
+		return lseek64(fd, offset, whence);
+
+	return tracecmd_compress_lseek(compress, offset, whence);
+}
+
+/*
+ * Read a NUL terminated string, one byte at a time, into @dst.
+ * Returns 0 when the terminating NUL was read within @len bytes, -1 on a
+ * read error or when @len bytes were read without finding the NUL.
+ */
+static int read_file_string(int fd, char *dst, int len)
+{
+	size_t pos = 0;
+
+	do {
+		if (read_compressed(fd, dst + pos, 1) <= 0)
+			return -1;
+		if (dst[pos++] == '\0')
+			return 0;
+	} while (--len);
+
+	return -1;
+}
+
+/*
+ * Read @len bytes into @dst.
+ * Returns 0 on success, the negative result of read_compressed() otherwise.
+ */
+static int read_file_bytes(int fd, char *dst, int len)
+{
+	int ret = read_compressed(fd, dst, len);
+
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+/*
+ * Read @size bytes of text from the file and print them, in chunks that
+ * fit the local buffer, when @id is selected for printing.
+ * Dies when @size is negative or the data cannot be read.
+ */
+static void read_dump_string(int fd, int size, enum dump_items id)
+{
+	char buf[DUMP_SIZE];
+	int lsize;
+
+	/*
+	 * The size comes from the file. A negative value would be passed to
+	 * read() as a huge length and used as a negative index into buf.
+	 */
+	if (size < 0)
+		die("invalid size %d", size);
+
+	while (size) {
+		/* always leave room for the terminating NUL */
+		lsize = (size < DUMP_SIZE) ? size : DUMP_SIZE - 1;
+		if (read_file_bytes(fd, buf, lsize))
+			die("cannot read %d bytes", lsize);
+		buf[lsize] = 0;
+		do_print(id, "%s", buf);
+		size -= lsize;
+	}
+
+	do_print(id, "\n");
+}
+
+/*
+ * Read a number of @size bytes from the file, convert it with
+ * tep_read_number() and store it in @digit.
+ * @digit must point to an object of at least @size bytes.
+ * Only sizes 1, 2, 4 and 8 are supported.
+ * Returns 0 on success, -1 on an unsupported size or a read error.
+ */
+static int read_file_number(int fd, void *digit, int size)
+{
+	unsigned long long val;
+	char buf[8];
+
+	/*
+	 * Validate before reading: the size may come from the file, and a
+	 * negative or oversized value must never reach the 8 byte buffer.
+	 */
+	if (size != 1 && size != 2 && size != 4 && size != 8)
+		return -1;
+
+	if (read_file_bytes(fd, buf, size))
+		return -1;
+
+	val = tep_read_number(tep, buf, size);
+	switch (size) {
+	case 1:
+		*((char *)digit) = val;
+		break;
+	case 2:
+		*((unsigned short *)digit) = val;
+		break;
+	case 4:
+		*((unsigned int *)digit) = val;
+		break;
+	case 8:
+		*((unsigned long long *)digit) = val;
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the string stored at @offset in the meta_strings buffer, or NULL
+ * when there is no buffer or @offset is outside of it.
+ */
+static const char *get_metadata_string(int offset)
+{
+	if (meta_strings && offset >= 0 && offset < meta_strings_size)
+		return meta_strings + offset;
+
+	return NULL;
+}
+
+/*
+ * Read, verify and print the beginning of the file: magic, "tracing"
+ * string, version, endianness, bytes per long and page size.
+ * Sets file_version and the endianness of the tep handle.
+ * Dies on any read error or unexpected content.
+ */
+static void dump_initial_format(int fd)
+{
+	char magic[] = TRACECMD_MAGIC;
+	char buf[DUMP_SIZE];
+	int val4;
+
+	do_print(SUMMARY, "\t[Initial format]\n");
+
+	/* check initial bytes */
+	if (read_file_bytes(fd, buf, sizeof(magic)))
+		die("cannot read %zu bytes magic", sizeof(magic));
+	if (memcmp(buf, magic, sizeof(magic)) != 0)
+		die("wrong file magic");
+
+	/* check initial tracing string */
+	if (read_file_bytes(fd, buf, strlen(TRACING_STR)))
+		die("cannot read %zu bytes tracing string", strlen(TRACING_STR));
+	buf[strlen(TRACING_STR)] = 0;
+	if (strncmp(buf, TRACING_STR, strlen(TRACING_STR)) != 0)
+		die("wrong tracing string: %s", buf);
+
+	/* get file version */
+	if (read_file_string(fd, buf, DUMP_SIZE))
+		die("no version string");
+
+	do_print(SUMMARY, "\t\t%s\t[Version]\n", buf);
+	/* strtol() reports errors only through errno, so clear it first */
+	errno = 0;
+	file_version = strtol(buf, NULL, 10);
+	if (!file_version && errno)
+		die("Invalid file version string %s", buf);
+	if (!tracecmd_is_version_supported(file_version))
+		die("Unsupported file version %lu", file_version);
+
+	/* get file endianness*/
+	if (read_file_bytes(fd, buf, 1))
+		die("cannot read file endianness");
+	do_print(SUMMARY, "\t\t%d\t[%s endian]\n", buf[0], buf[0]?"Big":"Little");
+
+	tep_set_file_bigendian(tep, buf[0]);
+	tep_set_local_bigendian(tep, tracecmd_host_bigendian());
+
+	/* get file bytes per long*/
+	if (read_file_bytes(fd, buf, 1))
+		die("cannot read file bytes per long");
+	do_print(SUMMARY, "\t\t%d\t[Bytes in a long]\n", buf[0]);
+
+	if (read_file_number(fd, &val4, 4))
+		die("cannot read file page size");
+	do_print(SUMMARY, "\t\t%d\t[Page size, bytes]\n", val4);
+}
+
+/*
+ * Read and print the compression algorithm name and version. Nothing is
+ * read for file versions older than FILE_VERSION_COMPRESSION.
+ * A compression context is allocated unless the algorithm is "none".
+ */
+static void dump_compress(int fd)
+{
+	char algo[DUMP_SIZE];
+	char version[DUMP_SIZE];
+
+	if (file_version < FILE_VERSION_COMPRESSION)
+		return;
+
+	/* get compression header */
+	if (read_file_string(fd, algo, DUMP_SIZE))
+		die("no compression header");
+
+	if (read_file_string(fd, version, DUMP_SIZE))
+		die("no compression version");
+
+	do_print((SUMMARY), "\t\t%s\t[Compression algorithm]\n", algo);
+	do_print((SUMMARY), "\t\t%s\t[Compression version]\n", version);
+
+	if (strcmp(algo, "none") == 0)
+		return;
+
+	compress = tracecmd_compress_alloc(algo, version, fd, tep, NULL);
+	if (!compress)
+		die("cannot uncompress the file");
+}
+
+/*
+ * Verify the "header_page" string, then read the 8 byte size that
+ * follows it and dump that many bytes of header page information.
+ */
+static void dump_header_page(int fd)
+{
+	const size_t name_len = strlen(HEAD_PAGE_STR);
+	unsigned long long size;
+	char buf[DUMP_SIZE];
+
+	do_print((SUMMARY | HEAD_PAGE), "\t[Header page, ");
+
+	/* check header string, including its terminating NUL */
+	if (read_file_bytes(fd, buf, name_len + 1))
+		die("cannot read %zu bytes header string", name_len);
+	if (strncmp(buf, HEAD_PAGE_STR, name_len) != 0)
+		die("wrong header string: %s", buf);
+
+	if (read_file_number(fd, &size, 8))
+		die("cannot read the size of the page header information");
+
+	do_print((SUMMARY | HEAD_PAGE), "%lld bytes]\n", size);
+
+	read_dump_string(fd, size, HEAD_PAGE);
+}
+
+/*
+ * Verify the "header_event" string, then read the 8 byte size that
+ * follows it and dump that many bytes of header event information.
+ */
+static void dump_header_event(int fd)
+{
+	const size_t name_len = strlen(HEAD_PAGE_EVENT);
+	unsigned long long size;
+	char buf[DUMP_SIZE];
+
+	do_print((SUMMARY | HEAD_EVENT), "\t[Header event, ");
+
+	/* check header string, including its terminating NUL */
+	if (read_file_bytes(fd, buf, name_len + 1))
+		die("cannot read %zu bytes header string", name_len);
+	if (strncmp(buf, HEAD_PAGE_EVENT, name_len) != 0)
+		die("wrong header string: %s", buf);
+
+	if (read_file_number(fd, &size, 8))
+		die("cannot read the size of the page header information");
+
+	do_print((SUMMARY | HEAD_EVENT), "%lld bytes]\n", size);
+
+	read_dump_string(fd, size, HEAD_EVENT);
+}
+
+/*
+ * Go back to reading from the file directly and reset the compression
+ * context. Does nothing without a context or for file versions older
+ * than FILE_VERSION_COMPRESSION.
+ */
+static void uncompress_reset(void)
+{
+	if (!compress || file_version < FILE_VERSION_COMPRESSION)
+		return;
+
+	read_compress = false;
+	tracecmd_compress_reset(compress);
+}
+
+/*
+ * Uncompress the next block and, on success, make the following reads
+ * use the compression buffer. Returns 0 when there is no compression
+ * context or the file version is older than FILE_VERSION_COMPRESSION,
+ * otherwise the result of tracecmd_uncompress_block().
+ */
+static int uncompress_block(void)
+{
+	int ret;
+
+	if (!compress || file_version < FILE_VERSION_COMPRESSION)
+		return 0;
+
+	ret = tracecmd_uncompress_block(compress);
+	if (ret == 0)
+		read_compress = true;
+
+	return ret;
+}
+
+/*
+ * Read the 4 byte count of ftrace events, then for every event its
+ * 8 byte size followed by the format text, which is dumped.
+ */
+static void dump_ftrace_events_format(int fd)
+{
+	unsigned long long size;
+	unsigned int count;
+
+	do_print((SUMMARY | FTRACE_FORMAT), "\t[Ftrace format, ");
+	if (read_file_number(fd, &count, 4))
+		die("cannot read the count of the ftrace events");
+
+	do_print((SUMMARY | FTRACE_FORMAT), "%d events]\n", count);
+
+	for (; count; count--) {
+		if (read_file_number(fd, &size, 8))
+			die("cannot read the size of the %d ftrace event", count);
+		read_dump_string(fd, size, FTRACE_FORMAT);
+	}
+}
+
+/*
+ * Read the 4 byte count of event systems. For every system read its
+ * name and 4 byte event count, then for every event its 8 byte size
+ * followed by the format text, which is dumped.
+ */
+static void dump_events_format(int fd)
+{
+	unsigned long long size;
+	unsigned int systems;
+	unsigned int events;
+	char buf[DUMP_SIZE];
+
+	do_print((SUMMARY | EVENT_FORMAT | EVENT_SYSTEMS), "\t[Events format, ");
+
+	if (read_file_number(fd, &systems, 4))
+		die("cannot read the count of the event systems");
+
+	do_print((SUMMARY | EVENT_FORMAT | EVENT_SYSTEMS), "%d systems]\n", systems);
+
+	for (; systems; systems--) {
+		if (read_file_string(fd, buf, DUMP_SIZE))
+			die("cannot read the name of the %dth system", systems);
+		if (read_file_number(fd, &events, 4))
+			die("cannot read the count of the events in system %s",
+			    buf);
+		do_print(EVENT_SYSTEMS, "\t\t%s %d [system, events]\n", buf, events);
+
+		for (; events; events--) {
+			if (read_file_number(fd, &size, 8))
+				die("cannot read the format size of the %dth event from system %s",
+				    events, buf);
+			read_dump_string(fd, size, EVENT_FORMAT);
+		}
+	}
+}
+
+/* Read the 4 byte size of the kallsyms data and dump that many bytes. */
+static void dump_kallsyms(int fd)
+{
+	unsigned int bytes;
+
+	do_print((SUMMARY | KALLSYMS), "\t[Kallsyms, ");
+
+	if (read_file_number(fd, &bytes, 4) != 0)
+		die("cannot read the size of the kallsyms");
+
+	do_print((SUMMARY | KALLSYMS), "%d bytes]\n", bytes);
+
+	read_dump_string(fd, bytes, KALLSYMS);
+}
+
+/* Read the 4 byte size of the trace printk data and dump that many bytes. */
+static void dump_printk(int fd)
+{
+	unsigned int bytes;
+
+	do_print((SUMMARY | TRACE_PRINTK), "\t[Trace printk, ");
+
+	if (read_file_number(fd, &bytes, 4) != 0)
+		die("cannot read the size of the trace printk");
+
+	do_print((SUMMARY | TRACE_PRINTK), "%d bytes]\n", bytes);
+
+	read_dump_string(fd, bytes, TRACE_PRINTK);
+}
+
+/*
+ * Read the 8 byte size of the saved command lines and dump that many
+ * bytes.
+ */
+static void dump_cmdlines(int fd)
+{
+	unsigned long long size;
+
+	do_print((SUMMARY | CMDLINES), "\t[Saved command lines, ");
+
+	if (read_file_number(fd, &size, 8))
+		die("cannot read the size of the saved command lines");
+
+	/* size is 64 bit wide: %lld, as in the other 64 bit size messages */
+	do_print((SUMMARY | CMDLINES), "%lld bytes]\n", size);
+
+	read_dump_string(fd, size, CMDLINES);
+}
+
+/* Read the 4 byte CPU count into trace_cpus and print it. */
+static void dump_cpus_count(int fd)
+{
+	if (read_file_number(fd, &trace_cpus, 4) != 0)
+		die("cannot read the cpu count");
+
+	do_print(SUMMARY, "\t%d [CPUs with tracing data]\n", trace_cpus);
+}
+
+/*
+ * Print the header of the option @desc and, when @size is not 0, dump
+ * its @size bytes of text.
+ */
+static void dump_option_string(int fd, int size, char *desc)
+{
+	do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size);
+	if (size == 0)
+		return;
+
+	read_dump_string(fd, size, OPTIONS);
+}
+
+/*
+ * Read and print a section header located at the current position of @fd.
+ * The header is read as: 2 byte id, 2 byte flags, 4 byte offset of the
+ * description in the metadata strings, 8 byte section size.
+ * @v selects the dump item the message is printed for.
+ * @flags, when not NULL, receives the flags of the section.
+ * Dies when any of the fields cannot be read.
+ */
+static void dump_section_header(int fd, enum dump_items v, unsigned short *flags)
+{
+ unsigned long long offset, size;
+ unsigned short fl;
+ unsigned short id;
+ const char *desc;
+ int desc_id;
+
+ /* remember where the section starts, for the message below */
+ offset = lseek64(fd, 0, SEEK_CUR);
+ if (read_file_number(fd, &id, 2))
+ die("cannot read the section id");
+
+ if (read_file_number(fd, &fl, 2))
+ die("cannot read the section flags");
+
+ if (read_file_number(fd, &desc_id, 4))
+ die("no section description");
+
+ /* the description is stored as an offset into the metadata strings */
+ desc = get_metadata_string(desc_id);
+ if (!desc)
+ desc = "Unknown";
+
+ if (read_file_number(fd, &size, 8))
+ die("cannot read section size");
+
+ do_print(v, "\t[Section %d @ %lld: \"%s\", flags 0x%X, %lld bytes]\n",
+ id, offset, desc, fl, size);
+
+ if (flags)
+ *flags = fl;
+}
+
+/*
+ * Read and print a buffer option.
+ * @option: id of the option; TRACECMD_OPTION_BUFFER is followed by page
+ *	    size and per CPU data, any other id is printed as latency data
+ * @size: size of the option, as read from the file
+ *
+ * The option starts with the 8 byte offset and the name of the buffer.
+ * Files older than FILE_VERSION_SECTIONS carry nothing else. Otherwise
+ * the section header at that offset is printed and the clock, and for
+ * TRACECMD_OPTION_BUFFER the page size and the per CPU id, offset and
+ * data size, are read from the option.
+ * Dies on a too small option or on any read or seek error.
+ */
+static void dump_option_buffer(int fd, unsigned short option, int size)
+{
+	unsigned long long total_size = 0;
+	unsigned long long data_size;
+	unsigned long long current;
+	unsigned long long offset;
+	unsigned short flags;
+	char clock[DUMP_SIZE];
+	char name[DUMP_SIZE];
+	int page_size;
+	int cpus = 0;
+	int id;
+	int i;
+
+	if (size < 8)
+		die("broken buffer option with size %d", size);
+
+	if (read_file_number(fd, &offset, 8))
+		die("cannot read the offset of the buffer option");
+
+	if (read_file_string(fd, name, DUMP_SIZE))
+		die("cannot read the name of the buffer option");
+
+	if (file_version < FILE_VERSION_SECTIONS) {
+		do_print(OPTIONS|FLYRECORD, "\t\t[Option BUFFER, %d bytes]\n", size);
+		do_print(OPTIONS|FLYRECORD, "%lld [offset]\n", offset);
+		do_print(OPTIONS|FLYRECORD, "\"%s\" [name]\n", name);
+		return;
+	}
+
+	/* print the section header of the buffer, then come back to the option */
+	current = lseek64(fd, 0, SEEK_CUR);
+	if (lseek64(fd, offset, SEEK_SET) == (off_t)-1)
+		die("cannot goto buffer offset %lld", offset);
+
+	dump_section_header(fd, FLYRECORD, &flags);
+
+	if (lseek64(fd, current, SEEK_SET) == (off_t)-1)
+		die("cannot go back to buffer option");
+
+	do_print(OPTIONS|FLYRECORD, "\t\t[Option BUFFER, %d bytes]\n", size);
+	do_print(OPTIONS|FLYRECORD, "%lld [offset]\n", offset);
+	do_print(OPTIONS|FLYRECORD, "\"%s\" [name]\n", name);
+
+	if (read_file_string(fd, clock, DUMP_SIZE))
+		die("cannot read clock of the buffer option");
+
+	do_print(OPTIONS|FLYRECORD, "\"%s\" [clock]\n", clock);
+	if (option == TRACECMD_OPTION_BUFFER) {
+		if (read_file_number(fd, &page_size, 4))
+			die("cannot read the page size of the buffer option");
+		do_print(OPTIONS|FLYRECORD, "%d [Page size, bytes]\n", page_size);
+
+		if (read_file_number(fd, &cpus, 4))
+			die("cannot read the cpu count of the buffer option");
+
+		do_print(OPTIONS|FLYRECORD, "%d [CPUs]:\n", cpus);
+		for (i = 0; i < cpus; i++) {
+			if (read_file_number(fd, &id, 4))
+				die("cannot read the id of cpu %d from the buffer option", i);
+
+			if (read_file_number(fd, &offset, 8))
+				die("cannot read the offset of cpu %d from the buffer option", i);
+
+			if (read_file_number(fd, &data_size, 8))
+				die("cannot read the data size of cpu %d from the buffer option", i);
+
+			total_size += data_size;
+			do_print(OPTIONS|FLYRECORD, " %d %lld\t%lld\t[id, data offset and size]\n",
+				 id, offset, data_size);
+		}
+		/* '[' needs no escaping in a C string literal */
+		do_print(SUMMARY, "\t[buffer \"%s\", \"%s\" clock, %d page size, "
+			 "%d cpus, %lld bytes flyrecord data]\n",
+			 name, clock, page_size, cpus, total_size);
+	} else {
+		do_print(SUMMARY, "\t[buffer \"%s\", \"%s\" clock, latency data]\n", name, clock);
+	}
+
+}
+
+/*
+ * Print the option @desc, whose value is an integer of @size bytes.
+ * Dies when @size does not fit an int or the value cannot be read.
+ */
+static void dump_option_int(int fd, int size, char *desc)
+{
+	int val = 0;
+
+	do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size);
+	/* the size comes from the file and must not overflow val */
+	if (size < 1 || size > (int)sizeof(val))
+		die("broken %s option with size %d", desc, size);
+	if (read_file_number(fd, &val, size))
+		die("cannot read the %s option", desc);
+	do_print(OPTIONS, "%d\n", val);
+}
+
+/*
+ * Print, in hexadecimal, the option @desc, whose value is an integer of
+ * @size bytes. Dies when the value cannot be read, which includes sizes
+ * that read_file_number() does not support.
+ */
+static void dump_option_xlong(int fd, int size, char *desc)
+{
+	long long val = 0;
+
+	do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size);
+	if (read_file_number(fd, &val, size))
+		die("cannot read the %s option with size %d", desc, size);
+	do_print(OPTIONS, "0x%llX\n", val);
+}
+
+/*
+ * Time shift samples of one CPU, as read by dump_option_timeshift().
+ * Each of the four arrays is allocated with @count elements.
+ */
+struct time_shift_cpu {
+ unsigned int count;
+ long long *scalings;
+ long long *frac;
+ long long *offsets;
+ unsigned long long *times;
+};
+
+/*
+ * Read and print a time shift option of @size bytes.
+ * Prints "Broken time shift option" when the option is too small for its
+ * header, a header field cannot be read, or the data ends before all the
+ * samples announced by the header were read.
+ */
+static void dump_option_timeshift(int fd, int size)
+{
+	struct time_shift_cpu *cpus_data;
+	long long trace_id;
+	unsigned int flags;
+	unsigned int cpus;
+	int i, j;
+
+	/*
+	 * long long int (8 bytes) trace session ID
+	 * int (4 bytes) protocol flags
+	 * int (4 bytes) count of CPUs
+	 * for each CPU:
+	 *   int (4 bytes) count of samples
+	 *   long long array of size [count] of times,
+	 *      when the offsets were calculated.
+	 *   long long array of size [count] of timestamp offsets.
+	 *   long long array of size [count] of timestamp scalings.
+	 * if there is more data, for each CPU:
+	 *   long long array of size [count] of scaling fraction bits.
+	 */
+	if (size < 16) {
+		do_print(OPTIONS, "Broken time shift option, size %d", size);
+		return;
+	}
+	do_print(OPTIONS, "\t\t[Option TimeShift, %d bytes]\n", size);
+	if (read_file_number(fd, &trace_id, 8))
+		goto broken;
+	size -= 8;
+	do_print(OPTIONS, "0x%llX [peer's trace id]\n", trace_id);
+	if (read_file_number(fd, &flags, 4))
+		goto broken;
+	size -= 4;
+	do_print(OPTIONS, "0x%X [peer's protocol flags]\n", flags);
+	if (read_file_number(fd, &cpus, 4))
+		goto broken;
+	size -= 4;
+	do_print(OPTIONS, "0x%X [peer's CPU count]\n", cpus);
+	cpus_data = calloc(cpus, sizeof(struct time_shift_cpu));
+	if (!cpus_data)
+		return;
+	for (j = 0; j < cpus; j++) {
+		if (size < 4)
+			goto out;
+		read_file_number(fd, &cpus_data[j].count, 4);
+		size -= 4;
+		do_print(OPTIONS, "%u [samples count for CPU %d]\n", cpus_data[j].count, j);
+		cpus_data[j].times = calloc(cpus_data[j].count, sizeof(long long));
+		cpus_data[j].offsets = calloc(cpus_data[j].count, sizeof(long long));
+		cpus_data[j].scalings = calloc(cpus_data[j].count, sizeof(long long));
+		cpus_data[j].frac = calloc(cpus_data[j].count, sizeof(long long));
+		if (!cpus_data[j].times || !cpus_data[j].offsets ||
+		    !cpus_data[j].scalings || !cpus_data[j].frac)
+			goto out;
+		for (i = 0; i < cpus_data[j].count; i++) {
+			if (size < 8)
+				goto out;
+			read_file_number(fd, cpus_data[j].times + i, 8);
+			size -= 8;
+		}
+		for (i = 0; i < cpus_data[j].count; i++) {
+			if (size < 8)
+				goto out;
+			read_file_number(fd, cpus_data[j].offsets + i, 8);
+			size -= 8;
+		}
+		for (i = 0; i < cpus_data[j].count; i++) {
+			if (size < 8)
+				goto out;
+			read_file_number(fd, cpus_data[j].scalings + i, 8);
+			size -= 8;
+		}
+	}
+
+	/* remaining data holds the fraction bits of every CPU */
+	if (size > 0) {
+		for (j = 0; j < cpus; j++) {
+			if (!cpus_data[j].frac)
+				goto out;
+			for (i = 0; i < cpus_data[j].count; i++) {
+				if (size < 8)
+					goto out;
+				read_file_number(fd, cpus_data[j].frac + i, 8);
+				size -= 8;
+			}
+		}
+	}
+
+	for (j = 0; j < cpus; j++) {
+		for (i = 0; i < cpus_data[j].count; i++)
+			do_print(OPTIONS, "\t%lld %lld %llu %llu[offset * scaling >> fraction @ time]\n",
+				 cpus_data[j].offsets[i], cpus_data[j].scalings[i],
+				 cpus_data[j].frac[i], cpus_data[j].times[i]);
+
+	}
+
+out:
+	/* j is smaller than cpus only when one of the loops was left early */
+	if (j < cpus)
+		do_print(OPTIONS, "Broken time shift option\n");
+	for (j = 0; j < cpus; j++) {
+		free(cpus_data[j].times);
+		free(cpus_data[j].offsets);
+		free(cpus_data[j].scalings);
+		free(cpus_data[j].frac);
+	}
+	free(cpus_data);
+	return;
+
+broken:
+	do_print(OPTIONS, "Broken time shift option\n");
+}
+
+/*
+ * dump_option_guest - print the content of a GUEST option
+ * @fd: descriptor of the trace file, positioned at the option payload
+ * @size: size of the payload in bytes, as read from the option header
+ *
+ * Payload layout:
+ *   Guest name, null terminated string
+ *   long long (8 bytes) trace-id
+ *   int (4 bytes) number of guest CPUs
+ *   array of size number of guest CPUs:
+ *	int (4 bytes) Guest CPU id
+ *	int (4 bytes) Host PID, running the guest CPU
+ *
+ * The payload comes from the file, so nothing in it is trusted: the
+ * name must be terminated inside the buffer and every fixed size field
+ * is checked against the bytes that are left. Parsing stops silently
+ * at the first inconsistency.
+ */
+void dump_option_guest(int fd, int size)
+{
+	unsigned long long trace_id;
+	char *buf, *p, *end;
+	int cpu, pid;
+	int cpus;
+	int i;
+
+	do_print(OPTIONS, "\t\t[Option GUEST, %d bytes]\n", size);
+
+	/* An empty (or bogus negative) payload cannot even hold the name */
+	if (size <= 0)
+		return;
+
+	buf = calloc(1, size);
+	if (!buf)
+		return;
+	if (read_file_bytes(fd, buf, size))
+		goto out;
+
+	/* Without a terminator inside the payload, strlen() would overrun buf */
+	end = memchr(buf, '\0', size);
+	if (!end)
+		goto out;
+
+	p = buf;
+	do_print(OPTIONS, "%s [Guest name]\n", p);
+	size -= (int)(end - buf) + 1;
+	p = end + 1;
+
+	/*
+	 * Compare as int: "size < sizeof(x)" converts size to an unsigned
+	 * type, which would let a negative remainder pass the check.
+	 */
+	if (size < (int)sizeof(long long))
+		goto out;
+	trace_id = tep_read_number(tep, p, sizeof(long long));
+	size -= sizeof(long long);
+	p += sizeof(long long);
+	do_print(OPTIONS, "0x%llX [trace id]\n", trace_id);
+
+	if (size < (int)sizeof(int))
+		goto out;
+	cpus = tep_read_number(tep, p, sizeof(int));
+	size -= sizeof(int);
+	p += sizeof(int);
+	do_print(OPTIONS, "%d [Guest CPUs]\n", cpus);
+
+	for (i = 0; i < cpus; i++) {
+		/* Each entry is a (guest cpu, host pid) pair of ints */
+		if (size < (int)(2 * sizeof(int)))
+			goto out;
+		cpu = tep_read_number(tep, p, sizeof(int));
+		size -= sizeof(int);
+		p += sizeof(int);
+		pid = tep_read_number(tep, p, sizeof(int));
+		size -= sizeof(int);
+		p += sizeof(int);
+		do_print(OPTIONS, " %d %d [guest cpu, host pid]\n", cpu, pid);
+	}
+
+out:
+	free(buf);
+}
+
+/*
+ * dump_option_tsc2nsec - print the content of a TSC2NSEC option
+ * @fd: descriptor of the trace file, positioned at the option payload
+ * @size: payload size from the option header, only reported
+ *
+ * Reads, in this order, a 4 byte multiplier, a 4 byte shift and an
+ * 8 byte offset. Dies if any of them cannot be read.
+ */
+void dump_option_tsc2nsec(int fd, int size)
+{
+	unsigned long long offset;
+	int shift;
+	int mult;
+
+	do_print(OPTIONS, "\n\t\t[Option TSC2NSEC, %d bytes]\n", size);
+
+	if (read_file_number(fd, &mult, 4) != 0)
+		die("cannot read tsc2nsec multiplier");
+
+	if (read_file_number(fd, &shift, 4) != 0)
+		die("cannot read tsc2nsec shift");
+
+	if (read_file_number(fd, &offset, 8) != 0)
+		die("cannot read tsc2nsec offset");
+
+	do_print(OPTIONS, "%d %d %llu [multiplier, shift, offset]\n",
+		 mult, shift, offset);
+}
+
+/*
+ * dump_option_section - remember a section that an option points to
+ * @fd: descriptor of the trace file, positioned at the option payload
+ * @size: payload size from the option header, only reported
+ * @id: option id, stored as the id of the section
+ * @desc: name of the section, used in the printed line
+ * @v: verbosity mask stored with the section
+ *
+ * Reads the 8 byte section offset from the option and puts a new entry
+ * at the head of the global "sections" list. Dies on allocation or
+ * read failure.
+ */
+static void dump_option_section(int fd, unsigned int size,
+				unsigned short id, char *desc, enum dump_items v)
+{
+	struct file_section *new_sec = calloc(1, sizeof(*new_sec));
+
+	if (!new_sec)
+		die("cannot allocate new section");
+
+	new_sec->id = id;
+	new_sec->verbosity = v;
+
+	/* Link first, so the entry is on the list even if the read dies */
+	new_sec->next = sections;
+	sections = new_sec;
+
+	if (read_file_number(fd, &new_sec->offset, 8))
+		die("cannot read the option %d offset", id);
+
+	do_print(OPTIONS, "\t\t[Option %s, %d bytes] @ %lld\n", desc, size, new_sec->offset);
+}
+
+/*
+ * dump_sections - dump every section recorded in the "sections" list
+ * @fd: descriptor of the trace file
+ * @count: number of sections, only printed in the closing summary line
+ *
+ * For each list entry: seek to its offset, dump the section header,
+ * uncompress the block if the header flags ask for it, and call the
+ * dumper that matches the section id. Ids without a dumper only get
+ * their header printed.
+ */
+static void dump_sections(int fd, int count)
+{
+	struct file_section *cur;
+	unsigned short flags;
+
+	for (cur = sections; cur; cur = cur->next) {
+		if (lseek64(fd, cur->offset, SEEK_SET) == (off_t)-1)
+			die("cannot goto option offset %lld", cur->offset);
+
+		dump_section_header(fd, cur->verbosity, &flags);
+
+		if ((flags & TRACECMD_SEC_FL_COMPRESS) && uncompress_block())
+			die("cannot uncompress section block");
+
+		switch (cur->id) {
+		case TRACECMD_OPTION_HEADER_INFO:
+			dump_header_page(fd);
+			dump_header_event(fd);
+			break;
+		case TRACECMD_OPTION_FTRACE_EVENTS:
+			dump_ftrace_events_format(fd);
+			break;
+		case TRACECMD_OPTION_EVENT_FORMATS:
+			dump_events_format(fd);
+			break;
+		case TRACECMD_OPTION_KALLSYMS:
+			dump_kallsyms(fd);
+			break;
+		case TRACECMD_OPTION_PRINTK:
+			dump_printk(fd);
+			break;
+		case TRACECMD_OPTION_CMDLINES:
+			dump_cmdlines(fd);
+			break;
+		default:
+			break;
+		}
+
+		/* Called for every section, compressed or not */
+		uncompress_reset();
+	}
+
+	do_print(SUMMARY|SECTIONS, "\t[%d sections]\n", count);
+}
+
+static int dump_options_read(int fd);
+
+/*
+ * dump_option_done - handle the DONE option that ends an options block
+ * @fd: descriptor of the trace file, positioned at the option payload
+ * @size: payload size from the option header
+ *
+ * For files older than FILE_VERSION_SECTIONS, or with a payload
+ * shorter than 8 bytes, nothing more is read. Otherwise the payload is
+ * the offset of the next options block; a non zero offset is followed
+ * and that block is dumped too.
+ *
+ * Returns the number of options found in the chained blocks, 0 if
+ * there is none.
+ */
+static int dump_option_done(int fd, int size)
+{
+	unsigned long long next_off;
+
+	do_print(OPTIONS, "\t\t[Option DONE, %d bytes]\n", size);
+
+	if (size < 8 || file_version < FILE_VERSION_SECTIONS)
+		return 0;
+
+	if (read_file_number(fd, &next_off, 8))
+		die("cannot read the next options offset");
+
+	do_print(OPTIONS, "%lld\n", next_off);
+
+	/* A zero offset marks the last options block */
+	if (next_off == 0)
+		return 0;
+
+	if (lseek64(fd, next_off, SEEK_SET) == (off_t)-1)
+		die("cannot goto next options offset %lld", next_off);
+
+	do_print(OPTIONS, "\n\n");
+
+	return dump_options_read(fd);
+}
+
+/*
+ * dump_options_read - dump one block of options, following chained blocks
+ * @fd: descriptor of the trace file, positioned at the start of the block
+ *
+ * Each option is a 2 byte id followed by a 4 byte size and the payload.
+ * The payload is handed to the dumper matching the id; unknown ids are
+ * reported and skipped by seeking over their payload.
+ *
+ * Returns the number of options seen, including those of the blocks
+ * reached through dump_option_done().
+ */
+static int dump_options_read(int fd)
+{
+ unsigned short flags = 0;
+ unsigned short option;
+ unsigned int size;
+ int count = 0;
+
+ /* Only files with sections have a section header in front of the options */
+ if (file_version >= FILE_VERSION_SECTIONS)
+ dump_section_header(fd, OPTIONS, &flags);
+
+ if ((flags & TRACECMD_SEC_FL_COMPRESS) && uncompress_block())
+ die("cannot uncompress file block");
+
+ for (;;) {
+ if (read_file_number(fd, &option, 2))
+ die("cannot read the option id");
+ /* In older files DONE ends the list here: no size is read, and it is not counted */
+ if (option == TRACECMD_OPTION_DONE && file_version < FILE_VERSION_SECTIONS)
+ break;
+ if (read_file_number(fd, &size, 4))
+ die("cannot read the option size");
+
+ count++;
+ switch (option) {
+ case TRACECMD_OPTION_DATE:
+ dump_option_string(fd, size, "DATE");
+ break;
+ case TRACECMD_OPTION_CPUSTAT:
+ dump_option_string(fd, size, "CPUSTAT");
+ break;
+ case TRACECMD_OPTION_BUFFER:
+ case TRACECMD_OPTION_BUFFER_TEXT:
+ dump_option_buffer(fd, option, size);
+ break;
+ case TRACECMD_OPTION_TRACECLOCK:
+ do_print(OPTIONS, "\t\t[Option TRACECLOCK, %d bytes]\n", size);
+ read_dump_string(fd, size, OPTIONS | CLOCK);
+ /* dump_clock() checks this flag before reading the clock */
+ has_clock = 1;
+ break;
+ case TRACECMD_OPTION_UNAME:
+ dump_option_string(fd, size, "UNAME");
+ break;
+ case TRACECMD_OPTION_HOOK:
+ dump_option_string(fd, size, "HOOK");
+ break;
+ case TRACECMD_OPTION_OFFSET:
+ dump_option_string(fd, size, "OFFSET");
+ break;
+ case TRACECMD_OPTION_CPUCOUNT:
+ dump_option_int(fd, size, "CPUCOUNT");
+ break;
+ case TRACECMD_OPTION_VERSION:
+ dump_option_string(fd, size, "VERSION");
+ break;
+ case TRACECMD_OPTION_PROCMAPS:
+ dump_option_string(fd, size, "PROCMAPS");
+ break;
+ case TRACECMD_OPTION_TRACEID:
+ dump_option_xlong(fd, size, "TRACEID");
+ break;
+ case TRACECMD_OPTION_TIME_SHIFT:
+ dump_option_timeshift(fd, size);
+ break;
+ case TRACECMD_OPTION_GUEST:
+ dump_option_guest(fd, size);
+ break;
+ case TRACECMD_OPTION_TSC2NSEC:
+ dump_option_tsc2nsec(fd, size);
+ break;
+ /* The options below only record where a section is; dump_sections() dumps it */
+ case TRACECMD_OPTION_HEADER_INFO:
+ dump_option_section(fd, size, option, "HEADERS", HEAD_PAGE | HEAD_EVENT);
+ break;
+ case TRACECMD_OPTION_FTRACE_EVENTS:
+ dump_option_section(fd, size, option, "FTRACE EVENTS", FTRACE_FORMAT);
+ break;
+ case TRACECMD_OPTION_EVENT_FORMATS:
+ dump_option_section(fd, size, option,
+ "EVENT FORMATS", EVENT_SYSTEMS | EVENT_FORMAT);
+ break;
+ case TRACECMD_OPTION_KALLSYMS:
+ dump_option_section(fd, size, option, "KALLSYMS", KALLSYMS);
+ break;
+ case TRACECMD_OPTION_PRINTK:
+ dump_option_section(fd, size, option, "PRINTK", TRACE_PRINTK);
+ break;
+ case TRACECMD_OPTION_CMDLINES:
+ dump_option_section(fd, size, option, "CMDLINES", CMDLINES);
+ break;
+ case TRACECMD_OPTION_DONE:
+ /* Reset before dump_option_done() reads the next offset and possibly recurses */
+ uncompress_reset();
+ count += dump_option_done(fd, size);
+ return count;
+ default:
+ do_print(OPTIONS, " %d %d\t[Unknown option, size - skipping]\n",
+ option, size);
+ do_lseek(fd, size, SEEK_CUR);
+ break;
+ }
+ }
+ uncompress_reset();
+ return count;
+}
+
+/*
+ * dump_options - dump all options starting at the current file position
+ * and print how many were found.
+ */
+static void dump_options(int fd)
+{
+	int nr_options = dump_options_read(fd);
+
+	do_print(SUMMARY|OPTIONS, "\t[%d options]\n", nr_options);
+}
+
+/*
+ * dump_latency - report that the file holds latency tracing data.
+ * Only a summary line is printed; @fd is not read.
+ */
+static void dump_latency(int fd)
+{
+ do_print(SUMMARY, "\t[Latency tracing data]\n");
+}
+
+/*
+ * dump_clock - print the tracing clock saved in the file
+ * @fd: descriptor of the trace file, positioned at the clock data
+ *
+ * Nothing is read unless a TRACECLOCK option was seen (has_clock).
+ * The clock is stored as an 8 byte size followed by that many bytes of
+ * text. Dies if the size is invalid or the data cannot be read.
+ */
+static void dump_clock(int fd)
+{
+	long long size;
+	char *clock;
+
+	do_print((SUMMARY | CLOCK), "\t[Tracing clock]\n");
+	if (!has_clock) {
+		do_print((SUMMARY | CLOCK), "\t\t No tracing clock saved in the file\n");
+		return;
+	}
+	if (read_file_number(fd, &size, 8))
+		die("cannot read clock size");
+
+	/* The size comes from the file: reject what cannot be allocated as size + 1 */
+	if (size < 0 || (unsigned long long)size >= (size_t)-1)
+		die("invalid clock size %lld", size);
+
+	/* One extra byte for the terminator written below */
+	clock = calloc(1, (size_t)size + 1);
+	if (!clock)
+		die("cannot allocate clock %lld bytes", size);
+
+	if (read_file_bytes(fd, clock, size))
+		die("cannot read clock %lld bytes", size);
+	clock[size] = 0;
+	do_print((SUMMARY | CLOCK), "\t\t%s\n", clock);
+	free(clock);
+}
+
+/*
+ * dump_flyrecord - dump the per CPU data table of a flyrecord file
+ * @fd: descriptor of the trace file, positioned after the flyrecord marker
+ *
+ * Reads an (offset, size) pair of 8 byte numbers for each of the
+ * trace_cpus CPUs, then dumps the tracing clock. Dies if a value
+ * cannot be read.
+ */
+static void dump_flyrecord(int fd)
+{
+	long long offset, size;
+	int cpu = 0;
+
+	do_print((SUMMARY | FLYRECORD), "\t[Flyrecord tracing data]\n");
+
+	while (cpu < trace_cpus) {
+		if (read_file_number(fd, &offset, 8))
+			die("cannot read the cpu %d offset", cpu);
+		if (read_file_number(fd, &size, 8))
+			die("cannot read the cpu %d size", cpu);
+		do_print(FLYRECORD, "\t %10.lld %10.lld\t[offset, size of cpu %d]\n",
+			 offset, size, cpu);
+		cpu++;
+	}
+
+	dump_clock(fd);
+}
+
+/*
+ * dump_therest - dump what follows the fixed part of the header
+ * @fd: descriptor of the trace file
+ *
+ * Reads 10 byte markers and dispatches on them until an unknown one is
+ * found; the file position is then moved back to the start of that
+ * marker. Dies if a marker cannot be read.
+ */
+static void dump_therest(int fd)
+{
+	char marker[10];
+
+	while (1) {
+		if (read_file_bytes(fd, marker, 10))
+			die("cannot read the rest of the header");
+
+		if (!strncmp(marker, HEAD_OPTIONS, 10)) {
+			dump_options(fd);
+			continue;
+		}
+		if (!strncmp(marker, HEAD_LATENCY, 10)) {
+			dump_latency(fd);
+			continue;
+		}
+		if (!strncmp(marker, HEAD_FLYRECORD, 10)) {
+			dump_flyrecord(fd);
+			continue;
+		}
+
+		/* Not a known marker: un-read it and stop */
+		lseek64(fd, -10, SEEK_CUR);
+		break;
+	}
+}
+
+/*
+ * dump_v6_file - dump a file older than FILE_VERSION_SECTIONS
+ * @fd: descriptor of the trace file, positioned after the initial format
+ *
+ * The dumpers are called one after the other on the same descriptor,
+ * so the call order is the order in which the data is read from the file.
+ */
+static void dump_v6_file(int fd)
+{
+ dump_header_page(fd);
+ dump_header_event(fd);
+ dump_ftrace_events_format(fd);
+ dump_events_format(fd);
+ dump_kallsyms(fd);
+ dump_printk(fd);
+ dump_cmdlines(fd);
+ dump_cpus_count(fd);
+ dump_therest(fd);
+}
+
+/*
+ * read_metadata_strings - append a block of strings to meta_strings
+ * @fd: descriptor of the trace file, positioned at the strings
+ * @size: number of bytes to read
+ *
+ * The block is a sequence of null terminated strings. It is appended
+ * to the global meta_strings buffer and each string is printed with
+ * its offset in that buffer.
+ *
+ * A spare terminating byte is kept after the data, so that a block
+ * whose last string is not terminated in the file cannot make
+ * strlen() or the "%s" conversion run past the allocation.
+ *
+ * Returns 0 on success, -1 on allocation or read failure.
+ */
+static int read_metadata_strings(int fd, unsigned long long size)
+{
+	unsigned long long psize;
+	char *str, *strings;
+	int ret;
+
+	/* +1: room for the terminator added after the data */
+	strings = realloc(meta_strings, meta_strings_size + size + 1);
+	if (!strings)
+		return -1;
+	meta_strings = strings;
+
+	ret = read_file_bytes(fd, meta_strings + meta_strings_size, size);
+	if (ret < 0)
+		return -1;
+	meta_strings[meta_strings_size + size] = '\0';
+
+	do_print(STRINGS, "\t[String @ offset]\n");
+	/* 64 bit counter, like size, so the comparison below cannot wrap */
+	psize = 0;
+	while (psize < size) {
+		str = meta_strings + meta_strings_size + psize;
+		do_print(STRINGS, "\t\t\"%s\" @ %d\n", str, (int)(meta_strings_size + psize));
+		psize += strlen(str) + 1;
+	}
+
+	meta_strings_size += size;
+
+	return 0;
+}
+
+/*
+ * get_meta_strings - load every strings section of the file
+ * @fd: descriptor of the trace file, positioned at the first section
+ *
+ * Walks the section headers (2 byte id, 2 byte flags, 4 byte
+ * description id, 8 byte size). Sections with the id
+ * TRACECMD_OPTION_STRINGS are read into the metadata strings, all the
+ * others are skipped. The walk ends when no further id can be read or
+ * when a section cannot be skipped or loaded. The original file
+ * position is restored before returning.
+ */
+static void get_meta_strings(int fd)
+{
+	unsigned long long offset, size;
+	unsigned int csize, rsize;
+	unsigned short fl, id;
+	int desc_id;
+	int ret;
+
+	offset = lseek64(fd, 0, SEEK_CUR);
+	do {
+		if (read_file_number(fd, &id, 2))
+			break;
+		if (read_file_number(fd, &fl, 2))
+			die("cannot read section flags");
+		if (read_file_number(fd, &desc_id, 4))
+			die("cannot read section description");
+		if (read_file_number(fd, &size, 8))
+			die("cannot read section size");
+		if (id == TRACECMD_OPTION_STRINGS) {
+			if ((fl & TRACECMD_SEC_FL_COMPRESS)) {
+				/* Never use rsize if it could not be read */
+				if (read_file_number(fd, &csize, 4) ||
+				    read_file_number(fd, &rsize, 4))
+					break;
+				/* Step back over the two sizes before uncompress_block() */
+				lseek64(fd, -8, SEEK_CUR);
+				if (uncompress_block())
+					break;
+				size = rsize;
+			}
+			/* size is passed as is: no truncation of the 64 bit value */
+			ret = read_metadata_strings(fd, size);
+			uncompress_reset();
+			/* After a failed read the file position cannot be trusted */
+			if (ret)
+				break;
+		} else {
+			if (lseek64(fd, size, SEEK_CUR) == (off_t)-1)
+				break;
+		}
+	} while (1);
+
+	if (lseek64(fd, offset, SEEK_SET) == (off_t)-1)
+		die("cannot restore the original file location");
+}
+
+/*
+ * walk_v7_sections - print one line for every section of the file
+ * @fd: descriptor of the trace file, positioned at the first section
+ *
+ * Reads each section header (2 byte id, 2 byte flags, 4 byte
+ * description id, 8 byte size), prints it and seeks over the section
+ * body. The walk ends when no further id can be read or a seek fails.
+ * The original file position is restored before returning.
+ *
+ * Returns the number of section headers read.
+ */
+static int walk_v7_sections(int fd)
+{
+ unsigned long long offset, soffset, size;
+ unsigned short fl;
+ unsigned short id;
+ int csize, rsize;
+ int count = 0;
+ int desc_id;
+ const char *desc;
+
+ offset = lseek64(fd, 0, SEEK_CUR);
+ do {
+ /* Offset of the section header, printed with the section */
+ soffset = lseek64(fd, 0, SEEK_CUR);
+ if (read_file_number(fd, &id, 2))
+ break;
+
+ if (read_file_number(fd, &fl, 2))
+ die("cannot read section flags");
+
+ if (read_file_number(fd, &desc_id, 4))
+ die("cannot read section description");
+
+ desc = get_metadata_string(desc_id);
+ if (!desc)
+ desc = "Unknown";
+
+ if (read_file_number(fd, &size, 8))
+ die("cannot read section size");
+
+ /* NOTE(review): this message has no trailing newline, unlike the other lines printed here */
+ if (id >= TRACECMD_OPTION_MAX)
+ do_print(SECTIONS, "Unknown section id %d: %s", id, desc);
+
+ count++;
+ if (fl & TRACECMD_SEC_FL_COMPRESS) {
+ if (id == TRACECMD_OPTION_BUFFER ||
+ id == TRACECMD_OPTION_BUFFER_TEXT) {
+ do_print(SECTIONS,
+ "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, "
+ "%lld compressed bytes]\n",
+ id, soffset, desc, fl, size);
+ } else {
+ if (read_file_number(fd, &csize, 4))
+ die("cannot read section size");
+
+ if (read_file_number(fd, &rsize, 4))
+ die("cannot read section size");
+
+ do_print(SECTIONS, "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, "
+ "%d compressed, %d uncompressed]\n",
+ id, soffset, desc, fl, csize, rsize);
+ /* The two sizes just read (8 bytes) are not skipped again below */
+ size -= 8;
+ }
+ } else {
+ do_print(SECTIONS, "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, %lld bytes]\n",
+ id, soffset, desc, fl, size);
+ }
+
+ /* Skip the rest of the section */
+ if (lseek64(fd, size, SEEK_CUR) == (off_t)-1)
+ break;
+ } while (1);
+
+ if (lseek64(fd, offset, SEEK_SET) == (off_t)-1)
+ die("cannot restore the original file location");
+
+ return count;
+}
+
+/*
+ * dump_v7_file - dump a file that is organized in sections
+ * @fd: descriptor of the trace file, positioned after the compression header
+ *
+ * Reads the 8 byte offset of the first options section, loads the
+ * metadata strings, lists the sections, then dumps the options found
+ * at that offset and the sections they refer to.
+ */
+static void dump_v7_file(int fd)
+{
+	long long opt_offset;
+	int nr_sections;
+
+	if (read_file_number(fd, &opt_offset, 8))
+		die("cannot read offset of the first option section");
+
+	/* The strings are needed to print the section descriptions */
+	get_meta_strings(fd);
+	nr_sections = walk_v7_sections(fd);
+
+	if (lseek64(fd, opt_offset, SEEK_SET) == (off_t)-1)
+		die("cannot goto options offset %lld", opt_offset);
+
+	dump_options(fd);
+	dump_sections(fd, nr_sections);
+}
+
+/* free_sections - free every entry of the "sections" list, leaving it empty. */
+static void free_sections(void)
+{
+	struct file_section *next;
+
+	for (; sections; sections = next) {
+		/* Read the link before the entry goes away */
+		next = sections->next;
+		free(sections);
+	}
+}
+
+/*
+ * dump_file - dump the meta data of one trace file
+ * @file: path of the file to dump
+ *
+ * Sets the global "tep" handle for the duration of the dump and clears
+ * it again afterwards. Returns silently if the handle cannot be
+ * allocated; dies if the file cannot be opened.
+ */
+static void dump_file(const char *file)
+{
+ int fd;
+
+ tep = tep_alloc();
+ if (!tep)
+ return;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ die("cannot open '%s'\n", file);
+
+ do_print(SUMMARY, "\n Tracing meta data in file %s:\n", file);
+
+ dump_initial_format(fd);
+ dump_compress(fd);
+ /* file_version decides which of the two layouts is dumped */
+ if (file_version < FILE_VERSION_SECTIONS)
+ dump_v6_file(fd);
+ else
+ dump_v7_file(fd);
+ free_sections();
+ tep_free(tep);
+ tep = NULL;
+ close(fd);
+}
+
+/*
+ * Values returned by getopt_long() in trace_dump() for the long
+ * options that have no single character equivalent.
+ */
+enum {
+ OPT_sections = 240,
+ OPT_strings = 241,
+ OPT_verbose = 242,
+ OPT_clock = 243,
+ OPT_all = 244,
+ OPT_summary = 245,
+ OPT_flyrecord = 246,
+ OPT_options = 247,
+ OPT_cmd_lines = 248,
+ OPT_printk = 249,
+ OPT_kallsyms = 250,
+ OPT_events = 251,
+ OPT_systems = 252,
+ OPT_ftrace = 253,
+ OPT_head_event = 254,
+ OPT_head_page = 255,
+};
+
+/*
+ * trace_dump - entry point of the "dump" command
+ * @argc: number of command line arguments
+ * @argv: command line arguments; argv[1] must be "dump"
+ *
+ * Parses the options into the global "verbosity" mask and dumps the
+ * selected parts of the input file. The file is given with -i or as
+ * the first non option argument; DEFAULT_INPUT_FILE is used when
+ * neither is present. Without any selection only the summary is
+ * printed, unless only validation was requested.
+ */
+void trace_dump(int argc, char **argv)
+{
+ char *input_file = NULL;
+ bool validate = false;
+ int c;
+
+ if (argc < 2)
+ usage(argv);
+
+ if (strcmp(argv[1], "dump") != 0)
+ usage(argv);
+ for (;;) {
+ int option_index = 0;
+ static struct option long_options[] = {
+ {"all", no_argument, NULL, OPT_all},
+ {"summary", no_argument, NULL, OPT_summary},
+ {"head-page", no_argument, NULL, OPT_head_page},
+ {"head-event", no_argument, NULL, OPT_head_event},
+ {"ftrace-events", no_argument, NULL, OPT_ftrace},
+ {"systems", no_argument, NULL, OPT_systems},
+ {"events", no_argument, NULL, OPT_events},
+ {"kallsyms", no_argument, NULL, OPT_kallsyms},
+ {"printk", no_argument, NULL, OPT_printk},
+ {"cmd-lines", no_argument, NULL, OPT_cmd_lines},
+ {"options", no_argument, NULL, OPT_options},
+ {"flyrecord", no_argument, NULL, OPT_flyrecord},
+ {"clock", no_argument, NULL, OPT_clock},
+ {"strings", no_argument, NULL, OPT_strings},
+ {"sections", no_argument, NULL, OPT_sections},
+ {"validate", no_argument, NULL, 'v'},
+ {"help", no_argument, NULL, '?'},
+ {"verbose", optional_argument, NULL, OPT_verbose},
+ {NULL, 0, NULL, 0}
+ };
+
+ /* The "dump" word is skipped, so optind is relative to argv + 1 */
+ /*
+ * NOTE(review): the option string accepts 'a', but the switch below
+ * has no case for it, so "-a" ends in the default branch (usage).
+ */
+ c = getopt_long (argc-1, argv+1, "+hvai:",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+ switch (c) {
+ case 'h':
+ usage(argv);
+ break;
+ case 'i':
+ input_file = optarg;
+ break;
+ case 'v':
+ validate = true;
+ break;
+ case OPT_all:
+ verbosity = 0xFFFFFFFF;
+ break;
+ case OPT_summary:
+ verbosity |= SUMMARY;
+ break;
+ case OPT_flyrecord:
+ verbosity |= FLYRECORD;
+ break;
+ case OPT_options:
+ verbosity |= OPTIONS;
+ break;
+ case OPT_cmd_lines:
+ verbosity |= CMDLINES;
+ break;
+ case OPT_printk:
+ verbosity |= TRACE_PRINTK;
+ break;
+ case OPT_kallsyms:
+ verbosity |= KALLSYMS;
+ break;
+ case OPT_events:
+ verbosity |= EVENT_FORMAT;
+ break;
+ case OPT_systems:
+ verbosity |= EVENT_SYSTEMS;
+ break;
+ case OPT_ftrace:
+ verbosity |= FTRACE_FORMAT;
+ break;
+ case OPT_head_event:
+ verbosity |= HEAD_EVENT;
+ break;
+ case OPT_head_page:
+ verbosity |= HEAD_PAGE;
+ break;
+ case OPT_clock:
+ verbosity |= CLOCK;
+ break;
+ case OPT_verbose:
+ if (trace_set_verbose(optarg) < 0)
+ die("invalid verbose level %s", optarg);
+ break;
+ case OPT_strings:
+ verbosity |= STRINGS;
+ break;
+ case OPT_sections:
+ verbosity |= SECTIONS;
+ break;
+ default:
+ usage(argv);
+ }
+ }
+
+ /* A file given as an argument cannot be combined with -i */
+ if ((argc - optind) >= 2) {
+ if (input_file)
+ usage(argv);
+ input_file = argv[optind + 1];
+ }
+
+ if (!input_file)
+ input_file = DEFAULT_INPUT_FILE;
+
+ if (!verbosity && !validate)
+ verbosity = SUMMARY;
+
+ dump_file(input_file);
+
+ if (validate)
+ tracecmd_plog("File %s is a valid trace-cmd file\n", input_file);
+}
diff --git a/tracecmd/trace-hist.c b/tracecmd/trace-hist.c
new file mode 100644
index 00000000..efb790ac
--- /dev/null
+++ b/tracecmd/trace-hist.c
@@ -0,0 +1,1076 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * Several of the ideas in this file came from Arnaldo Carvalho de Melo's
+ * work on the perf ui.
+ */
+#define _LARGEFILE64_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <signal.h>
+
+#include "trace-hash-local.h"
+#include "trace-local.h"
+#include "list.h"
+
+static int sched_wakeup_type;
+static int sched_wakeup_new_type;
+static int sched_switch_type;
+static int function_type;
+static int function_graph_entry_type;
+static int function_graph_exit_type;
+static int kernel_stack_type;
+
+static int long_size;
+
+static struct tep_format_field *common_type_hist;
+static struct tep_format_field *common_pid_field;
+static struct tep_format_field *sched_wakeup_comm_field;
+static struct tep_format_field *sched_wakeup_new_comm_field;
+static struct tep_format_field *sched_wakeup_pid_field;
+static struct tep_format_field *sched_wakeup_new_pid_field;
+static struct tep_format_field *sched_switch_prev_field;
+static struct tep_format_field *sched_switch_next_field;
+static struct tep_format_field *sched_switch_prev_pid_field;
+static struct tep_format_field *sched_switch_next_pid_field;
+static struct tep_format_field *function_ip_field;
+static struct tep_format_field *function_parent_ip_field;
+static struct tep_format_field *function_graph_entry_func_field;
+static struct tep_format_field *function_graph_entry_depth_field;
+static struct tep_format_field *function_graph_exit_func_field;
+static struct tep_format_field *function_graph_exit_depth_field;
+static struct tep_format_field *function_graph_exit_calltime_field;
+static struct tep_format_field *function_graph_exit_rettime_field;
+static struct tep_format_field *function_graph_exit_overrun_field;
+static struct tep_format_field *kernel_stack_caller_field;
+
+static int compact;
+
+/* zalloc - allocate @size bytes of zeroed memory; NULL on failure. */
+static void *zalloc(size_t size)
+{
+	void *mem = calloc(1, size);
+
+	return mem;
+}
+
+static const char **ips;
+static int ips_idx;
+static int func_depth;
+static int current_pid = -1;
+
+/*
+ * A function stack put aside by save_stack(): a copy of the global
+ * ips, ips_idx, func_depth and current_pid values, linked into the
+ * saved_stacks list until restore_stack() finds it by pid.
+ */
+struct stack_save {
+ struct stack_save *next;
+ const char **ips;
+ int ips_idx;
+ int func_depth;
+ int pid;
+};
+
+struct stack_save *saved_stacks;
+
+/*
+ * reset_stack - forget the current stack.
+ * The ips array is not freed: a saved stack may still point to it, so
+ * callers that own the array free it themselves before calling this.
+ */
+static void reset_stack(void)
+{
+	/* Don't free here, it may be saved */
+	ips = NULL;
+	ips_idx = 0;
+	func_depth = 0;
+	current_pid = -1;
+}
+
+/*
+ * save_stack - move the current stack to the head of the saved_stacks
+ * list and start over with an empty one. Dies if the list entry
+ * cannot be allocated.
+ */
+static void save_stack(void)
+{
+	struct stack_save *entry = zalloc(sizeof(*entry));
+
+	if (!entry)
+		die("malloc");
+
+	/* The entry takes over the ips array */
+	entry->ips = ips;
+	entry->ips_idx = ips_idx;
+	entry->func_depth = func_depth;
+	entry->pid = current_pid;
+
+	entry->next = saved_stacks;
+	saved_stacks = entry;
+
+	reset_stack();
+}
+
+/*
+ * restore_stack - make the stack saved for @pid the current one
+ *
+ * The first saved stack with a matching pid is unlinked from
+ * saved_stacks; the current ips array is freed and replaced by the
+ * saved one. Nothing changes if no stack was saved for @pid.
+ */
+static void restore_stack(int pid)
+{
+	struct stack_save **link = &saved_stacks;
+	struct stack_save *found;
+
+	/* Walk the links so that unlinking needs no special case for the head */
+	while (*link && (*link)->pid != pid)
+		link = &(*link)->next;
+
+	found = *link;
+	if (!found)
+		return;
+
+	*link = found->next;
+
+	free(ips);
+	ips = found->ips;
+	ips_idx = found->ips_idx;
+	func_depth = found->func_depth;
+	current_pid = found->pid;
+
+	free(found);
+}
+
+struct pid_list;
+
+/*
+ * One function in a call chain, as built by insert_chain().
+ */
+struct chain {
+ /* Link in the global "chains" list, set by add_chain() */
+ struct chain *next;
+ /* Next entry in the "parents" list this chain belongs to */
+ struct chain *sibling;
+ /* Function name, compared by pointer; NULL for the root embedded in a pid_list */
+ const char *func;
+ /* Head of the list of chains inserted below this one */
+ struct chain *parents;
+ struct pid_list *pid_list;
+ /* Number of entries in "parents" */
+ int nr_parents;
+ /* Number of times insert_chain() went through this chain */
+ int count;
+ int total;
+ /* Event flag given to insert_chain() when this chain was created */
+ int event;
+};
+static struct chain *chains;
+static int nr_chains;
+static int total_counts;
+
+/*
+ * Call chains recorded for one pid. "chain" is the root under which
+ * save_call_chain() inserts the chains of that pid; "next" links the
+ * entries of the list_pids list.
+ */
+struct pid_list {
+ struct pid_list *next;
+ struct chain chain;
+ int pid;
+};
+static struct pid_list *list_pids;
+static struct pid_list all_pid_list;
+
+/*
+ * add_chain - put @chain at the head of the global "chains" list and
+ * count it. Dies if the chain already has a next link.
+ */
+static void add_chain(struct chain *chain)
+{
+	if (chain->next != NULL)
+		die("chain not null?");
+
+	nr_chains++;
+	chain->next = chains;
+	chains = chain;
+}
+
+/*
+ * insert_chain - account one call chain below @chain_list
+ * @pid_list: pid list stored in every chain that gets created
+ * @chain_list: chain to insert below
+ * @chain_str: function names of the call chain
+ * @size: number of entries of @chain_str still to insert
+ * @event: event flag stored in a chain created at this level
+ *
+ * Consumes @chain_str from its last entry towards the first, one entry
+ * per recursion level. At each level the matching parent (same func
+ * pointer) is reused, or a new one is created.
+ */
+static void
+insert_chain(struct pid_list *pid_list, struct chain *chain_list,
+	     const char **chain_str, int size, int event)
+{
+	/* NULL func means this is the top level of the chain */
+	int top_level = !chain_list->func;
+	struct chain *node;
+
+	/* Record all counts */
+	if (top_level)
+		total_counts++;
+
+	chain_list->count++;
+
+	if (size == 0)
+		return;
+	size--;
+
+	node = chain_list->parents;
+	while (node && node->func != chain_str[size])
+		node = node->sibling;
+
+	if (!node) {
+		chain_list->nr_parents++;
+		node = zalloc(sizeof(*node));
+		if (!node)
+			die("malloc");
+		node->func = chain_str[size];
+		node->pid_list = pid_list;
+		node->event = event;
+		node->sibling = chain_list->parents;
+		chain_list->parents = node;
+
+		/* Top level chains are also stored in the global list */
+		if (top_level)
+			add_chain(node);
+	}
+
+	insert_chain(pid_list, node, chain_str, size, 0);
+}
+
+/*
+ * save_call_chain - record a call chain for @pid
+ * @pid: pid the chain belongs to
+ * @chain: function names of the chain
+ * @size: number of entries in @chain
+ * @event: event flag passed on to insert_chain()
+ *
+ * In compact mode every chain goes to all_pid_list. Otherwise the
+ * entry of @pid in list_pids is used, and created first if needed.
+ * The entry used last is remembered between calls.
+ */
+static void save_call_chain(int pid, const char **chain, int size, int event)
+{
+	static struct pid_list *last_list;
+
+	if (compact) {
+		last_list = &all_pid_list;
+	} else if (!last_list || last_list->pid != pid) {
+		last_list = list_pids;
+		while (last_list && last_list->pid != pid)
+			last_list = last_list->next;
+
+		if (!last_list) {
+			last_list = zalloc(sizeof(*last_list));
+			if (!last_list)
+				die("malloc");
+			last_list->pid = pid;
+			last_list->next = list_pids;
+			list_pids = last_list;
+		}
+	}
+
+	insert_chain(last_list, &last_list->chain, chain, size, event);
+}
+
+/*
+ * save_stored_stacks - record the call chain of every saved stack.
+ * Each pass restores the stack at the head of saved_stacks, which
+ * removes it from the list, and saves its chain.
+ */
+static void save_stored_stacks(void)
+{
+	while (saved_stacks != NULL) {
+		int pid = saved_stacks->pid;
+
+		restore_stack(pid);
+		save_call_chain(current_pid, ips, ips_idx, 0);
+	}
+}
+
+/*
+ * flush_stack - record the current stack as a call chain, free it and
+ * start over. Does nothing when there is no current pid.
+ */
+static void flush_stack(void)
+{
+	if (current_pid >= 0) {
+		save_call_chain(current_pid, ips, ips_idx, 0);
+		free(ips);
+		reset_stack();
+	}
+}
+
+/*
+ * push_stack_func - add @func on top of the current stack
+ *
+ * Grows the ips array by one entry. Dies if the array cannot be
+ * grown; the result of realloc() is checked before it replaces ips,
+ * so a failure neither loses the old array nor writes through NULL.
+ */
+static void push_stack_func(const char *func)
+{
+	const char **stack;
+
+	stack = realloc(ips, (ips_idx + 1) * sizeof(*stack));
+	if (!stack)
+		die("malloc");
+
+	ips = stack;
+	ips[ips_idx++] = func;
+}
+
+/*
+ * pop_stack_func - drop the top entry of the current stack.
+ * The freed slot is cleared; the array is not shrunk. There is no
+ * check for an empty stack here.
+ */
+static void pop_stack_func(void)
+{
+	ips[--ips_idx] = NULL;
+}
+
+/*
+ * process_function - update the current stack from a function record
+ * @pevent: handle used to translate addresses to function names
+ * @record: the function trace record
+ *
+ * If the parent of the function is on top of the stack, the function
+ * is pushed on it. Otherwise the stack is saved as a call chain and
+ * unwound until the parent is found. If the stack ends up empty, it
+ * is restarted with the parent and the function.
+ *
+ * Dies if the record lacks the pid, ip or parent_ip field.
+ */
+static void
+process_function(struct tep_handle *pevent, struct tep_record *record)
+{
+	unsigned long long parent_ip;
+	unsigned long long ip;
+	unsigned long long val;
+	const char *parent;
+	const char *func;
+	int pid;
+	int ret;
+
+	ret = tep_read_number_field(common_pid_field, record->data, &val);
+	if (ret < 0)
+		die("no pid field for function?");
+
+	ret = tep_read_number_field(function_ip_field, record->data, &ip);
+	if (ret < 0)
+		die("no ip field for function?");
+
+	ret = tep_read_number_field(function_parent_ip_field, record->data, &parent_ip);
+	if (ret < 0)
+		die("no parent ip field for function?");
+
+	pid = val;
+
+	func = tep_find_function(pevent, ip);
+	parent = tep_find_function(pevent, parent_ip);
+
+	/* Another task: put the current stack aside and pick up its own */
+	if (current_pid >= 0 && pid != current_pid) {
+		save_stack();
+		restore_stack(pid);
+	}
+
+	current_pid = pid;
+
+	if (ips_idx) {
+		if (ips[ips_idx - 1] == parent)
+			push_stack_func(func);
+		else {
+			save_call_chain(pid, ips, ips_idx, 0);
+			while (ips_idx) {
+				pop_stack_func();
+				/* The pop may have emptied the stack: never look at ips[-1] */
+				if (ips_idx && ips[ips_idx - 1] == parent) {
+					push_stack_func(func);
+					break;
+				}
+			}
+		}
+	}
+
+	/* The above check can set ips_idx to zero again */
+	if (!ips_idx) {
+		push_stack_func(parent);
+		push_stack_func(func);
+	}
+}
+
+/*
+ * process_function_graph_entry - update the stack from a funcgraph_entry record
+ * @pevent: handle used to translate the address to a function name
+ * @record: the function graph entry record
+ *
+ * When the recorded depth differs from the stack depth, the stack is
+ * saved as a call chain and unwound down to that depth. The entered
+ * function is then pushed.
+ *
+ * Dies if the record lacks the pid, func or depth field.
+ */
+static void
+process_function_graph_entry(struct tep_handle *pevent, struct tep_record *record)
+{
+	unsigned long long depth;
+	unsigned long long ip;
+	unsigned long long val;
+	const char *func;
+	int pid;
+	int ret;
+
+	ret = tep_read_number_field(common_pid_field, record->data, &val);
+	if (ret < 0)
+		die("no pid field for function graph entry?");
+
+	ret = tep_read_number_field(function_graph_entry_func_field,
+				    record->data, &ip);
+	if (ret < 0)
+		die("no ip field for function graph entry?");
+
+	/* This reads the depth field, so the message names that field */
+	ret = tep_read_number_field(function_graph_entry_depth_field,
+				    record->data, &depth);
+	if (ret < 0)
+		die("no depth field for function graph entry?");
+
+	pid = val;
+
+	func = tep_find_function(pevent, ip);
+
+	/* Another task: put the current stack aside and pick up its own */
+	if (current_pid >= 0 && pid != current_pid) {
+		save_stack();
+		restore_stack(pid);
+	}
+
+	current_pid = pid;
+
+	if (depth != ips_idx) {
+		save_call_chain(pid, ips, ips_idx, 0);
+		while (ips_idx > depth)
+			pop_stack_func();
+	}
+
+	func_depth = depth;
+
+	push_stack_func(func);
+}
+
+/*
+ * process_function_graph_exit - update the stack from a funcgraph_exit record
+ * @pevent: tep handle (not used here)
+ * @record: the function graph exit record
+ *
+ * When the recorded depth differs from the stack depth, the stack is
+ * saved as a call chain and unwound down to that depth.
+ *
+ * Dies if the record lacks the pid or depth field.
+ */
+static void
+process_function_graph_exit(struct tep_handle *pevent, struct tep_record *record)
+{
+	unsigned long long depth;
+	unsigned long long val;
+	int pid;
+	int ret;
+
+	ret = tep_read_number_field(common_pid_field, record->data, &val);
+	if (ret < 0)
+		die("no pid field for function graph exit?");
+
+	/* This reads the depth field, so the message names that field */
+	ret = tep_read_number_field(function_graph_exit_depth_field,
+				    record->data, &depth);
+	if (ret < 0)
+		die("no depth field for function graph exit?");
+
+	pid = val;
+
+	/* Another task: put the current stack aside and pick up its own */
+	if (current_pid >= 0 && pid != current_pid) {
+		save_stack();
+		restore_stack(pid);
+	}
+
+	current_pid = pid;
+
+	if (ips_idx != depth) {
+		save_call_chain(pid, ips, ips_idx, 0);
+		while (ips_idx > depth)
+			pop_stack_func();
+	}
+
+	func_depth = depth - 1;
+}
+
+static int pending_pid = -1;
+static const char **pending_ips;
+static int pending_ips_idx;
+
+/* reset_pending_stack - free the pending stack and mark it as unused. */
+static void reset_pending_stack(void)
+{
+	free(pending_ips);
+	pending_ips = NULL;
+	pending_ips_idx = 0;
+	pending_pid = -1;
+}
+
+/*
+ * copy_stack_to_pending - make a copy of the current stack the pending
+ * stack of @pid
+ *
+ * Dies if the copy cannot be allocated. A pending stack that is still
+ * set is freed, so it cannot leak.
+ */
+static void copy_stack_to_pending(int pid)
+{
+	const char **copy;
+
+	/* At least one slot: an empty allocation may legitimately return NULL */
+	copy = zalloc(sizeof(char *) * (ips_idx ? ips_idx : 1));
+	if (!copy)
+		die("malloc");
+
+	/* ips is NULL when the stack is empty; don't hand that to memcpy() */
+	if (ips_idx)
+		memcpy(copy, ips, sizeof(char *) * ips_idx);
+
+	free(pending_ips);
+	pending_pid = pid;
+	pending_ips = copy;
+	pending_ips_idx = ips_idx;
+}
+
+/*
+ * process_kernel_stack - record the call chain of a kernel_stack record
+ * @pevent: handle used to read the addresses and translate them
+ * @record: the kernel stack record
+ *
+ * The caller addresses of the record are translated to function names
+ * and pushed, last entry first, on the current stack. If an event is
+ * pending for the same pid, its last entry is pushed on top. The
+ * result is saved as a call chain with the event flag set.
+ *
+ * A record for a pid other than the pending one drops the pending
+ * stack and is otherwise ignored.
+ */
+static void
+process_kernel_stack(struct tep_handle *pevent, struct tep_record *record)
+{
+ struct tep_format_field *field = kernel_stack_caller_field;
+ unsigned long long val;
+ void *data = record->data;
+ int do_restore = 0;
+ int pid;
+ int ret;
+
+ ret = tep_read_number_field(common_pid_field, record->data, &val);
+ if (ret < 0)
+ die("no pid field for function?");
+ pid = val;
+
+ if (pending_pid >= 0 && pid != pending_pid) {
+ reset_pending_stack();
+ return;
+ }
+
+ if (!field)
+ die("no caller field for kernel stack?");
+
+ if (pending_pid >= 0) {
+ /* Stack of a pending event: put the current stack aside, restore it at the end */
+ if (current_pid >= 0) {
+ save_stack();
+ do_restore = 1;
+ }
+ } else {
+ /* function stack trace? */
+ if (current_pid >= 0) {
+ copy_stack_to_pending(current_pid);
+ free(ips);
+ reset_stack();
+ }
+ }
+
+ current_pid = pid;
+
+ /* Need to start at the end of the callers and work up */
+ /* This first loop only finds the end: an all ones address or the end of the record */
+ for (data += field->offset; data < record->data + record->size;
+ data += long_size) {
+ unsigned long long addr;
+
+ addr = tep_read_number(pevent, data, long_size);
+
+ if ((long_size == 8 && addr == (unsigned long long)-1) ||
+ ((int)addr == -1))
+ break;
+ }
+
+ /* Walk back to the first caller; addresses without a function name are skipped */
+ for (data -= long_size; data >= record->data + field->offset; data -= long_size) {
+ unsigned long long addr;
+ const char *func;
+
+ addr = tep_read_number(pevent, data, long_size);
+ func = tep_find_function(pevent, addr);
+ if (func)
+ push_stack_func(func);
+ }
+
+ /* NOTE(review): assumes the pending stack is never empty here - confirm */
+ if (pending_pid >= 0) {
+ push_stack_func(pending_ips[pending_ips_idx - 1]);
+ reset_pending_stack();
+ }
+ save_call_chain(current_pid, ips, ips_idx, 1);
+ if (do_restore)
+ restore_stack(current_pid);
+}
+
+/*
+ * process_sched_wakeup - register the comm of a woken up task
+ * @pevent: handle the comm is registered with
+ * @record: the sched_wakeup or sched_wakeup_new record
+ * @type: event type of @record; anything but sched_wakeup_type is
+ *	  handled as sched_wakeup_new
+ *
+ * Dies if the pid field cannot be read.
+ */
+static void
+process_sched_wakeup(struct tep_handle *pevent, struct tep_record *record, int type)
+{
+	struct tep_format_field *comm_field;
+	struct tep_format_field *pid_field;
+	int is_new = (type != sched_wakeup_type);
+	unsigned long long val;
+	const char *comm;
+	int pid;
+
+	if (is_new) {
+		comm_field = sched_wakeup_new_comm_field;
+		pid_field = sched_wakeup_new_pid_field;
+	} else {
+		comm_field = sched_wakeup_comm_field;
+		pid_field = sched_wakeup_pid_field;
+	}
+
+	comm = (char *)(record->data + comm_field->offset);
+	if (tep_read_number_field(pid_field, record->data, &val) < 0) {
+		if (is_new)
+			die("no pid field in sched_wakeup_new?");
+		else
+			die("no pid field in sched_wakeup?");
+	}
+
+	pid = val;
+
+	tep_register_comm(pevent, comm, pid);
+}
+
+/*
+ * process_sched_switch - register the comms of both tasks of a switch
+ * @pevent: handle the comms are registered with
+ * @record: the sched_switch record
+ *
+ * The previous task is registered first, then the next one. Dies if
+ * one of the pid fields cannot be read.
+ */
+static void
+process_sched_switch(struct tep_handle *pevent, struct tep_record *record)
+{
+	unsigned long long val;
+	const char *prev_comm;
+	const char *next_comm;
+
+	prev_comm = (char *)(record->data + sched_switch_prev_field->offset);
+	if (tep_read_number_field(sched_switch_prev_pid_field, record->data, &val) < 0)
+		die("no prev_pid field in sched_switch?");
+	tep_register_comm(pevent, prev_comm, (int)val);
+
+	next_comm = (char *)(record->data + sched_switch_next_field->offset);
+	if (tep_read_number_field(sched_switch_next_pid_field, record->data, &val) < 0)
+		die("no next_pid field in sched_switch?");
+	tep_register_comm(pevent, next_comm, (int)val);
+}
+
+/*
+ * process_event - make an event the pending stack of its task
+ * @pevent: handle used to look up the event
+ * @record: the event record
+ * @type: event type of @record
+ *
+ * A stack that is still pending is saved as a call chain first. The
+ * event name is then put on top of a copy of the current stack, and
+ * that copy becomes the pending stack.
+ *
+ * A record whose type is unknown to @pevent is ignored once the
+ * pending stack is saved. Dies if the pid field cannot be read.
+ */
+static void
+process_event(struct tep_handle *pevent, struct tep_record *record, int type)
+{
+	struct tep_event *event;
+	const char *event_name;
+	unsigned long long val;
+	int pid;
+	int ret;
+
+	if (pending_pid >= 0) {
+		save_call_chain(pending_pid, pending_ips, pending_ips_idx, 1);
+		reset_pending_stack();
+	}
+
+	/* The lookup fails for types the handle does not know: nothing to name */
+	event = tep_find_event(pevent, type);
+	if (!event)
+		return;
+	event_name = event->name;
+
+	ret = tep_read_number_field(common_pid_field, record->data, &val);
+	if (ret < 0)
+		die("no pid field for function?");
+
+	pid = val;
+
+	/*
+	 * Even if function or function graph tracer is running,
+	 * if the user ran with stack traces on events, we want to use
+	 * that instead. But unfortunately, that stack doesn't come
+	 * until after the event. Thus, we only add the event into
+	 * the pending stack.
+	 */
+	push_stack_func(event_name);
+	copy_stack_to_pending(pid);
+	pop_stack_func();
+}
+
+/*
+ * process_record - dispatch a record to the handler of its event type
+ * @pevent: handle passed on to the handlers
+ * @record: the record to process
+ *
+ * Function, function graph and kernel stack records are consumed by
+ * their own handler. Every other record goes to process_event();
+ * the sched wakeup and switch records are additionally used to
+ * register task names first.
+ *
+ * A record whose type cannot be read is ignored, rather than
+ * dispatched on an uninitialized value.
+ */
+static void
+process_record(struct tep_handle *pevent, struct tep_record *record)
+{
+	unsigned long long val;
+	int type;
+
+	if (tep_read_number_field(common_type_hist, record->data, &val) < 0)
+		return;
+	type = val;
+
+	/* Plain returns: C does not allow "return <expression>" in a void function */
+	if (type == function_type) {
+		process_function(pevent, record);
+		return;
+	}
+
+	if (type == function_graph_entry_type) {
+		process_function_graph_entry(pevent, record);
+		return;
+	}
+
+	if (type == function_graph_exit_type) {
+		process_function_graph_exit(pevent, record);
+		return;
+	}
+
+	if (type == kernel_stack_type) {
+		process_kernel_stack(pevent, record);
+		return;
+	}
+
+	if (type == sched_wakeup_type || type == sched_wakeup_new_type)
+		process_sched_wakeup(pevent, record, type);
+
+	else if (type == sched_switch_type)
+		process_sched_switch(pevent, record);
+
+	process_event(pevent, record, type);
+}
+
+/*
+ * update_event - look up the event @sys/@name and store its id in @id
+ *
+ * Returns the event, or NULL if it is not found; @id is left
+ * untouched in that case.
+ */
+static struct tep_event *
+update_event(struct tep_handle *pevent,
+	     const char *sys, const char *name, int *id)
+{
+	struct tep_event *ev = tep_find_event_by_name(pevent, sys, name);
+
+	if (ev)
+		*id = ev->id;
+
+	return ev;
+}
+
+/*
+ * update_sched_wakeup - cache the id of sched/sched_wakeup and its
+ * "comm" and "pid" fields. Nothing is changed if the event is missing.
+ */
+static void update_sched_wakeup(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "sched", "sched_wakeup", &sched_wakeup_type);
+	if (ev) {
+		sched_wakeup_comm_field = tep_find_field(ev, "comm");
+		sched_wakeup_pid_field = tep_find_field(ev, "pid");
+	}
+}
+
+/*
+ * update_sched_wakeup_new - cache the id of sched/sched_wakeup_new and
+ * its "comm" and "pid" fields. Nothing is changed if the event is
+ * missing.
+ */
+static void update_sched_wakeup_new(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "sched", "sched_wakeup_new", &sched_wakeup_new_type);
+	if (ev) {
+		sched_wakeup_new_comm_field = tep_find_field(ev, "comm");
+		sched_wakeup_new_pid_field = tep_find_field(ev, "pid");
+	}
+}
+
+/*
+ * update_sched_switch - cache the id of sched/sched_switch and its
+ * comm and pid fields of the previous and the next task. Nothing is
+ * changed if the event is missing.
+ */
+static void update_sched_switch(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "sched", "sched_switch", &sched_switch_type);
+	if (ev) {
+		sched_switch_prev_field = tep_find_field(ev, "prev_comm");
+		sched_switch_next_field = tep_find_field(ev, "next_comm");
+		sched_switch_prev_pid_field = tep_find_field(ev, "prev_pid");
+		sched_switch_next_pid_field = tep_find_field(ev, "next_pid");
+	}
+}
+
+/* Cache the id and the "ip"/"parent_ip" fields of ftrace:function, if present. */
+static void update_function(struct tep_handle *pevent)
+{
+	struct tep_event *func;
+
+	func = update_event(pevent, "ftrace", "function", &function_type);
+	if (func) {
+		function_ip_field = tep_find_field(func, "ip");
+		function_parent_ip_field = tep_find_field(func, "parent_ip");
+	}
+}
+
+/* Cache the id and the "func"/"depth" fields of ftrace:funcgraph_entry, if present. */
+static void update_function_graph_entry(struct tep_handle *pevent)
+{
+	struct tep_event *entry;
+
+	entry = update_event(pevent, "ftrace", "funcgraph_entry", &function_graph_entry_type);
+	if (entry) {
+		function_graph_entry_func_field = tep_find_field(entry, "func");
+		function_graph_entry_depth_field = tep_find_field(entry, "depth");
+	}
+}
+
+/*
+ * Cache the id of ftrace:funcgraph_exit and its func, depth, calltime,
+ * rettime and overrun fields.  Nothing is touched when the event is absent.
+ */
+static void update_function_graph_exit(struct tep_handle *pevent)
+{
+	struct tep_event *gexit;
+
+	gexit = update_event(pevent, "ftrace", "funcgraph_exit", &function_graph_exit_type);
+	if (gexit) {
+		function_graph_exit_func_field = tep_find_field(gexit, "func");
+		function_graph_exit_depth_field = tep_find_field(gexit, "depth");
+		function_graph_exit_calltime_field = tep_find_field(gexit, "calltime");
+		function_graph_exit_rettime_field = tep_find_field(gexit, "rettime");
+		function_graph_exit_overrun_field = tep_find_field(gexit, "overrun");
+	}
+}
+
+/* Cache the id and the "caller" field of ftrace:kernel_stack, if present. */
+static void update_kernel_stack(struct tep_handle *pevent)
+{
+	struct tep_event *stack;
+
+	stack = update_event(pevent, "ftrace", "kernel_stack", &kernel_stack_type);
+	if (stack)
+		kernel_stack_caller_field = tep_find_field(stack, "caller");
+}
+
+/* Selects which link of a struct chain a list operation follows. */
+enum field { NEXT_PTR, SIB_PTR };
+
+/* Return the successor of @chain along the link selected by @field. */
+static struct chain *next_ptr(struct chain *chain, enum field field)
+{
+	return (field == NEXT_PTR) ? chain->next : chain->sibling;
+}
+
+/*
+ * Cut the list starting at @orig (which holds @size nodes linked through
+ * @field) in two.  The first (size + 1) / 2 nodes stay reachable from
+ * @orig; the remainder is detached and returned.  Lists shorter than two
+ * nodes are not split and NULL is returned.
+ */
+static struct chain *split_chain(struct chain *orig, int size, enum field field)
+{
+	struct chain **link;
+	struct chain *tail;
+	int hops;
+
+	if (size < 2)
+		return NULL;
+
+	/* Advance to the last node of the first half */
+	for (hops = (size + 1) / 2 - 1; hops > 0; hops--)
+		orig = next_ptr(orig, field);
+
+	link = (field == NEXT_PTR) ? &orig->next : &orig->sibling;
+	tail = *link;
+	*link = NULL;
+
+	return tail;
+}
+
+/*
+ * Merge sort step: sort list @a (@nr_a nodes) and list @b (@nr_b nodes),
+ * both linked through @field, and merge them into one list ordered by
+ * descending ->count.  Returns the head of the merged list.
+ *
+ * If either list is NULL the other one is returned as is (unsorted).
+ * The length of @a is verified against @nr_a and the program dies on a
+ * mismatch; @nr_b is not verified.
+ */
+static struct chain *
+merge_chains(struct chain *a, int nr_a, struct chain *b, int nr_b, enum field field)
+{
+ struct chain *chain;
+ struct chain *final;
+ struct chain **next = &final;
+ int i;
+
+ if (!a)
+ return b;
+ if (!b)
+ return a;
+
+ /* Sanity check: count the nodes of @a */
+ for (i = 0, chain = a; chain; i++, chain = next_ptr(chain, field))
+ ;
+ if (i != nr_a)
+ die("WTF %d %d", i, nr_a);
+
+ /* Recursively sort each input by splitting it and merging its halves */
+ chain = split_chain(a, nr_a, field);
+ a = merge_chains(chain, nr_a / 2, a, (nr_a + 1) / 2, field);
+
+ chain = split_chain(b, nr_b, field);
+ b = merge_chains(chain, nr_b / 2, b, (nr_b + 1) / 2, field);
+
+ /*
+ * Append the node with the larger count (ties take @b) to the result.
+ * @next always points at the link field that receives the next node.
+ */
+ while (a && b) {
+ if (a->count > b->count) {
+ *next = a;
+ if (field == NEXT_PTR)
+ next = &a->next;
+ else
+ next = &a->sibling;
+ a = *next;
+ *next = NULL;
+ } else {
+ *next = b;
+ if (field == NEXT_PTR)
+ next = &b->next;
+ else
+ next = &b->sibling;
+ b = *next;
+ *next = NULL;
+ }
+ }
+ /* One list ran out; attach whatever is left of the other */
+ if (a)
+ *next = a;
+ else
+ *next = b;
+
+ return final;
+}
+
+/*
+ * Sort the parents of @chain (linked through ->sibling) with
+ * merge_chains(), then do the same for every parent, recursively.
+ */
+static void sort_chain_parents(struct chain *chain)
+{
+	int total = chain->nr_parents;
+	struct chain *second_half;
+	struct chain *p;
+
+	second_half = split_chain(chain->parents, total, SIB_PTR);
+	chain->parents = merge_chains(second_half, total / 2,
+				      chain->parents, (total + 1) / 2,
+				      SIB_PTR);
+
+	for (p = chain->parents; p; p = p->sibling)
+		sort_chain_parents(p);
+}
+
+/*
+ * Sort the global list of chains (linked through ->next) with
+ * merge_chains(), then sort the parents of every chain.
+ */
+static void sort_chains(void)
+{
+	struct chain *second_half;
+	struct chain *c;
+
+	/* The half left in "chains" is never shorter than the detached half */
+	second_half = split_chain(chains, nr_chains, NEXT_PTR);
+	chains = merge_chains(second_half, nr_chains / 2,
+			      chains, (nr_chains + 1) / 2, NEXT_PTR);
+
+	for (c = chains; c; c = c->next)
+		sort_chain_parents(c);
+}
+
+/* Return @partial as a percentage of @total. */
+static double get_percent(int total, int partial)
+{
+	double ratio = (double)partial / (double)total;
+
+	return ratio * 100.0;
+}
+
+/*
+ * Return 1 when @chain and all of its ancestors have at most one parent
+ * each (the call chain never branches), 0 otherwise.
+ */
+static int single_chain(struct chain *chain)
+{
+	while (chain->nr_parents <= 1) {
+		if (!chain->parents)
+			return 1;
+		chain = chain->parents;
+	}
+
+	return 0;
+}
+
+#define START " |\n"
+#define TICK " --- "
+#define BLANK " "
+#define LINE " |"
+#define INDENT " "
+
+/* Bit N set means indent column N must be drawn with LINE instead of INDENT */
+unsigned long long line_mask;
+
+/*
+ * Print @indent columns of indentation, using LINE for every column whose
+ * bit is set in line_mask and INDENT for the others.
+ */
+void make_indent(int indent)
+{
+	int i;
+
+	for (i = 0; i < indent; i++) {
+		/* 1ULL: the mask is 64 bits wide, an int shift breaks at bit 31 */
+		if (line_mask & (1ULL << i))
+			printf(LINE);
+		else
+			printf(INDENT);
+	}
+}
+
+/* Print the function name of the first parent of @chain at depth @indent. */
+static void
+print_single_parent(struct chain *chain, int indent)
+{
+	const char *name = chain->parents->func;
+
+	make_indent(indent);
+	printf(BLANK);
+	printf("%s\n", name);
+}
+
+/*
+ * Follow the ->parents link from @chain upwards, printing each parent on
+ * its own line at depth @indent.  @pevent is not used.
+ */
+static void
+dump_chain(struct tep_handle *pevent, struct chain *chain, int indent)
+{
+	for (; chain->parents; chain = chain->parents)
+		print_single_parent(chain, indent);
+}
+
+/*
+ * Print the tree of parents of @chain, starting at depth @indent.
+ *
+ * A chain that never branches is printed as a flat list by dump_chain().
+ * Otherwise each parent gets a percentage line (its count relative to
+ * @chain's count) followed by its own ancestors one level deeper.
+ * line_mask tracks which indent columns still have siblings to come so
+ * make_indent() draws the vertical connector for them.
+ */
+static void print_parents(struct tep_handle *pevent, struct chain *chain, int indent)
+{
+ struct chain *parent = chain->parents;
+ int x;
+
+ if (single_chain(chain)) {
+ dump_chain(pevent, chain, indent);
+ return;
+ }
+
+ /* More than one branch below: draw a connector in this column */
+ line_mask |= 1ULL << (indent);
+
+ for (x = 0; parent; x++, parent = parent->sibling) {
+ struct chain *save_parent;
+
+ make_indent(indent + 1);
+ printf("\n");
+
+ make_indent(indent + 1);
+
+ printf("--%%%.2f-- %s # %d\n",
+ get_percent(chain->count, parent->count),
+ parent->func, parent->count);
+
+ /* Last sibling: clear this column's bit and every bit above it */
+ if (x == chain->nr_parents - 1)
+ line_mask &= (1ULL << indent) - 1;
+
+ if (single_chain(parent))
+ dump_chain(pevent, parent, indent + 1);
+ else {
+ save_parent = parent;
+
+ /*
+ * Print the non-branching prefix (single parent with the
+ * same count) inline before recursing at the branch point.
+ */
+ while (parent && parent->parents && parent->nr_parents < 2 &&
+ parent->parents->count == parent->count) {
+ print_single_parent(parent, indent + 1);
+ parent = parent->parents;
+ }
+ if (parent)
+ print_parents(pevent, parent, indent + 1);
+ /* Restore the iterator so the sibling walk continues */
+ parent = save_parent;
+ }
+ }
+}
+
+/*
+ * Print every chain of the global list: a summary line with the chain's
+ * share of total_counts, the chain's function (wrapped in '*' when it is
+ * an event) and then its tree of parents.  In compact mode the pid and
+ * command name are replaced by "<all pids>".
+ */
+static void print_chains(struct tep_handle *pevent)
+{
+	struct chain *cur;
+
+	for (cur = chains; cur; cur = cur->next) {
+		int pid = cur->pid_list->pid;
+
+		/* Blank line between chains, but not before the first */
+		if (cur != chains)
+			printf("\n");
+
+		if (!compact) {
+			printf(" %%%3.2f (%d) %s %30s #%d\n",
+			       get_percent(total_counts, cur->count),
+			       pid,
+			       tep_data_comm_from_pid(pevent, pid),
+			       cur->func,
+			       cur->count);
+		} else {
+			printf(" %%%3.2f <all pids> %30s #%d\n",
+			       get_percent(total_counts, cur->count),
+			       cur->func,
+			       cur->count);
+		}
+
+		printf(START);
+		if (!cur->event)
+			printf(TICK "%s\n", cur->func);
+		else
+			printf(TICK "*%s*\n", cur->func);
+
+		print_parents(pevent, cur, 0);
+	}
+}
+
+/*
+ * Build and print the call-chain histogram for one trace input @handle.
+ *
+ * The first available record is peeked to find an event from which the
+ * common "type" and "pid" fields are looked up.  The ids and fields of
+ * the specially handled events are cached, every record of every CPU is
+ * fed to process_record(), pending call chains are saved, and the sorted
+ * result is printed.  Dies when the file holds no records or the common
+ * fields cannot be found.
+ */
+static void do_trace_hist(struct tracecmd_input *handle)
+{
+	struct tep_handle *pevent = tracecmd_get_tep(handle);
+	/* Must start as NULL: the peek loop does not run when cpus == 0 */
+	struct tep_record *record = NULL;
+	struct tep_event *event;
+	int cpus;
+	int cpu;
+	int ret;
+
+	cpus = tracecmd_cpus(handle);
+
+	/* Need to get any event */
+	for (cpu = 0; cpu < cpus; cpu++) {
+		record = tracecmd_peek_data(handle, cpu);
+		if (record)
+			break;
+	}
+	if (!record)
+		die("No records found in file");
+
+	ret = tep_data_type(pevent, record);
+	event = tep_find_event(pevent, ret);
+	if (!event)
+		die("Can't find event for type %d", ret);
+
+	long_size = tracecmd_long_size(handle);
+
+	common_type_hist = tep_find_common_field(event, "common_type");
+	if (!common_type_hist)
+		die("Can't find a 'type' field?");
+
+	common_pid_field = tep_find_common_field(event, "common_pid");
+	if (!common_pid_field)
+		die("Can't find a 'pid' field?");
+
+	update_sched_wakeup(pevent);
+	update_sched_wakeup_new(pevent);
+	update_sched_switch(pevent);
+	update_function(pevent);
+	update_function_graph_entry(pevent);
+	update_function_graph_exit(pevent);
+	update_kernel_stack(pevent);
+
+	for (cpu = 0; cpu < cpus; cpu++) {
+		for (;;) {
+			struct tep_record *rec;
+
+			rec = tracecmd_read_data(handle, cpu);
+			if (!rec)
+				break;
+
+			/* If we missed events, just flush out the current stack */
+			if (rec->missed_events)
+				flush_stack();
+
+			process_record(pevent, rec);
+			tracecmd_free_record(rec);
+		}
+	}
+
+	/* Save whatever call chains are still being collected */
+	if (current_pid >= 0)
+		save_call_chain(current_pid, ips, ips_idx, 0);
+	if (pending_pid >= 0)
+		save_call_chain(pending_pid, pending_ips, pending_ips_idx, 1);
+
+	save_stored_stacks();
+
+	sort_chains();
+	print_chains(pevent);
+}
+
+/*
+ * Entry point of "trace-cmd hist".
+ *
+ * Options: -h shows usage, -i <file> selects the input file (only one is
+ * allowed), -P enables compact mode.  The input file may also be given as
+ * a trailing argument; DEFAULT_INPUT_FILE is used when none is given.
+ * A histogram is produced for each buffer instance of the file, or for
+ * the top level buffer when the file has no instances.
+ */
+void trace_hist(int argc, char **argv)
+{
+	struct tracecmd_input *handle;
+	const char *input_file = NULL;
+	int instances;
+	int ret;
+
+	for (;;) {
+		int c;
+
+		/* argv[1] is the sub-command name, skip it */
+		c = getopt(argc-1, argv+1, "+hi:P");
+		if (c == -1)
+			break;
+		switch (c) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'i':
+			if (input_file)
+				die("Only one input for historgram");
+			input_file = optarg;
+			break;
+		case 'P':
+			compact = 1;
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	if ((argc - optind) >= 2) {
+		if (input_file)
+			usage(argv);
+		input_file = argv[optind + 1];
+	}
+
+	if (!input_file)
+		input_file = DEFAULT_INPUT_FILE;
+
+	handle = tracecmd_alloc(input_file, 0);
+	if (!handle)
+		die("can't open %s\n", input_file);
+
+	ret = tracecmd_read_headers(handle, 0);
+	if (ret) {
+		/* Do not leak the handle on the early return */
+		tracecmd_close(handle);
+		return;
+	}
+
+	ret = tracecmd_init_data(handle);
+	if (ret < 0)
+		die("failed to init data");
+
+	if (ret > 0)
+		die("trace-cmd hist does not work with latency traces\n");
+
+	instances = tracecmd_buffer_instances(handle);
+	if (instances) {
+		struct tracecmd_input *new_handle;
+		int i;
+
+		for (i = 0; i < instances; i++) {
+			new_handle = tracecmd_buffer_instance_handle(handle, i);
+			if (!new_handle) {
+				warning("could not retrieve handle %d", i);
+				continue;
+			}
+			do_trace_hist(new_handle);
+			tracecmd_close(new_handle);
+		}
+	} else {
+		do_trace_hist(handle);
+	}
+
+	tracecmd_close(handle);
+}
diff --git a/tracecmd/trace-list.c b/tracecmd/trace-list.c
new file mode 100644
index 00000000..fbf2882e
--- /dev/null
+++ b/tracecmd/trace-list.c
@@ -0,0 +1,760 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "tracefs.h"
+#include "trace-local.h"
+
+
+/* Copy the whole content of the file at @path to stdout; dies if it cannot be opened. */
+static void dump_file_content(const char *path)
+{
+	char chunk[BUFSIZ];
+	ssize_t got;
+	FILE *in;
+
+	in = fopen(path, "r");
+	if (!in)
+		die("reading %s", path);
+
+	while ((got = fread(chunk, 1, BUFSIZ, in)) > 0)
+		fwrite(chunk, 1, got, stdout);
+
+	fclose(in);
+}
+
+
+
+/* Print the content of the tracing file @name that belongs to @instance. */
+void show_instance_file(struct buffer_instance *instance, const char *name)
+{
+	char *file = tracefs_instance_get_file(instance->tracefs, name);
+
+	dump_file_content(file);
+	tracefs_put_tracing_file(file);
+}
+
+/*
+ * Flags selecting which per-event file "trace-cmd list -e" shows.
+ * They are set from the command line in trace_list().
+ */
+enum {
+ /* -F: show the event's "format" file */
+ SHOW_EVENT_FORMAT = 1 << 0,
+ /* -l: show the event's "filter" file */
+ SHOW_EVENT_FILTER = 1 << 1,
+ /* -R: show the event's "trigger" file */
+ SHOW_EVENT_TRIGGER = 1 << 2,
+ /* --full: with the format, do not stop at the "print fmt" line */
+ SHOW_EVENT_FULL = 1 << 3,
+};
+
+
+/* Print the content of the top level tracing file @name. */
+void show_file(const char *name)
+{
+	char *file = tracefs_get_tracing_file(name);
+
+	dump_file_content(file);
+	tracefs_put_tracing_file(file);
+}
+
+/* Callback invoked with one matching entry: text, its length, SHOW_EVENT_* flags */
+typedef int (*process_file_func)(char *buf, int len, int flags);
+
+/*
+ * Call @func for every line of the tracing file @name that matches the
+ * case-insensitive regular expression @re.
+ *
+ * With a NULL or empty @re the whole file is simply printed.  A trailing
+ * '$' in @re is rewritten so that it still matches before the newline that
+ * ends each line.  Dies on allocation failure, an invalid regex, or when
+ * the file cannot be opened.
+ */
+static void process_file_re(process_file_func func,
+			    const char *name, const char *re, int flags)
+{
+	regex_t reg;
+	char *path;
+	char *buf = NULL;
+	char *str;
+	FILE *fp;
+	ssize_t n;
+	/* Only measure @re once it is known to be non-NULL */
+	size_t l = re ? strlen(re) : 0;
+	size_t cap = 0;
+
+	/* Just in case :-p */
+	if (l == 0) {
+		show_file(name);
+		return;
+	}
+
+	/* Handle the newline at end of names for the user */
+	str = malloc(l + 3);
+	if (!str)
+		die("Failed to allocate reg ex %s", re);
+	strcpy(str, re);
+	if (re[l-1] == '$')
+		strcpy(&str[l-1], "\n*$");
+
+	if (regcomp(&reg, str, REG_ICASE|REG_NOSUB))
+		die("invalid function regex '%s'", re);
+
+	free(str);
+
+	path = tracefs_get_tracing_file(name);
+	fp = fopen(path, "r");
+	if (!fp)
+		die("reading %s", path);
+	tracefs_put_tracing_file(path);
+
+	do {
+		n = getline(&buf, &cap, fp);
+		if (n > 0 && regexec(&reg, buf, 0, NULL, 0) == 0)
+			func(buf, n, flags);
+	} while (n > 0);
+	free(buf);
+	fclose(fp);
+
+	regfree(&reg);
+}
+
+/* Build the string "<system>:<event>" and hand it to @func with @flags. */
+static void show_event(process_file_func func, const char *system,
+		       const char *event, int flags)
+{
+	char *full_name;
+
+	if (asprintf(&full_name, "%s:%s", system, event) < 0)
+		die("Can not allocate event");
+
+	func(full_name, strlen(full_name), flags);
+	free(full_name);
+}
+
+/*
+ * Call @func (through show_event()) for every event of @system.
+ * Nothing is done when the event list of the system cannot be read.
+ */
+static void show_system(process_file_func func, const char *system, int flags)
+{
+	char **events;
+	int e;
+
+	events = tracefs_system_events(NULL, system);
+	if (!events) /* die? */
+		return;
+
+	for (e = 0; events[e]; e++)
+		show_event(func, system, events[e], flags);
+
+	/* Release the list, as match_system_events() does */
+	tracefs_list_free(events);
+}
+
+/* Show every event of every system in the NULL terminated list @systems. */
+static void show_event_systems(process_file_func func, char **systems, int flags)
+{
+	char **sys;
+
+	for (sys = systems; *sys; sys++)
+		show_system(func, *sys, flags);
+}
+
+/*
+ * Call @func (through show_event()) for each event of @system whose name
+ * matches the compiled regex @reg.  Nothing is done when the event list
+ * of the system cannot be read.
+ */
+static void match_system_events(process_file_func func, const char *system,
+				regex_t *reg, int flags)
+{
+	char **list = tracefs_system_events(NULL, system);
+	char **ev;
+
+	if (!list) /* die? */
+		return;
+
+	for (ev = list; *ev; ev++) {
+		if (regexec(reg, *ev, 0, NULL, 0) != 0)
+			continue;
+		show_event(func, system, *ev, flags);
+	}
+
+	tracefs_list_free(list);
+}
+
+/*
+ * Call @func for every event selected by the regular expression @re.
+ *
+ * @re has the form "system[:event]".  With both parts, events matching
+ * "event" inside systems matching "system" are selected.  With only one
+ * part, all events of matching systems are selected and, unless @re ends
+ * with ':', so are events of other systems whose name matches.  A NULL or
+ * empty @re selects every event.  The "ftrace" system is handled
+ * explicitly since it is not part of the systems list.
+ *
+ * Falls back to matching lines of "available_events" when the list of
+ * event systems cannot be read.  Dies on allocation failure or an invalid
+ * regex.
+ */
+static void process_events(process_file_func func, const char *re, int flags)
+{
+	const char *ftrace = "ftrace";
+	regex_t system_reg;
+	regex_t event_reg;
+	char *str;
+	/* Only measure @re once it is known to be non-NULL */
+	size_t l = re ? strlen(re) : 0;
+	bool just_systems = true;
+	char **systems;
+	char *system;
+	char *event;
+	int s;
+
+	systems = tracefs_event_systems(NULL);
+	if (!systems) {
+		if (l)
+			process_file_re(func, "available_events", re, flags);
+		else
+			show_file("available_events");
+		return;
+	}
+
+	if (l == 0) {
+		show_event_systems(func, systems, flags);
+		tracefs_list_free(systems);
+		return;
+	}
+
+	str = strdup(re);
+	if (!str)
+		die("Can not allocate momory for regex");
+
+	system = strtok(str, ":");
+	/* A regex made only of ':' leaves no system part to compile */
+	if (!system)
+		die("invalid regex '%s'", re);
+	event = strtok(NULL, "");
+
+	if (regcomp(&system_reg, system, REG_ICASE|REG_NOSUB))
+		die("invalid regex '%s'", system);
+
+	if (event) {
+		if (regcomp(&event_reg, event, REG_ICASE|REG_NOSUB))
+			die("invalid regex '%s'", event);
+	} else {
+		/*
+		 * If the regex ends with ":", then event would be null,
+		 * but we do not want to match events.
+		 */
+		if (re[l-1] != ':')
+			just_systems = false;
+	}
+	/* "event" is only tested for NULL from here on, never dereferenced */
+	free(str);
+
+	/*
+	 * See if this matches the special ftrace system, as ftrace is not included
+	 * in the systems list, but can get events from tracefs_system_events().
+	 */
+	if (regexec(&system_reg, ftrace, 0, NULL, 0) == 0) {
+		if (!event)
+			show_system(func, ftrace, flags);
+		else
+			match_system_events(func, ftrace, &event_reg, flags);
+	} else if (!just_systems) {
+		match_system_events(func, ftrace, &system_reg, flags);
+	}
+
+	for (s = 0; systems[s]; s++) {
+
+		if (regexec(&system_reg, systems[s], 0, NULL, 0) == 0) {
+			if (!event) {
+				show_system(func, systems[s], flags);
+				continue;
+			}
+			match_system_events(func, systems[s], &event_reg, flags);
+			continue;
+		}
+		if (just_systems)
+			continue;
+
+		match_system_events(func, systems[s], &system_reg, flags);
+	}
+	tracefs_list_free(systems);
+
+	regfree(&system_reg);
+	if (event)
+		regfree(&event_reg);
+}
+
+/* process_file_func callback: write the @len bytes of @buf to stdout. */
+static int show_file_write(char *buf, int len, int flags)
+{
+	size_t written = fwrite(buf, 1, len, stdout);
+
+	return written;
+}
+
+/* Print the lines of the tracing file @name that match the regex @re. */
+static void show_file_re(const char *name, const char *re)
+{
+ process_file_re(show_file_write, name, re, 0);
+}
+
+/*
+ * Build the path of the per-event file @type ("format", "filter", ...)
+ * for the event named in @buf as "system:event".
+ *
+ * @buf is modified: a trailing newline is stripped and strtok() splits it
+ * at ':', so on return @buf holds only the system name.  Dies when either
+ * part is missing or on allocation failure.  The returned string must be
+ * released with free().
+ */
+static char *get_event_file(const char *type, char *buf, int len)
+{
+	char *events_dir;
+	char *sys_name;
+	char *ev_name;
+	char *result;
+
+	if (buf[len-1] == '\n')
+		buf[len-1] = '\0';
+
+	sys_name = strtok(buf, ":");
+	if (!sys_name)
+		die("no system found in %s", buf);
+
+	ev_name = strtok(NULL, ":");
+	if (!ev_name)
+		die("no event found in %s\n", buf);
+
+	events_dir = tracefs_get_tracing_file("events");
+	if (asprintf(&result, "%s/%s/%s/%s", events_dir, sys_name, ev_name, type) < 0)
+		die("Failed to allocate event file %s %s", sys_name, ev_name);
+
+	tracefs_put_tracing_file(events_dir);
+
+	return result;
+}
+
+/*
+ * process_file_func callback: print the event name held in @buf, then the
+ * content of that event's "filter" file, then an empty line.
+ * Always returns 0.
+ */
+static int event_filter_write(char *buf, int len, int flags)
+{
+	char *filter_path;
+
+	/* Drop the newline so the name prints on a line of its own */
+	if (buf[len-1] == '\n')
+		buf[len-1] = '\0';
+	printf("%s\n", buf);
+
+	filter_path = get_event_file("filter", buf, len);
+	dump_file_content(filter_path);
+	free(filter_path);
+
+	printf("\n");
+	return 0;
+}
+
+/*
+ * process_file_func callback: print the event name held in @buf, then the
+ * content of that event's "trigger" file, then an empty line.
+ * Always returns 0.
+ */
+static int event_trigger_write(char *buf, int len, int flags)
+{
+	char *trigger_path;
+
+	/* Drop the newline so the name prints on a line of its own */
+	if (buf[len-1] == '\n')
+		buf[len-1] = '\0';
+	printf("%s\n", buf);
+
+	trigger_path = get_event_file("trigger", buf, len);
+	dump_file_content(trigger_path);
+	free(trigger_path);
+
+	printf("\n");
+	return 0;
+}
+
+/*
+ * process_file_func callback: print "system: <name>" followed by the
+ * content of the "format" file of the event named in @fbuf.
+ *
+ * Unless SHOW_EVENT_FULL is set in @flags, output stops at the first line
+ * that starts with "print fmt".  Dies when the format file cannot be
+ * opened.  Always returns 0.
+ */
+static int event_format_write(char *fbuf, int len, int flags)
+{
+	char *file = get_event_file("format", fbuf, len);
+	char *buf = NULL;
+	/* getline() expects a zero size along with a NULL buffer */
+	size_t l = 0;
+	FILE *fp;
+	bool full;
+	int n;
+
+	full = flags & SHOW_EVENT_FULL;
+
+	/* The get_event_file() crops system in fbuf */
+	printf("system: %s\n", fbuf);
+
+	/* Don't print the print fmt, it's ugly */
+
+	fp = fopen(file, "r");
+	if (!fp)
+		die("reading %s", file);
+
+	do {
+		n = getline(&buf, &l, fp);
+		if (n > 0) {
+			if (!full && strncmp(buf, "print fmt", 9) == 0)
+				break;
+			fwrite(buf, 1, n, stdout);
+		}
+	} while (n > 0);
+	fclose(fp);
+	free(buf);
+	free(file);
+
+	return 0;
+}
+
+/* process_file_func callback: print the event name @buf on its own line. */
+static int event_name(char *buf, int len, int flags)
+{
+	fputs(buf, stdout);
+	putchar('\n');
+
+	return 0;
+}
+
+/* Show the "filter" file of every event selected by the regex @re. */
+static void show_event_filter_re(const char *re)
+{
+ process_events(event_filter_write, re, 0);
+}
+
+
+/* Show the "trigger" file of every event selected by the regex @re. */
+static void show_event_trigger_re(const char *re)
+{
+ process_events(event_trigger_write, re, 0);
+}
+
+
+/*
+ * Show the "format" file of every event selected by the regex @re.
+ * @flags is passed on to event_format_write(), which checks SHOW_EVENT_FULL.
+ */
+static void show_event_format_re(const char *re, int flags)
+{
+ process_events(event_format_write, re, flags);
+}
+
+/* Print the "system:event" name of every event selected by the regex @re. */
+static void show_event_names_re(const char *re)
+{
+ process_events(event_name, re, 0);
+}
+
+/*
+ * List events.  Without @eventre the "available_events" file is printed;
+ * asking for event files (@flags non-zero) without naming an event is an
+ * error.  With @eventre the first of format, filter or trigger requested
+ * in @flags is shown for the matching events, or just their names when
+ * none of those is requested.
+ */
+static void show_events(const char *eventre, int flags)
+{
+	if (!eventre) {
+		if (flags)
+			die("When specifying event files, an event must be named");
+		show_file("available_events");
+		return;
+	}
+
+	if (flags & SHOW_EVENT_FORMAT) {
+		show_event_format_re(eventre, flags);
+		return;
+	}
+	if (flags & SHOW_EVENT_FILTER) {
+		show_event_filter_re(eventre);
+		return;
+	}
+	if (flags & SHOW_EVENT_TRIGGER) {
+		show_event_trigger_re(eventre);
+		return;
+	}
+
+	show_event_names_re(eventre);
+}
+
+
+/* Print the content of the "available_tracers" tracing file. */
+static void show_tracers(void)
+{
+ show_file("available_tracers");
+}
+
+/*
+ * Print the trace options of @buffer (top level instance when NULL), one
+ * per line, each preceded by @prefix (may be NULL for none).
+ *
+ * When the instance has an "options" directory, every entry whose value
+ * can be read as a number is printed as "<name>" when non-zero and
+ * "no<name>" otherwise.  When that directory is not available the top
+ * level "trace_options" file is printed instead.
+ */
+void show_options(const char *prefix, struct buffer_instance *buffer)
+{
+	struct tracefs_instance *instance = NULL;
+	struct dirent *entry;
+	struct stat st;
+	char *path;
+	DIR *dir;
+
+	if (buffer)
+		instance = buffer->tracefs;
+	if (!prefix)
+		prefix = "";
+
+	path = tracefs_instance_get_file(instance, "options");
+	if (!path || stat(path, &st) < 0 || !S_ISDIR(st.st_mode))
+		goto show_file;
+
+	dir = opendir(path);
+	if (!dir)
+		die("Can not read instance directory");
+
+	for (entry = readdir(dir); entry; entry = readdir(dir)) {
+		const char *name = entry->d_name;
+		long long val;
+		char *file;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		if (asprintf(&file, "options/%s", name) < 0)
+			die("Failed to allocate file name");
+
+		/* Entries that can not be read as a number are skipped */
+		if (tracefs_instance_file_read_number(instance, file, &val) == 0) {
+			if (val)
+				printf("%s%s\n", prefix, name);
+			else
+				printf("%sno%s\n", prefix, name);
+		}
+		free(file);
+	}
+	closedir(dir);
+	tracefs_put_tracing_file(path);
+	return;
+
+ show_file:
+	tracefs_put_tracing_file(path);
+	show_file("trace_options");
+}
+
+/*
+ * Print the content of the top level "trace_clock" file on one line,
+ * followed by TSCNSEC_CLOCK when trace_tsc2nsec_is_supported() reports
+ * support.  Dies when the file cannot be read.
+ */
+static void show_clocks(void)
+{
+	char *clocks;
+	int size;
+
+	clocks = tracefs_instance_file_read(NULL, "trace_clock", &size);
+	if (!clocks)
+		die("getting clocks");
+	/* Guard against an empty file before looking at the last byte */
+	if (size > 0 && clocks[size - 1] == '\n')
+		clocks[size - 1] = 0;
+
+	if (trace_tsc2nsec_is_supported())
+		printf("%s %s\n", clocks, TSCNSEC_CLOCK);
+	else
+		printf("%s\n", clocks);
+
+	free(clocks);
+}
+
+
+/*
+ * Print "available_filter_functions": the whole file when @funcre is
+ * NULL, otherwise only the lines matching that regex.
+ */
+static void show_functions(const char *funcre)
+{
+	if (!funcre) {
+		show_file("available_filter_functions");
+		return;
+	}
+
+	show_file_re("available_filter_functions", funcre);
+}
+
+
+/*
+ * Print the name of every entry of the tracing "instances" directory, or
+ * a notice when there is none.  Dies when the directory cannot be opened.
+ */
+static void show_buffers(void)
+{
+	struct dirent *entry;
+	char *instances_dir;
+	int found = 0;
+	DIR *dir;
+
+	instances_dir = tracefs_get_tracing_file("instances");
+	dir = opendir(instances_dir);
+	tracefs_put_tracing_file(instances_dir);
+	if (!dir)
+		die("Can not read instance directory");
+
+	for (entry = readdir(dir); entry; entry = readdir(dir)) {
+		const char *name = entry->d_name;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		printf("%s\n", name);
+		found = 1;
+	}
+	closedir(dir);
+
+	if (!found)
+		printf("No buffer instances defined\n");
+}
+
+
+/*
+ * Print the name of every sub-directory of the tracing "events"
+ * directory, followed by an empty line.  Dies when the directory cannot
+ * be opened.
+ */
+static void show_systems(void)
+{
+	struct dirent *entry;
+	char *events_dir;
+	DIR *dir;
+
+	events_dir = tracefs_get_tracing_file("events");
+	dir = opendir(events_dir);
+
+	if (!dir)
+		die("Can not read events directory");
+
+	for (entry = readdir(dir); entry; entry = readdir(dir)) {
+		const char *name = entry->d_name;
+		struct stat st;
+		char *full;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		if (asprintf(&full, "%s/%s", events_dir, name) < 0)
+			continue;
+
+		/* Only directories are event systems */
+		if (stat(full, &st) == 0 && S_ISDIR(st.st_mode))
+			printf("%s\n", name);
+
+		free(full);
+	}
+
+	printf("\n");
+	closedir(dir);
+	tracefs_put_tracing_file(events_dir);
+}
+
+/*
+ * Load the trace-cmd plugins into a temporary tep handle and print the
+ * options they offer.  Dies when the handle cannot be allocated.
+ */
+static void show_plugin_options(void)
+{
+	struct tep_handle *pevent;
+	struct tep_plugin_list *list;
+	struct trace_seq s;
+
+	tracecmd_ftrace_load_options();
+
+	pevent = tep_alloc();
+	if (!pevent)
+		die("Can not allocate pevent\n");
+
+	trace_seq_init(&s);
+
+	list = trace_load_plugins(pevent, 0);
+	tep_plugin_print_options(&s);
+	trace_seq_do_printf(&s);
+	/* Release the buffer that trace_seq_init() allocated */
+	trace_seq_destroy(&s);
+	tep_unload_plugins(list, pevent);
+	tep_free(pevent);
+}
+
+
+/* Print the plugin options; @argc and @argv are not used. */
+void trace_option(int argc, char **argv)
+{
+ show_plugin_options();
+}
+
+
+/*
+ * Load the trace-cmd plugins into a temporary tep handle and print the
+ * list of loaded plugins.  Dies when the handle cannot be allocated.
+ */
+static void show_plugins(void)
+{
+	struct tep_handle *pevent;
+	struct tep_plugin_list *list;
+	struct trace_seq s;
+
+	pevent = tep_alloc();
+	if (!pevent)
+		die("Can not allocate pevent\n");
+
+	trace_seq_init(&s);
+
+	list = trace_load_plugins(pevent, 0);
+	tep_print_plugins(&s, " ", "\n", list);
+
+	trace_seq_do_printf(&s);
+	/* Release the buffer that trace_seq_init() allocated */
+	trace_seq_destroy(&s);
+	tep_unload_plugins(list, pevent);
+	tep_free(pevent);
+}
+
+/*
+ * Print the name and version of every compression algorithm reported by
+ * tracecmd_compress_protos_get(), or a notice when there is none.
+ */
+static void show_compression(void)
+{
+	char **names, **versions;
+	int count;
+	int idx = 0;
+
+	count = tracecmd_compress_protos_get(&names, &versions);
+	if (count <= 0) {
+		printf("No compression algorithms are supported\n");
+		return;
+	}
+
+	printf("Supported compression algorithms:\n");
+	while (idx < count) {
+		printf("\t%s, %s\n", names[idx], versions[idx]);
+		idx++;
+	}
+
+	free(names);
+	free(versions);
+}
+
+/*
+ * Entry point of "trace-cmd list".
+ *
+ * Every argument from argv[2] on that starts with '-' is treated as an
+ * option; the argument following an option is taken as its value when it
+ * does not itself start with '-'.  Each selected category is printed in a
+ * fixed order.  Without any category option, event systems, events,
+ * tracers, options and compression algorithms are all shown.
+ */
+void trace_list(int argc, char **argv)
+{
+	int events = 0;
+	int tracer = 0;
+	int options = 0;
+	int funcs = 0;
+	int buffers = 0;
+	int clocks = 0;
+	int plug = 0;
+	int plug_op = 0;
+	int flags = 0;
+	int systems = 0;
+	int show_all = 1;
+	int compression = 0;
+	int i;
+	const char *arg;
+	const char *funcre = NULL;
+	const char *eventre = NULL;
+
+	for (i = 2; i < argc; i++) {
+		arg = NULL;
+		if (argv[i][0] == '-') {
+			/* Optional value: next argument, unless it is an option */
+			if (i < argc - 1) {
+				if (argv[i+1][0] != '-')
+					arg = argv[i+1];
+			}
+			switch (argv[i][1]) {
+			case 'h':
+				usage(argv);
+				break;
+			case 'e':
+				events = 1;
+				eventre = arg;
+				show_all = 0;
+				break;
+			case 'B':
+				buffers = 1;
+				show_all = 0;
+				break;
+			case 'C':
+				clocks = 1;
+				show_all = 0;
+				break;
+			case 'F':
+				flags |= SHOW_EVENT_FORMAT;
+				break;
+			case 'R':
+				flags |= SHOW_EVENT_TRIGGER;
+				break;
+			case 'l':
+				flags |= SHOW_EVENT_FILTER;
+				break;
+			case 'p':
+			case 't':
+				tracer = 1;
+				show_all = 0;
+				break;
+			case 'P':
+				plug = 1;
+				show_all = 0;
+				break;
+			case 'O':
+				plug_op = 1;
+				show_all = 0;
+				break;
+			case 'o':
+				options = 1;
+				show_all = 0;
+				break;
+			case 'f':
+				funcs = 1;
+				funcre = arg;
+				show_all = 0;
+				break;
+			case 's':
+				systems = 1;
+				show_all = 0;
+				break;
+			case 'c':
+				compression = 1;
+				show_all = 0;
+				break;
+			case '-':
+				if (strcmp(argv[i], "--debug") == 0) {
+					tracecmd_set_debug(true);
+					break;
+				}
+				if (strcmp(argv[i], "--full") == 0) {
+					flags |= SHOW_EVENT_FULL;
+					break;
+				}
+				fprintf(stderr, "list: invalid option -- '%s'\n",
+					argv[i]);
+				/*
+				 * Do not fall into the default case: it would
+				 * report a second, bogus option '-'.
+				 */
+				usage(argv);
+				break;
+			default:
+				fprintf(stderr, "list: invalid option -- '%c'\n",
+					argv[i][1]);
+				usage(argv);
+			}
+		}
+	}
+
+	if (events)
+		show_events(eventre, flags);
+
+	if (tracer)
+		show_tracers();
+
+	if (options)
+		show_options(NULL, NULL);
+
+	if (plug)
+		show_plugins();
+
+	if (plug_op)
+		show_plugin_options();
+
+	if (funcs)
+		show_functions(funcre);
+
+	if (buffers)
+		show_buffers();
+
+	if (clocks)
+		show_clocks();
+	if (systems)
+		show_systems();
+	if (compression)
+		show_compression();
+	if (show_all) {
+		printf("event systems:\n");
+		show_systems();
+		printf("events:\n");
+		show_events(NULL, 0);
+		printf("\ntracers:\n");
+		show_tracers();
+		printf("\noptions:\n");
+		show_options(NULL, NULL);
+		show_compression();
+	}
+
+	return;
+
+}
diff --git a/tracecmd/trace-listen.c b/tracecmd/trace-listen.c
new file mode 100644
index 00000000..86d2b9e9
--- /dev/null
+++ b/tracecmd/trace-listen.c
@@ -0,0 +1,1201 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#define _LARGEFILE64_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <errno.h>
+
+#ifdef VSOCK
+#include <linux/vm_sockets.h>
+#endif
+
+#include "trace-local.h"
+#include "trace-msg.h"
+
+#define dprint(fmt, ...) tracecmd_debug(fmt, ##__VA_ARGS__)
+
+#define MAX_OPTION_SIZE 4096
+
+#define _VAR_DIR_Q(dir) #dir
+#define VAR_DIR_Q(dir) _VAR_DIR_Q(dir)
+
+#define VAR_RUN_DIR VAR_DIR_Q(VAR_DIR) "/run"
+
+static char *default_output_dir = ".";
+static char *output_dir;
+static char *default_output_file = "trace";
+static char *output_file;
+
+static bool use_vsock;
+
+static int backlog = 5;
+
+static int do_daemon;
+
+/* Used for signaling INT to finish */
+static struct tracecmd_msg_handle *stop_msg_handle;
+static bool done;
+
+#define pdie(fmt, ...) \
+ do { \
+ tracecmd_plog_error(fmt, ##__VA_ARGS__);\
+ remove_pid_file(); \
+ exit(-1); \
+ } while (0)
+
+#define TEMP_FILE_STR "%s.%s:%s.cpu%d", output_file, host, port, cpu
+/*
+ * Allocate and return the temporary file name built from TEMP_FILE_STR
+ * for @host, @port and @cpu.  Returns NULL when allocation fails.
+ * Release the result with put_temp_file().
+ */
+static char *get_temp_file(const char *host, const char *port, int cpu)
+{
+	char *name;
+	int len;
+
+	/* A zero-sized snprintf() only reports the length that is needed */
+	len = snprintf(NULL, 0, TEMP_FILE_STR);
+	name = malloc(len + 1);
+	if (name)
+		sprintf(name, TEMP_FILE_STR);
+
+	return name;
+}
+
+/* Release a file name returned by get_temp_file(). */
+static void put_temp_file(char *file)
+{
+ free(file);
+}
+
+/*
+ * Install @handle as the handler of signal @sig, keeping the other
+ * settings of the current disposition but clearing SA_RESTART so that
+ * interrupted system calls fail with EINTR instead of being restarted.
+ */
+static void signal_setup(int sig, sighandler_t handle)
+{
+ struct sigaction action;
+
+ /* Start from the current disposition */
+ sigaction(sig, NULL, &action);
+ /* Make accept return EINTR */
+ action.sa_flags &= ~SA_RESTART;
+ action.sa_handler = handle;
+ sigaction(sig, &action, NULL);
+}
+
+/*
+ * Remove the temporary file named by TEMP_FILE_STR for @host, @port and
+ * @cpu.  The result of unlink() is not checked.
+ */
+static void delete_temp_file(const char *host, const char *port, int cpu)
+{
+	char name[PATH_MAX];
+
+	snprintf(name, sizeof(name), TEMP_FILE_STR);
+	unlink(name);
+}
+
+/*
+ * Read a NUL terminated string from @fd into @buf, one byte at a time.
+ *
+ * Reading stops after the terminating NUL byte has been stored, on end of
+ * file or a read error, or once @size bytes have been read.  Returns the
+ * number of bytes stored before the terminator; a return value equal to
+ * @size means no terminator was found within the buffer.
+ */
+static int read_string(int fd, char *buf, size_t size)
+{
+	size_t i;
+	int n;
+
+	for (i = 0; i < size; i++) {
+		n = read(fd, buf+i, 1);
+		/* Test n first: buf[i] only holds data when a byte was read */
+		if (n <= 0 || !buf[i])
+			break;
+	}
+
+	return i;
+}
+
+/*
+ * Apply the client supplied @option to @msg_handle.
+ * Returns 1 when the option is understood, 0 otherwise.
+ */
+static int process_option(struct tracecmd_msg_handle *msg_handle, char *option)
+{
+	/* currently the only option we have is to use TCP */
+	if (strcmp(option, "TCP") != 0)
+		return 0;
+
+	msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP;
+	return 1;
+}
+
+/*
+ * Signal handler: set the global "done" flag and, when a message handle
+ * is registered in stop_msg_handle, mark it as done too.
+ */
+static void finish(int sig)
+{
+ if (stop_msg_handle)
+ tracecmd_msg_set_done(stop_msg_handle);
+ done = true;
+}
+
+/*
+ * Write the path of the pid file into @buf, which must hold at least
+ * PATH_MAX bytes.  @mode is not used.
+ */
+static void make_pid_name(int mode, char *buf)
+{
+ snprintf(buf, PATH_MAX, VAR_RUN_DIR "/trace-cmd-net.pid");
+}
+
+/* Remove the pid file; does nothing unless do_daemon is set. */
+static void remove_pid_file(void)
+{
+	char pid_path[PATH_MAX];
+
+	if (do_daemon) {
+		make_pid_name(do_daemon, pid_path);
+		unlink(pid_path);
+	}
+}
+
+/*
+ * Body of a reader child process: copy everything received on @sfd into
+ * the temporary file for @host, @port and @cpu.
+ *
+ * For USE_TCP and USE_VSOCK the socket is first put in listening state
+ * and one connection is accepted, which then replaces @sfd.  Data is read
+ * in chunks of up to @page_size bytes until end of file or until a read
+ * is interrupted by a signal (SIGUSR1 is routed to finish()).
+ *
+ * Returns -ENOMEM when the file name cannot be allocated; otherwise the
+ * function does not return, the process exits.
+ */
+static int process_child(int sfd, const char *host, const char *port,
+ int cpu, int page_size, enum port_type type)
+{
+ struct sockaddr_storage peer_addr;
+#ifdef VSOCK
+ struct sockaddr_vm vm_addr;
+#endif
+ struct sockaddr *addr;
+ socklen_t addr_len;
+ char buf[page_size];
+ char *tempfile;
+ int left;
+ int cfd;
+ int fd;
+ int r, w;
+ int once = 0;
+
+ signal_setup(SIGUSR1, finish);
+
+ tempfile = get_temp_file(host, port, cpu);
+ if (!tempfile)
+ return -ENOMEM;
+
+ fd = open(tempfile, O_WRONLY | O_TRUNC | O_CREAT, 0644);
+ if (fd < 0)
+ pdie("creating %s", tempfile);
+
+ /*
+ * NOTE(review): addr and addr_len stay unset when type is USE_VSOCK
+ * and VSOCK is not defined, yet accept() below would use them -
+ * confirm that combination cannot be reached.
+ */
+ if (type == USE_TCP) {
+ addr = (struct sockaddr *)&peer_addr;
+ addr_len = sizeof(peer_addr);
+#ifdef VSOCK
+ } else if (type == USE_VSOCK) {
+ addr = (struct sockaddr *)&vm_addr;
+ addr_len = sizeof(vm_addr);
+#endif
+ }
+
+ if (type == USE_TCP || type == USE_VSOCK) {
+ if (listen(sfd, backlog) < 0)
+ pdie("listen");
+
+ cfd = accept(sfd, addr, &addr_len);
+ /* Interrupted by a signal: finish without reading anything */
+ if (cfd < 0 && errno == EINTR)
+ goto done;
+ if (cfd < 0)
+ pdie("accept");
+ /* Only the accepted connection is needed from here on */
+ close(sfd);
+ sfd = cfd;
+ }
+
+ for (;;) {
+ /* TODO, make this copyless! */
+ r = read(sfd, buf, page_size);
+ if (r < 0) {
+ if (errno == EINTR)
+ break;
+ pdie("reading pages from client");
+ }
+ if (!r)
+ break;
+ /* UDP requires that we get the full size in one go */
+ if (type == USE_UDP && r < page_size && !once) {
+ once = 1;
+ warning("read %d bytes, expected %d", r, page_size);
+ }
+
+ /*
+ * Write out everything that was read, handling short writes.
+ * NOTE(review): a failing write() ends this loop silently and
+ * the rest of the chunk is dropped.
+ */
+ left = r;
+ do {
+ w = write(fd, buf + (r - left), left);
+ if (w > 0)
+ left -= w;
+ } while (w >= 0 && left);
+ }
+
+ done:
+ put_temp_file(tempfile);
+ exit(0);
+}
+
+/*
+ * Create a vsocket on @start_port with trace_vsock_make() and store its
+ * descriptor in *@sfd.  Returns @start_port on success, -errno on failure.
+ */
+static int setup_vsock_port(int start_port, int *sfd)
+{
+	int sock = trace_vsock_make(start_port);
+
+	if (sock < 0)
+		return -errno;
+
+	*sfd = sock;
+	return start_port;
+}
+
+/*
+ * Create a socket bound to @port on any local address.  @type selects a
+ * stream (USE_TCP) or datagram (USE_UDP) socket; any other type is
+ * rejected.
+ *
+ * Returns the socket descriptor, or -1 when the type is not supported or
+ * no address could be bound.  Exits through pdie() when getaddrinfo()
+ * fails.
+ */
+int trace_net_make(int port, enum port_type type)
+{
+	struct addrinfo *results, *ai;
+	struct addrinfo hints;
+	char service[BUFSIZ];
+	int socktype;
+	int sock;
+
+	switch (type) {
+	case USE_TCP:
+		socktype = SOCK_STREAM;
+		break;
+	case USE_UDP:
+		socktype = SOCK_DGRAM;
+		break;
+	default:
+		return -1;
+	}
+
+	snprintf(service, BUFSIZ, "%d", port);
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_flags = AI_PASSIVE;
+	hints.ai_socktype = socktype;
+
+	if (getaddrinfo(NULL, service, &hints, &results) != 0)
+		pdie("getaddrinfo: error opening socket");
+
+	/* Use the first address on which both socket() and bind() succeed */
+	for (ai = results; ai; ai = ai->ai_next) {
+		sock = socket(ai->ai_family, ai->ai_socktype,
+			      ai->ai_protocol);
+		if (sock < 0)
+			continue;
+
+		if (!bind(sock, ai->ai_addr, ai->ai_addrlen))
+			break;
+
+		close(sock);
+	}
+	freeaddrinfo(results);
+
+	/* ai is only compared here, never dereferenced after the free */
+	if (!ai)
+		return -1;
+
+	dprint("Create listen port: %d fd:%d\n", port, sock);
+
+	return sock;
+}
+
+/*
+ * Find a usable port, starting at @start_port, and store the bound socket
+ * in *@sfd.  For USE_VSOCK only @start_port itself is tried.  For other
+ * types successive port numbers are tried until trace_net_make()
+ * succeeds; the process exits through pdie() once the port number
+ * exceeds MAX_PORT_SEARCH.  Returns the port number that was bound.
+ */
+int trace_net_search(int start_port, int *sfd, enum port_type type)
+{
+	int candidate = start_port;
+
+	if (type == USE_VSOCK)
+		return setup_vsock_port(start_port, sfd);
+
+	for (;;) {
+		*sfd = trace_net_make(candidate, type);
+		if (*sfd >= 0)
+			break;
+		if (++candidate > MAX_PORT_SEARCH)
+			pdie("No available ports to bind");
+	}
+
+	return candidate;
+}
+
+/*
+ * Fork a child that runs process_child() on @sfd for the given client
+ * (@node, @port) and @cpu.  The pid of the child is stored in *@pid.
+ * Exits through pdie() when the fork fails.
+ */
+static void fork_reader(int sfd, const char *node, const char *port,
+ int *pid, int cpu, int pagesize, enum port_type type)
+{
+ int ret;
+
+ *pid = fork();
+
+ if (*pid < 0)
+ pdie("creating reader");
+
+ /* Child: process_child() only returns on failure */
+ if (!*pid) {
+ ret = process_child(sfd, node, port, cpu, pagesize, type);
+ if (ret < 0)
+ pdie("Problem with reader %d", ret);
+ }
+
+ /* The parent has no use for the socket, the child owns its own copy */
+ close(sfd);
+}
+
+/*
+ * Bind a port at or after @start_port and fork a reader process for it.
+ * The pid of the reader is stored in *@pid.  Returns the port number, or
+ * the negative value returned by trace_net_search() on failure.
+ */
+static int open_port(const char *node, const char *port, int *pid,
+		     int cpu, int pagesize, int start_port, enum port_type type)
+{
+	int sfd;
+	int bound;
+
+	/*
+	 * trace_net_search() currently does not return an error, but if that
+	 * changes in the future, we have a check for it now.
+	 */
+	bound = trace_net_search(start_port, &sfd, type);
+	if (bound >= 0)
+		fork_reader(sfd, node, port, pid, cpu, pagesize, type);
+
+	return bound;
+}
+
+/*
+ * Perform the initial handshake with a connecting client.
+ *
+ * The server announces itself with "tracecmd" and reads the client's first
+ * string.  If that string parses to -1 the client is negotiating a protocol
+ * version; only V3 is accepted, and a client that repeats an unsupported
+ * version is rejected.  Otherwise the string is the CPU count of a v1
+ * client, followed by the page size and a list of sized options.
+ *
+ * Returns the page size on success, or a negative errno-style value on
+ * failure.  All heap buffers are released on every exit path.
+ */
+static int communicate_with_client(struct tracecmd_msg_handle *msg_handle)
+{
+	char *last_proto = NULL;
+	char *option = NULL;
+	char buf[BUFSIZ];
+	int pagesize = 0;
+	int options;
+	int size;
+	int cpus;
+	int n, s, t, i;
+	int ret = -EINVAL;
+	int fd = msg_handle->fd;
+
+	/* Let the client know what we are */
+	write(fd, "tracecmd", 8);
+
+ try_again:
+	/* read back the CPU count */
+	n = read_string(fd, buf, BUFSIZ);
+	if (n == BUFSIZ)
+		/** ERROR **/
+		goto out;
+
+	cpus = atoi(buf);
+
+	/* Is the client using the new protocol? */
+	if (cpus == -1) {
+		if (memcmp(buf, V3_CPU, n) != 0) {
+			/* If it did not send a version, then bail */
+			if (memcmp(buf, "-1V", 3)) {
+				tracecmd_plog("Unknown string %s\n", buf);
+				goto out;
+			}
+			/* Skip "-1" */
+			tracecmd_plog("Cannot handle the protocol %s\n", buf+2);
+
+			/* If it returned the same command as last time, bail! */
+			if (last_proto && strncmp(last_proto, buf, n) == 0) {
+				tracecmd_plog("Repeat of version %s sent\n", last_proto);
+				goto out;
+			}
+			free(last_proto);
+			last_proto = malloc(n + 1);
+			if (last_proto) {
+				memcpy(last_proto, buf, n);
+				last_proto[n] = 0;
+			}
+			/* Return the highest protocol we can use */
+			write(fd, "V3", 3);
+			goto try_again;
+		}
+
+		/* Let the client know we use v3 protocol */
+		write(fd, "V3", 3);
+
+		/* read the rest of dummy data */
+		n = read(fd, buf, sizeof(V3_MAGIC));
+		/* a failed or empty read must not be used as a compare length */
+		if (n <= 0 || memcmp(buf, V3_MAGIC, n) != 0)
+			goto out;
+
+		/* We're off! */
+		write(fd, "OK", 2);
+
+		msg_handle->version = V3_PROTOCOL;
+
+		/* read the CPU count, the page size, and options */
+		if ((pagesize = tracecmd_msg_initial_setting(msg_handle)) < 0)
+			goto out;
+	} else {
+		/* The client is using the v1 protocol */
+
+		tracecmd_plog("cpus=%d\n", cpus);
+		if (cpus < 0)
+			goto out;
+
+		msg_handle->cpu_count = cpus;
+
+		/* next read the page size */
+		n = read_string(fd, buf, BUFSIZ);
+		if (n == BUFSIZ)
+			/** ERROR **/
+			goto out;
+
+		pagesize = atoi(buf);
+
+		tracecmd_plog("pagesize=%d\n", pagesize);
+		if (pagesize <= 0)
+			goto out;
+
+		/* Now the number of options */
+		n = read_string(fd, buf, BUFSIZ);
+		if (n == BUFSIZ)
+			/** ERROR **/
+			goto out;
+
+		options = atoi(buf);
+
+		for (i = 0; i < options; i++) {
+			/* next is the size of the options */
+			n = read_string(fd, buf, BUFSIZ);
+			if (n == BUFSIZ)
+				/** ERROR **/
+				goto out;
+			size = atoi(buf);
+			/* prevent a client from killing us */
+			if (size <= 0 || size > MAX_OPTION_SIZE)
+				goto out;
+
+			ret = -ENOMEM;
+			option = malloc(size);
+			if (!option)
+				goto out;
+
+			ret = -EIO;
+			/*
+			 * read() may return fewer bytes than asked for;
+			 * continue after the bytes already received (t)
+			 * until the whole option has arrived.
+			 */
+			for (t = 0; t < size; t += s) {
+				s = read(fd, option + t, size - t);
+				if (s <= 0)
+					goto out;
+			}
+
+			s = process_option(msg_handle, option);
+			free(option);
+			option = NULL;
+			/* do we understand this option? */
+			ret = -EINVAL;
+			if (!s)
+				goto out;
+		}
+	}
+
+	if (msg_handle->flags & TRACECMD_MSG_FL_USE_TCP)
+		tracecmd_plog("Using TCP for live connection\n");
+
+	ret = pagesize;
+ out:
+	free(option);
+	free(last_proto);
+
+	return ret;
+}
+
+/*
+ * Create (or truncate) the output file for one client.
+ *
+ * The name is "<output_file>.<node>:<port>.dat".  Returns the open file
+ * descriptor; failure to open is reported through pdie().
+ */
+static int create_client_file(const char *node, const char *port)
+{
+	char path[BUFSIZ];
+	int fd;
+
+	snprintf(path, BUFSIZ, "%s.%s:%s.dat", output_file, node, port);
+
+	fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	if (fd >= 0)
+		return fd;
+
+	pdie("Can not create file %s", path);
+	return fd;
+}
+
+/*
+ * Kill and reap every reader recorded in @pid_array, remove the per-CPU
+ * temp file of each one, and finally free @pid_array itself.
+ * Entries that are not positive are skipped.
+ */
+static void destroy_all_readers(int cpus, int *pid_array, const char *node,
+				const char *port)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < cpus; cpu++) {
+		int reader = pid_array[cpu];
+
+		if (reader <= 0)
+			continue;
+
+		kill(reader, SIGKILL);
+		waitpid(reader, NULL, 0);
+		delete_temp_file(node, port, cpu);
+		pid_array[cpu] = 0;
+	}
+
+	free(pid_array);
+}
+
+/*
+ * Start one reader process per client CPU and tell the client which port
+ * each reader listens on.
+ *
+ * The transport is UDP unless the message handle flags select TCP or
+ * vsock.  For a V3 client the ports are sent with
+ * tracecmd_msg_send_port_array(); otherwise they are written as a
+ * comma-separated, NUL-terminated string.
+ *
+ * Returns a heap array of reader pids indexed by CPU (to be released with
+ * destroy_all_readers()), or NULL when @pagesize is zero, on allocation
+ * failure, when a port cannot be opened, or when sending the ports fails.
+ */
+static int *create_all_readers(const char *node, const char *port,
+			       int pagesize, struct tracecmd_msg_handle *msg_handle)
+{
+	enum port_type port_type = USE_UDP;
+	char buf[BUFSIZ];
+	unsigned int *port_array;
+	int *pid_array;
+	unsigned int start_port;
+	int connect_port;	/* signed: open_port() reports errors as < 0 */
+	int cpus = msg_handle->cpu_count;
+	int cpu;
+	int pid;
+
+	if (!pagesize)
+		return NULL;
+
+	if (msg_handle->flags & TRACECMD_MSG_FL_USE_TCP)
+		port_type = USE_TCP;
+	else if (msg_handle->flags & TRACECMD_MSG_FL_USE_VSOCK)
+		port_type = USE_VSOCK;
+
+	port_array = malloc(sizeof(*port_array) * cpus);
+	if (!port_array)
+		return NULL;
+
+	/* zeroed so that destroy_all_readers() skips unused slots */
+	pid_array = calloc(cpus, sizeof(*pid_array));
+	if (!pid_array) {
+		free(port_array);
+		return NULL;
+	}
+
+	start_port = START_PORT_SEARCH;
+
+	/* Now create a port for each CPU */
+	for (cpu = 0; cpu < cpus; cpu++) {
+		connect_port = open_port(node, port, &pid, cpu,
+					 pagesize, start_port, port_type);
+		if (connect_port < 0)
+			goto out_free;
+		port_array[cpu] = connect_port;
+		pid_array[cpu] = pid;
+		/*
+		 * Due to some bugs in finding ports,
+		 * force the search to start after the last port
+		 */
+		start_port = connect_port + 1;
+	}
+
+	if (msg_handle->version == V3_PROTOCOL) {
+		/* send set of port numbers to the client */
+		if (tracecmd_msg_send_port_array(msg_handle, port_array) < 0) {
+			tracecmd_plog("Failed sending port array\n");
+			goto out_free;
+		}
+	} else {
+		/* send the client a comma delimited set of port numbers */
+		for (cpu = 0; cpu < cpus; cpu++) {
+			snprintf(buf, BUFSIZ, "%s%u",
+				 cpu ? "," : "", port_array[cpu]);
+			write(msg_handle->fd, buf, strlen(buf));
+		}
+		/* end with null terminator */
+		write(msg_handle->fd, "\0", 1);
+	}
+
+	free(port_array);
+	return pid_array;
+
+ out_free:
+	free(port_array);
+	destroy_all_readers(cpus, pid_array, node, port);
+	return NULL;
+}
+
+/*
+ * Copy the v1 client's metadata stream from the connection into @ofd.
+ *
+ * Reading continues until end of stream, an error, or until
+ * tracecmd_msg_done() reports that the handle was told to stop.  An
+ * interrupted read() is retried unless the handle is done, and an
+ * interrupted or short write() resumes after the bytes already written,
+ * so no data read from the client is dropped.
+ *
+ * Returns 0 on success or a negative errno value on a read/write error.
+ */
+static int
+collect_metadata_from_client(struct tracecmd_msg_handle *msg_handle,
+			     int ofd)
+{
+	char buf[BUFSIZ];
+	int n, s, t;
+	int ifd = msg_handle->fd;
+	int ret = 0;
+
+	for (;;) {
+		n = read(ifd, buf, BUFSIZ);
+		if (n < 0) {
+			if (errno == EINTR) {
+				/* a signal may have asked us to stop */
+				if (tracecmd_msg_done(msg_handle))
+					break;
+				continue;
+			}
+			ret = -errno;
+			break;
+		}
+		if (n == 0)
+			break;
+
+		/* t counts the bytes of this chunk already written */
+		for (t = 0; t < n; t += s) {
+			s = write(ofd, buf + t, n - t);
+			if (s < 0) {
+				if (errno == EINTR) {
+					s = 0;
+					continue;
+				}
+				ret = -errno;
+				goto out;
+			}
+		}
+
+		if (tracecmd_msg_done(msg_handle))
+			break;
+	}
+
+out:
+	return ret;
+}
+
+/* Send SIGUSR1 to every reader in @pid_array that has a positive pid. */
+static void stop_all_readers(int cpus, int *pid_array)
+{
+	int *slot;
+
+	for (slot = pid_array; slot < pid_array + cpus; slot++) {
+		if (*slot <= 0)
+			continue;
+		kill(*slot, SIGUSR1);
+	}
+}
+
+/*
+ * Assemble the final trace file from the per-CPU temp files.
+ *
+ * An output handle is obtained for @ofd; when @write_options is set the
+ * CPU count, buffer info and options are written first, then the per-CPU
+ * data is appended.  Temp file names obtained so far are always released.
+ *
+ * Returns 0 on success, -ENOMEM on allocation or temp-file failure, -1 if
+ * no output handle could be obtained, or the failing writer's result.
+ */
+static int put_together_file(int cpus, int ofd, const char *node,
+			     const char *port, bool write_options)
+{
+	struct tracecmd_output *out_handle = NULL;
+	char **files;
+	int cpu;
+	int ret = -ENOMEM;
+
+	/* Now put together the file */
+	files = malloc(sizeof(*files) * cpus);
+	if (!files)
+		return -ENOMEM;
+
+	for (cpu = 0; cpu < cpus; cpu++) {
+		files[cpu] = get_temp_file(node, port, cpu);
+		if (!files[cpu])
+			goto out;
+	}
+
+	out_handle = tracecmd_get_output_handle_fd(ofd);
+	if (!out_handle) {
+		ret = -1;
+		goto out;
+	}
+
+	if (write_options) {
+		/* each step only runs if the previous one succeeded */
+		ret = tracecmd_write_cpus(out_handle, cpus);
+		if (!ret)
+			ret = tracecmd_write_buffer_info(out_handle);
+		if (!ret)
+			ret = tracecmd_write_options(out_handle);
+		if (ret)
+			goto out;
+	}
+	ret = tracecmd_write_cpu_data(out_handle, cpus, files, NULL);
+
+out:
+	tracecmd_output_close(out_handle);
+	/* cpu is one past the last name that was obtained */
+	while (--cpu >= 0)
+		put_temp_file(files[cpu]);
+	free(files);
+	return ret;
+}
+
+/*
+ * Serve one connected client from handshake to finished trace file.
+ *
+ * Steps: handshake, create the output file, start the per-CPU readers,
+ * collect the metadata stream, stop the readers, stitch the per-CPU data
+ * into the output file, and tear the readers down.
+ *
+ * Returns 0 on success or a negative value on failure.  The output file
+ * descriptor is closed here on the paths where it is never handed to
+ * put_together_file().
+ */
+static int process_client(struct tracecmd_msg_handle *msg_handle,
+			  const char *node, const char *port)
+{
+	int *pid_array;
+	int pagesize;
+	int cpus;
+	int ofd;
+	int ret;
+
+	pagesize = communicate_with_client(msg_handle);
+	if (pagesize < 0)
+		return pagesize;
+
+	ofd = create_client_file(node, port);
+
+	pid_array = create_all_readers(node, port, pagesize, msg_handle);
+	if (!pid_array) {
+		close(ofd);
+		return -ENOMEM;
+	}
+
+	/* on signal stop this msg */
+	stop_msg_handle = msg_handle;
+
+	/* Now we are ready to start reading data from the client */
+	if (msg_handle->version == V3_PROTOCOL)
+		ret = tracecmd_msg_collect_data(msg_handle, ofd);
+	else
+		ret = collect_metadata_from_client(msg_handle, ofd);
+	stop_msg_handle = NULL;
+
+	/* wait a little to let our readers finish reading */
+	sleep(1);
+
+	cpus = msg_handle->cpu_count;
+
+	/* stop our readers */
+	stop_all_readers(cpus, pid_array);
+
+	/* wait a little to have the readers clean up */
+	sleep(1);
+
+	if (!ret)
+		ret = put_together_file(cpus, ofd, node, port,
+					msg_handle->version < V3_PROTOCOL);
+	else
+		/* nobody else will use the file; do not leak it */
+		close(ofd);
+
+	destroy_all_readers(cpus, pid_array, node, port);
+
+	return ret;
+}
+
+/*
+ * Fork a process to handle the connection on @cfd.
+ *
+ * Returns 0 in the process that should handle the connection (the child,
+ * or the caller itself in debug mode where no fork happens), the child's
+ * pid in the parent (which also closes its copy of @cfd), or -1 if fork()
+ * failed.
+ */
+static int do_fork(int cfd)
+{
+	pid_t child;
+
+	/* in debug mode, we do not fork off children */
+	if (tracecmd_get_debug())
+		return 0;
+
+	child = fork();
+	if (child < 0) {
+		warning("failed to create child");
+		return -1;
+	}
+
+	if (child == 0) {
+		signal_setup(SIGINT, finish);
+		return 0;
+	}
+
+	close(cfd);
+	return child;
+}
+
+/*
+ * Check whether the peer address @addr belongs to the host called @name.
+ *
+ * The address is first converted to a host string with getnameinfo() and
+ * compared with @name directly.  If that does not match, @name is
+ * resolved with getaddrinfo() and every resulting address is converted
+ * the same way and compared against the peer's host string.
+ *
+ * Returns true on a match, false otherwise, including when either lookup
+ * fails.
+ */
+bool trace_net_cmp_connection(struct sockaddr_storage *addr, const char *name)
+{
+	char host[NI_MAXHOST], nhost[NI_MAXHOST];
+	char service[NI_MAXSERV];
+	socklen_t addr_len = sizeof(*addr);
+	struct addrinfo *result, *rp;
+	struct addrinfo hints;
+	bool found = false;
+	int s;
+
+	/* a failed lookup is "no match"; a non-zero int would convert to true */
+	if (getnameinfo((struct sockaddr *)addr, addr_len,
+			host, NI_MAXHOST,
+			service, NI_MAXSERV, NI_NUMERICSERV))
+		return false;
+
+	if (strcmp(host, name) == 0)
+		return true;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+
+	/* Check other IPs that name could be for */
+	s = getaddrinfo(name, NULL, &hints, &result);
+	if (s != 0)
+		return false;
+
+	for (rp = result; rp != NULL; rp = rp->ai_next) {
+		if (getnameinfo(rp->ai_addr, rp->ai_addrlen,
+				nhost, NI_MAXHOST,
+				service, NI_MAXSERV, NI_NUMERICSERV))
+			continue;
+		if (strcmp(host, nhost) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	freeaddrinfo(result);
+	return found;
+}
+
+/*
+ * Check whether the peer connected on socket @fd belongs to host @name.
+ *
+ * Returns false if the peer address cannot be obtained, otherwise the
+ * result of trace_net_cmp_connection() on that address.
+ */
+bool trace_net_cmp_connection_fd(int fd, const char *name)
+{
+	struct sockaddr_storage addr;
+	socklen_t addr_len = sizeof(addr);
+
+	if (getpeername(fd, (struct sockaddr *)&addr, &addr_len))
+		return false;
+
+	return trace_net_cmp_connection(&addr, name);
+}
+
+/*
+ * Log the host and service of the peer connected on socket @fd.
+ *
+ * In debug mode the message also carries the descriptor number.
+ * Returns 0 on success, -1 if the peer address or its name lookup fails.
+ */
+int trace_net_print_connection(int fd)
+{
+	char host[NI_MAXHOST], service[NI_MAXSERV];
+	struct sockaddr_storage peer;
+	socklen_t peer_len = sizeof(peer);
+
+	if (getpeername(fd, (struct sockaddr *)&peer, &peer_len))
+		return -1;
+
+	if (getnameinfo((struct sockaddr *)&peer, peer_len,
+			host, NI_MAXHOST,
+			service, NI_MAXSERV, NI_NUMERICSERV))
+		return -1;
+
+	if (!tracecmd_get_debug()) {
+		tracecmd_plog("Connected to %s:%s\n", host, service);
+		return 0;
+	}
+
+	tracecmd_debug("Connected to %s:%s fd:%d\n", host, service, fd);
+	return 0;
+}
+
+/*
+ * Handle one accepted connection.
+ *
+ * do_fork() decides who serves the client: a non-zero result (the child's
+ * pid in the parent, or -1 on failure) is returned unchanged.  Otherwise
+ * this process builds the host/service names (from the vsock cid/port or
+ * via getnameinfo()), runs process_client(), and exits unless debug mode
+ * is on.
+ *
+ * Returns the do_fork() result when non-zero, -1 if the message handle
+ * cannot be allocated or the peer name cannot be resolved, 0 in debug
+ * mode after the client has been served.
+ */
+static int do_connection(int cfd, struct sockaddr *addr,
+			 socklen_t addr_len)
+{
+	struct tracecmd_msg_handle *msg_handle;
+	/* start empty so the names are defined even without VSOCK support */
+	char host[NI_MAXHOST] = "", service[NI_MAXSERV] = "";
+	int s;
+	int ret;
+
+	ret = do_fork(cfd);
+	if (ret)
+		return ret;
+
+	msg_handle = tracecmd_msg_handle_alloc(cfd, 0);
+	if (!msg_handle) {
+		/* process_client() dereferences the handle; bail out early */
+		tracecmd_plog("Failed to allocate message handle\n");
+		close(cfd);
+		return -1;
+	}
+
+	if (use_vsock) {
+#ifdef VSOCK
+		struct sockaddr_vm *vm_addr = (struct sockaddr_vm *)addr;
+		snprintf(host, NI_MAXHOST, "V%d", vm_addr->svm_cid);
+		snprintf(service, NI_MAXSERV, "%d", vm_addr->svm_port);
+#endif
+	} else {
+		s = getnameinfo((struct sockaddr *)addr, addr_len,
+				host, NI_MAXHOST,
+				service, NI_MAXSERV, NI_NUMERICSERV);
+
+		if (s == 0)
+			tracecmd_plog("Connected with %s:%s\n", host, service);
+		else {
+			tracecmd_plog("Error with getnameinfo: %s\n", gai_strerror(s));
+			/*
+			 * NOTE(review): if tracecmd_msg_handle_close() also
+			 * closes the descriptor this closes cfd twice --
+			 * confirm against its implementation.
+			 */
+			close(cfd);
+			tracecmd_msg_handle_close(msg_handle);
+			return -1;
+		}
+	}
+
+	process_client(msg_handle, host, service);
+
+	tracecmd_msg_handle_close(msg_handle);
+
+	if (!tracecmd_get_debug())
+		exit(0);
+
+	return 0;
+}
+
+/* Growable array of client process ids; a zero entry is an unused slot */
+static int *client_pids;
+/* Number of zeroed (reusable) slots, maintained by add/remove_process() */
+static int free_pids;
+/* Number of slots in client_pids */
+static int saved_pids;
+
+/*
+ * Record @pid in client_pids.
+ *
+ * A zeroed slot is reused when free_pids says one exists (a warning is
+ * printed if none is actually found); otherwise the array is grown by one
+ * entry.  Failure to grow the array is fatal.
+ */
+static void add_process(int pid)
+{
+	int *slot = NULL;
+	int idx;
+
+	if (free_pids) {
+		for (idx = 0; idx < saved_pids && !slot; idx++) {
+			if (!client_pids[idx])
+				slot = &client_pids[idx];
+		}
+		free_pids--;
+		if (!slot)
+			warning("Could not find free pid");
+	}
+
+	if (!slot) {
+		client_pids = realloc(client_pids,
+				      sizeof(*client_pids) * (saved_pids + 1));
+		if (!client_pids)
+			pdie("allocating pids");
+		slot = &client_pids[saved_pids++];
+	}
+
+	*slot = pid;
+}
+
+/*
+ * Forget @pid: zero the first slot of client_pids that holds it and count
+ * the slot as free.  Unknown pids are ignored.
+ */
+static void remove_process(int pid)
+{
+	int idx;
+
+	for (idx = 0; idx < saved_pids; idx++) {
+		if (client_pids[idx] != pid)
+			continue;
+
+		client_pids[idx] = 0;
+		free_pids++;
+		return;
+	}
+}
+
+/*
+ * Wait for every recorded client process to exit, sending SIGINT first
+ * when the `done` flag is set.  Afterwards the pid list is marked empty.
+ */
+static void kill_clients(void)
+{
+	int status;
+	int idx;
+
+	for (idx = 0; idx < saved_pids; idx++) {
+		if (client_pids[idx]) {
+			/* Only kill the clients if we received SIGINT or SIGTERM */
+			if (done)
+				kill(client_pids[idx], SIGINT);
+			waitpid(client_pids[idx], &status, 0);
+		}
+	}
+
+	saved_pids = 0;
+}
+
+/*
+ * Reap, without blocking, every child that has already exited and drop
+ * each reaped pid from the client list.
+ */
+static void clean_up(void)
+{
+	int status;
+	int reaped;
+
+	/* Clean up any children that has started before */
+	while ((reaped = waitpid(0, &status, WNOHANG)) > 0)
+		remove_process(reaped);
+}
+
+/*
+ * Accept connections on @sfd until the `done` flag is set.
+ *
+ * Each accepted connection is passed to do_connection(); a positive
+ * return value is the pid of the child serving it and is recorded.  An
+ * accept() interrupted by a signal reaps finished children and retries.
+ * Any other accept() failure is fatal.
+ */
+static void do_accept_loop(int sfd)
+{
+	struct sockaddr_storage peer_addr;
+#ifdef VSOCK
+	struct sockaddr_vm vm_addr;
+#endif
+	/* default to the generic buffer so addr is always initialized */
+	struct sockaddr *addr = (struct sockaddr *)&peer_addr;
+	socklen_t addr_size = sizeof(peer_addr);
+	socklen_t addr_len;
+	int cfd, pid;
+
+	if (use_vsock) {
+#ifdef VSOCK
+		addr = (struct sockaddr *)&vm_addr;
+		addr_size = sizeof(vm_addr);
+#endif
+	}
+
+	do {
+		/*
+		 * accept() overwrites addr_len with the length of the peer
+		 * address, so it must be set to the buffer size each time.
+		 */
+		addr_len = addr_size;
+		cfd = accept(sfd, addr, &addr_len);
+		if (cfd < 0 && errno == EINTR) {
+			clean_up();
+			continue;
+		}
+		if (cfd < 0)
+			pdie("connecting");
+
+		pid = do_connection(cfd, addr, addr_len);
+		if (pid > 0)
+			add_process(pid);
+
+	} while (!done);
+	/* Get any final stragglers */
+	clean_up();
+}
+
+/*
+ * Write this process's pid, followed by a newline, to the pid file.
+ *
+ * Nothing is done unless do_daemon is set.  The file name comes from
+ * make_pid_name(); a failure to open it is reported with perror() and
+ * otherwise ignored.
+ */
+static void make_pid_file(void)
+{
+	char buf[PATH_MAX];
+	int mode = do_daemon;
+	int fd;
+
+	if (!do_daemon)
+		return;
+
+	make_pid_name(mode, buf);
+
+	fd = open(buf, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (fd >= 0) {
+		/* buf is reused: the name is no longer needed once open */
+		sprintf(buf, "%d\n", getpid());
+		write(fd, buf, strlen(buf));
+		close(fd);
+	} else {
+		perror(buf);
+	}
+}
+
+/*
+ * Signal handler that deliberately does nothing; do_listen() installs it
+ * for SIGCHLD.
+ */
+static void sigstub(int sig)
+{
+	(void)sig;
+}
+
+/*
+ * Create the vsock listening socket for @port.
+ *
+ * Returns the socket descriptor, or the negative value reported by
+ * trace_vsock_make().  When the local cid can be determined the
+ * listening address is printed.
+ */
+static int get_vsock(const char *port)
+{
+	/*
+	 * Signed so that a negative (error) result can be detected.
+	 * NOTE(review): assumes trace_vsock_local_cid() reports failure as a
+	 * negative int, as the >= 0 test implies -- confirm its prototype.
+	 */
+	int cid;
+	int sd;
+
+	sd = trace_vsock_make(atoi(port));
+	if (sd < 0)
+		return sd;
+
+	cid = trace_vsock_local_cid();
+	if (cid >= 0)
+		printf("listening on @%u:%s\n", (unsigned int)cid, port);
+
+	return sd;
+}
+
+/*
+ * Create a stream socket bound to @port on the wildcard address.
+ *
+ * The candidates returned by getaddrinfo() are tried in order until one
+ * can be both created and bound.  A failed lookup, or no bindable
+ * candidate, is fatal.  Returns the bound socket descriptor.
+ */
+static int get_network(char *port)
+{
+	struct addrinfo hints;
+	struct addrinfo *list, *ai;
+	int sock, err;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	hints.ai_flags = AI_PASSIVE;
+
+	err = getaddrinfo(NULL, port, &hints, &list);
+	if (err != 0)
+		pdie("getaddrinfo: error opening %s", port);
+
+	ai = list;
+	while (ai != NULL) {
+		sock = socket(ai->ai_family, ai->ai_socktype,
+			      ai->ai_protocol);
+		if (sock >= 0) {
+			if (bind(sock, ai->ai_addr, ai->ai_addrlen) == 0)
+				break;
+			close(sock);
+		}
+		ai = ai->ai_next;
+	}
+
+	if (ai == NULL)
+		pdie("Could not bind");
+
+	freeaddrinfo(list);
+
+	return sock;
+}
+
+/*
+ * Run the server on @port: install the SIGCHLD stub (unless in debug
+ * mode), write the pid file, create the vsock or network listening
+ * socket, accept clients until told to stop, then wait for the clients
+ * and remove the pid file.  A failing listen() is fatal.
+ */
+static void do_listen(char *port)
+{
+	int sfd;
+
+	if (!tracecmd_get_debug())
+		signal_setup(SIGCHLD, sigstub);
+
+	make_pid_file();
+
+	sfd = use_vsock ? get_vsock(port) : get_network(port);
+
+	if (listen(sfd, backlog) < 0)
+		pdie("listen");
+
+	do_accept_loop(sfd);
+
+	kill_clients();
+
+	remove_pid_file();
+}
+
+/*
+ * Mark the process as a daemon (do_daemon = 1) and detach with
+ * daemon(1, 0); failure to detach is fatal.
+ */
+static void start_daemon(void)
+{
+	int rc;
+
+	do_daemon = 1;
+
+	rc = daemon(1, 0);
+	if (rc < 0)
+		die("starting daemon");
+}
+
+/*
+ * getopt_long() return codes for the long-only options of trace_listen().
+ */
+enum {
+	OPT_verbose	= 254,
+	OPT_debug	= 255,
+};
+
+/*
+ * Entry point of the "listen" sub-command.
+ *
+ * Parses the options (-p port, -d output dir, -o output file, -l logfile,
+ * -D daemonize, -V vsock, --debug, --verbose), applies the defaults for
+ * the output file and directory, optionally redirects logging, changes
+ * into the output directory, optionally daemonizes, installs the
+ * SIGINT/SIGTERM handlers and starts listening.  A port is mandatory.
+ */
+void trace_listen(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"port", required_argument, NULL, 'p'},
+		{"help", no_argument, NULL, '?'},
+		{"debug", no_argument, NULL, OPT_debug},
+		{"verbose", optional_argument, NULL, OPT_verbose},
+		{NULL, 0, NULL, 0}
+	};
+	char *log_path = NULL;
+	char *port = NULL;
+	int run_daemon = 0;
+	int opt;
+
+	if (argc < 2)
+		usage(argv);
+
+	if (strcmp(argv[1], "listen") != 0)
+		usage(argv);
+
+	for (;;) {
+		int option_index = 0;
+
+		/* skip argv[0] so that "listen" acts as the program name */
+		opt = getopt_long (argc-1, argv+1, "+hp:Vo:d:l:D",
+				   long_options, &option_index);
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'p':
+			port = optarg;
+			break;
+		case 'd':
+			output_dir = optarg;
+			break;
+		case 'V':
+			use_vsock = true;
+			break;
+		case 'o':
+			output_file = optarg;
+			break;
+		case 'l':
+			log_path = optarg;
+			break;
+		case 'D':
+			run_daemon = 1;
+			break;
+		case OPT_debug:
+			tracecmd_set_debug(true);
+			break;
+		case OPT_verbose:
+			if (trace_set_verbose(optarg) < 0)
+				die("invalid verbose level %s", optarg);
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	if (!port)
+		usage(argv);
+
+	if ((argc - optind) >= 2)
+		usage(argv);
+
+	if (!output_file)
+		output_file = default_output_file;
+
+	if (!output_dir)
+		output_dir = default_output_dir;
+
+	/* set the writes to a logfile instead */
+	if (log_path && tracecmd_set_logfile(log_path) < 0)
+		die("creating log file %s", log_path);
+
+	if (chdir(output_dir) < 0)
+		die("Can't access directory %s", output_dir);
+
+	if (run_daemon)
+		start_daemon();
+
+	signal_setup(SIGINT, finish);
+	signal_setup(SIGTERM, finish);
+
+	do_listen(port);
+}
diff --git a/tracecmd/trace-mem.c b/tracecmd/trace-mem.c
new file mode 100644
index 00000000..25eb0861
--- /dev/null
+++ b/tracecmd/trace-mem.c
@@ -0,0 +1,564 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ *
+ * This code was inspired by Ezequiel Garcia's trace_analyze program:
+ * git://github.com/ezequielgarcia/trace_analyze.git
+ *
+ * Unfortunately, I hate working with Python, and I also had trouble
+ * getting it to work, as I had an old python on my Fedora 13, and it
+ * was written for the newer version. I decided to do some of it here
+ * in C.
+ */
+#define _LARGEFILE64_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <signal.h>
+
+#include "trace-local.h"
+#include "trace-hash-local.h"
+#include "list.h"
+
+static int kmalloc_type;
+static int kmalloc_node_type;
+static int kfree_type;
+static int kmem_cache_alloc_type;
+static int kmem_cache_alloc_node_type;
+static int kmem_cache_free_type;
+
+static struct tep_format_field *common_type_mem;
+
+static struct tep_format_field *kmalloc_callsite_field;
+static struct tep_format_field *kmalloc_bytes_req_field;
+static struct tep_format_field *kmalloc_bytes_alloc_field;
+static struct tep_format_field *kmalloc_ptr_field;
+
+static struct tep_format_field *kmalloc_node_callsite_field;
+static struct tep_format_field *kmalloc_node_bytes_req_field;
+static struct tep_format_field *kmalloc_node_bytes_alloc_field;
+static struct tep_format_field *kmalloc_node_ptr_field;
+
+static struct tep_format_field *kfree_ptr_field;
+
+static struct tep_format_field *kmem_cache_callsite_field;
+static struct tep_format_field *kmem_cache_bytes_req_field;
+static struct tep_format_field *kmem_cache_bytes_alloc_field;
+static struct tep_format_field *kmem_cache_ptr_field;
+
+static struct tep_format_field *kmem_cache_node_callsite_field;
+static struct tep_format_field *kmem_cache_node_bytes_req_field;
+static struct tep_format_field *kmem_cache_node_bytes_alloc_field;
+static struct tep_format_field *kmem_cache_node_ptr_field;
+
+static struct tep_format_field *kmem_cache_free_ptr_field;
+
+/* Allocate @size zero-filled bytes; returns NULL when calloc() fails. */
+static void *zalloc(size_t size)
+{
+	void *mem = calloc(1, size);
+
+	return mem;
+}
+
+/*
+ * Look up event @sys/@name in @pevent.
+ *
+ * On success the event's id is stored in @id and the event is returned;
+ * when the event does not exist NULL is returned and @id is untouched.
+ */
+static struct tep_event *
+update_event(struct tep_handle *pevent,
+	     const char *sys, const char *name, int *id)
+{
+	struct tep_event *found = tep_find_event_by_name(pevent, sys, name);
+
+	if (found)
+		*id = found->id;
+
+	return found;
+}
+
+/*
+ * Cache the id and the field descriptors of the kmem:kmalloc event.
+ * Nothing is changed when the event is not present.
+ */
+static void update_kmalloc(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "kmem", "kmalloc", &kmalloc_type);
+	if (ev) {
+		kmalloc_callsite_field = tep_find_field(ev, "call_site");
+		kmalloc_bytes_req_field = tep_find_field(ev, "bytes_req");
+		kmalloc_bytes_alloc_field = tep_find_field(ev, "bytes_alloc");
+		kmalloc_ptr_field = tep_find_field(ev, "ptr");
+	}
+}
+
+/*
+ * Cache the id and the field descriptors of the kmem:kmalloc_node event.
+ * Nothing is changed when the event is not present.
+ */
+static void update_kmalloc_node(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "kmem", "kmalloc_node", &kmalloc_node_type);
+	if (ev) {
+		kmalloc_node_callsite_field = tep_find_field(ev, "call_site");
+		kmalloc_node_bytes_req_field = tep_find_field(ev, "bytes_req");
+		kmalloc_node_bytes_alloc_field = tep_find_field(ev, "bytes_alloc");
+		kmalloc_node_ptr_field = tep_find_field(ev, "ptr");
+	}
+}
+
+/*
+ * Cache the id and the "ptr" field descriptor of the kmem:kfree event.
+ * Nothing is changed when the event is not present.
+ */
+static void update_kfree(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "kmem", "kfree", &kfree_type);
+	if (ev)
+		kfree_ptr_field = tep_find_field(ev, "ptr");
+}
+
+/*
+ * Cache the id and the field descriptors of the kmem:kmem_cache_alloc
+ * event.  Nothing is changed when the event is not present.
+ */
+static void update_kmem_cache_alloc(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "kmem", "kmem_cache_alloc", &kmem_cache_alloc_type);
+	if (ev) {
+		kmem_cache_callsite_field = tep_find_field(ev, "call_site");
+		kmem_cache_bytes_req_field = tep_find_field(ev, "bytes_req");
+		kmem_cache_bytes_alloc_field = tep_find_field(ev, "bytes_alloc");
+		kmem_cache_ptr_field = tep_find_field(ev, "ptr");
+	}
+}
+
+/*
+ * Cache the id and the field descriptors of the
+ * kmem:kmem_cache_alloc_node event.  Nothing is changed when the event is
+ * not present.
+ */
+static void update_kmem_cache_alloc_node(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "kmem", "kmem_cache_alloc_node",
+			  &kmem_cache_alloc_node_type);
+	if (ev) {
+		kmem_cache_node_callsite_field = tep_find_field(ev, "call_site");
+		kmem_cache_node_bytes_req_field = tep_find_field(ev, "bytes_req");
+		kmem_cache_node_bytes_alloc_field = tep_find_field(ev, "bytes_alloc");
+		kmem_cache_node_ptr_field = tep_find_field(ev, "ptr");
+	}
+}
+
+/*
+ * Cache the id and the "ptr" field descriptor of the
+ * kmem:kmem_cache_free event.  Nothing is changed when the event is not
+ * present.
+ */
+static void update_kmem_cache_free(struct tep_handle *pevent)
+{
+	struct tep_event *ev;
+
+	ev = update_event(pevent, "kmem", "kmem_cache_free", &kmem_cache_free_type);
+	if (ev)
+		kmem_cache_free_ptr_field = tep_find_field(ev, "ptr");
+}
+
+/* Per-function allocation statistics, chained in the func_hash buckets. */
+struct func_descr {
+	struct func_descr	*next;		/* next entry in the same hash bucket */
+	const char		*func;		/* function name, compared by pointer */
+	unsigned long		total_alloc;	/* sum of all allocated sizes seen */
+	unsigned long		total_req;	/* sum of all requested sizes seen */
+	unsigned long		current_alloc;	/* allocated bytes not yet freed */
+	unsigned long		current_req;	/* requested bytes not yet freed */
+	unsigned long		max_alloc;	/* highest current_alloc reached */
+	unsigned long		max_req;	/* highest current_req reached */
+	unsigned long		waste;		/* current_alloc - current_req, set by sort_list() */
+	unsigned long		max_waste;	/* max_alloc - max_req, set by sort_list() */
+};
+
+/* One tracked allocation, chained in the ptr_hash buckets. */
+struct ptr_descr {
+	struct ptr_descr	*next;	/* next entry in the same hash bucket */
+	struct func_descr	*func;	/* statistics entry charged for this allocation */
+	unsigned long long	ptr;	/* pointer value read from the event */
+	unsigned long		alloc;	/* allocated size recorded by add_kmalloc() */
+	unsigned long		req;	/* requested size recorded by add_kmalloc() */
+};
+
+/*
+ * Geometry of the function and pointer hash tables.  HASH_MASK must stay
+ * a plain expression (no trailing ';') so it can be used inside larger
+ * expressions such as array subscripts.
+ */
+#define HASH_BITS 12
+#define HASH_SIZE (1 << HASH_BITS)
+#define HASH_MASK (HASH_SIZE - 1)
+
+static struct func_descr *func_hash[HASH_SIZE];
+static struct ptr_descr *ptr_hash[HASH_SIZE];
+static struct func_descr **func_list;
+
+static unsigned func_count;
+
+/*
+ * Fold @size bytes at @ptr into a single int by XOR-ing them round-robin
+ * into its first four bytes, then pass the result through trace_hash().
+ */
+static int make_key(const void *ptr, int size)
+{
+	const char *bytes = ptr;
+	int folded = 0;
+	char *lane = (char *)&folded;
+	int pos;
+
+	for (pos = 0; pos < size; pos++)
+		lane[pos & 3] ^= bytes[pos];
+
+	return trace_hash(folded);
+}
+
+/*
+ * Return the statistics entry for @func, or NULL if there is none yet.
+ * The bucket is selected by hashing the name's characters, but entries
+ * are matched on the pointer value alone.
+ */
+static struct func_descr *find_func(const char *func)
+{
+	struct func_descr *entry;
+	int key = make_key(func, strlen(func)) & HASH_MASK;
+
+	entry = func_hash[key];
+	/*
+	 * As func is always a constant to one pointer,
+	 * a direct compare can be used instead of strcmp.
+	 */
+	while (entry && entry->func != func)
+		entry = entry->next;
+
+	return entry;
+}
+
+/*
+ * Allocate a zeroed statistics entry for @func, push it on the front of
+ * its hash bucket and bump func_count.  Allocation failure is fatal.
+ */
+static struct func_descr *create_func(const char *func)
+{
+	struct func_descr *entry;
+	int key = make_key(func, strlen(func)) & HASH_MASK;
+
+	entry = zalloc(sizeof(*entry));
+	if (!entry)
+		die("malloc");
+
+	entry->func = func;
+	entry->next = func_hash[key];
+	func_hash[key] = entry;
+	func_count++;
+
+	return entry;
+}
+
+/* Return the tracked allocation whose pointer value is @ptr, or NULL. */
+static struct ptr_descr *find_ptr(unsigned long long ptr)
+{
+	struct ptr_descr *entry;
+	int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK;
+
+	entry = ptr_hash[key];
+	while (entry && entry->ptr != ptr)
+		entry = entry->next;
+
+	return entry;
+}
+
+/*
+ * Allocate a zeroed tracking entry for @ptr and push it on the front of
+ * its hash bucket.  Allocation failure is fatal.
+ */
+static struct ptr_descr *create_ptr(unsigned long long ptr)
+{
+	struct ptr_descr *entry;
+	int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK;
+
+	entry = zalloc(sizeof(*entry));
+	if (!entry)
+		die("malloc");
+
+	entry->ptr = ptr;
+	entry->next = ptr_hash[key];
+	ptr_hash[key] = entry;
+
+	return entry;
+}
+
+/*
+ * Unlink and free the first tracking entry for @ptr in its hash bucket.
+ * Does nothing when @ptr is not tracked.
+ */
+static void remove_ptr(unsigned long long ptr)
+{
+	struct ptr_descr *victim, **link;
+	int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK;
+
+	/* link always points at the pointer that leads to victim */
+	link = &ptr_hash[key];
+	while ((victim = *link) != NULL) {
+		if (victim->ptr == ptr) {
+			*link = victim->next;
+			free(victim);
+			return;
+		}
+		link = &victim->next;
+	}
+}
+
+/*
+ * Account an allocation of @alloc bytes (@req requested) made by @func
+ * and remember which function owns @ptr.
+ *
+ * The function's totals, current values and maxima are updated; the
+ * tracking entry for @ptr is created if needed and then overwritten with
+ * the new sizes and owner.
+ */
+static void add_kmalloc(const char *func, unsigned long long ptr,
+			unsigned int req, int alloc)
+{
+	struct func_descr *stats = find_func(func);
+	struct ptr_descr *track;
+
+	if (!stats)
+		stats = create_func(func);
+
+	stats->total_alloc += alloc;
+	stats->total_req += req;
+	stats->current_alloc += alloc;
+	stats->current_req += req;
+
+	if (stats->max_alloc < stats->current_alloc)
+		stats->max_alloc = stats->current_alloc;
+	if (stats->max_req < stats->current_req)
+		stats->max_req = stats->current_req;
+
+	track = find_ptr(ptr);
+	if (!track)
+		track = create_ptr(ptr);
+
+	track->func = stats;
+	track->alloc = alloc;
+	track->req = req;
+}
+
+/*
+ * Account the release of @ptr: subtract its recorded sizes from the
+ * owning function's current values and drop the tracking entry.
+ * Pointers that are not tracked are ignored.
+ */
+static void remove_kmalloc(unsigned long long ptr)
+{
+	struct ptr_descr *track = find_ptr(ptr);
+	struct func_descr *owner;
+
+	if (track) {
+		owner = track->func;
+		owner->current_alloc -= track->alloc;
+		owner->current_req -= track->req;
+
+		remove_ptr(ptr);
+	}
+}
+
+/*
+ * Handle one allocation event.
+ *
+ * The call site, requested size, allocated size and pointer are read from
+ * @record through the given field descriptors, the call site is resolved
+ * to a function name and the allocation is accounted with add_kmalloc().
+ * A call site that cannot be resolved is accounted under a single
+ * placeholder name instead of passing NULL on to the string functions.
+ */
+static void
+process_kmalloc(struct tep_handle *pevent, struct tep_record *record,
+		struct tep_format_field *callsite_field,
+		struct tep_format_field *bytes_req_field,
+		struct tep_format_field *bytes_alloc_field,
+		struct tep_format_field *ptr_field)
+{
+	/* one object, so find_func()'s pointer comparison keeps working */
+	static const char unknown_func[] = "<unknown>";
+	unsigned long long callsite;
+	unsigned long long val;
+	unsigned long long ptr;
+	unsigned int req;
+	int alloc;
+	const char *func;
+
+	tep_read_number_field(callsite_field, record->data, &callsite);
+	tep_read_number_field(bytes_req_field, record->data, &val);
+	req = val;
+	tep_read_number_field(bytes_alloc_field, record->data, &val);
+	alloc = val;
+	tep_read_number_field(ptr_field, record->data, &ptr);
+
+	func = tep_find_function(pevent, callsite);
+	if (!func)
+		func = unknown_func;
+
+	add_kmalloc(func, ptr, req, alloc);
+}
+
+/*
+ * Handle one free event: read the pointer value from @record through
+ * @ptr_field and account its release.
+ */
+static void
+process_kfree(struct tep_handle *pevent, struct tep_record *record,
+	      struct tep_format_field *ptr_field)
+{
+	unsigned long long freed;
+
+	tep_read_number_field(ptr_field, record->data, &freed);
+	remove_kmalloc(freed);
+}
+
+/*
+ * Dispatch one record according to its common type field.
+ *
+ * The four allocation event types are handled by process_kmalloc() with
+ * their own field descriptors, the two free event types by
+ * process_kfree().  The first matching type wins; records of any other
+ * type are ignored.
+ */
+static void
+process_record(struct tep_handle *pevent, struct tep_record *record)
+{
+	unsigned long long raw_type;
+	int type;
+
+	tep_read_number_field(common_type_mem, record->data, &raw_type);
+	type = raw_type;
+
+	if (type == kmalloc_type) {
+		process_kmalloc(pevent, record,
+				kmalloc_callsite_field,
+				kmalloc_bytes_req_field,
+				kmalloc_bytes_alloc_field,
+				kmalloc_ptr_field);
+	} else if (type == kmalloc_node_type) {
+		process_kmalloc(pevent, record,
+				kmalloc_node_callsite_field,
+				kmalloc_node_bytes_req_field,
+				kmalloc_node_bytes_alloc_field,
+				kmalloc_node_ptr_field);
+	} else if (type == kfree_type) {
+		process_kfree(pevent, record, kfree_ptr_field);
+	} else if (type == kmem_cache_alloc_type) {
+		process_kmalloc(pevent, record,
+				kmem_cache_callsite_field,
+				kmem_cache_bytes_req_field,
+				kmem_cache_bytes_alloc_field,
+				kmem_cache_ptr_field);
+	} else if (type == kmem_cache_alloc_node_type) {
+		process_kmalloc(pevent, record,
+				kmem_cache_node_callsite_field,
+				kmem_cache_node_bytes_req_field,
+				kmem_cache_node_bytes_alloc_field,
+				kmem_cache_node_ptr_field);
+	} else if (type == kmem_cache_free_type) {
+		process_kfree(pevent, record, kmem_cache_free_ptr_field);
+	}
+}
+
+/*
+ * qsort() comparator over pointers to func_descr: orders entries by
+ * descending waste.
+ */
+static int func_cmp(const void *a, const void *b)
+{
+	const struct func_descr *left = *(const struct func_descr **)a;
+	const struct func_descr *right = *(const struct func_descr **)b;
+
+	if (left->waste == right->waste)
+		return 0;
+
+	return left->waste > right->waste ? -1 : 1;
+}
+
+/*
+ * Build func_list, an array of all function entries sorted by descending
+ * waste.
+ *
+ * While walking the hash table each entry's waste and max_waste are
+ * computed.  Allocation failure, or finding more entries than func_count
+ * announced, is fatal.
+ */
+static void sort_list(void)
+{
+	struct func_descr *funcd;
+	unsigned int i = 0;
+	int h;
+
+	func_list = zalloc(sizeof(*func_list) * func_count);
+	/* a zero-sized request may legitimately return NULL */
+	if (!func_list && func_count)
+		die("malloc");
+
+	for (h = 0; h < HASH_SIZE; h++) {
+		for (funcd = func_hash[h]; funcd; funcd = funcd->next) {
+			funcd->waste = funcd->current_alloc - funcd->current_req;
+			funcd->max_waste = funcd->max_alloc - funcd->max_req;
+			if (i == func_count)
+				die("more funcs than expected\n");
+			func_list[i++] = funcd;
+		}
+	}
+
+	qsort(func_list, func_count, sizeof(*func_list), func_cmp);
+}
+
+/*
+ * Print the table header and one line of statistics for each entry of
+ * func_list, in list order.
+ */
+static void print_list(void)
+{
+	struct func_descr **cursor;
+	int row;
+
+	printf(" Function \t");
+	printf("Waste\tAlloc\treq\t\tTotAlloc TotReq\t\tMaxAlloc MaxReq\t");
+	printf("MaxWaste\n");
+	printf(" -------- \t");
+	printf("-----\t-----\t---\t\t-------- ------\t\t-------- ------\t");
+	printf("--------\n");
+
+	cursor = func_list;
+	for (row = 0; row < func_count; row++, cursor++) {
+		const struct func_descr *entry = *cursor;
+
+		printf("%32s\t%ld\t%ld\t%ld\t\t%8ld %8ld\t\t%8ld %8ld\t%ld\n",
+		       entry->func, entry->waste,
+		       entry->current_alloc, entry->current_req,
+		       entry->total_alloc, entry->total_req,
+		       entry->max_alloc, entry->max_req, entry->max_waste);
+	}
+}
+
+/*
+ * Read every record of @handle and print the per-function memory report.
+ *
+ * The first available record is only peeked at to locate the
+ * "common_type" field; the event ids and fields of the kmem events are
+ * then cached, all records are processed, and the sorted table is
+ * printed.  Latency traces, empty files and files without a common type
+ * field are fatal.
+ */
+static void do_trace_mem(struct tracecmd_input *handle)
+{
+	struct tep_handle *pevent = tracecmd_get_tep(handle);
+	/* stays NULL when the file reports no CPUs at all */
+	struct tep_record *record = NULL;
+	struct tep_event *event;
+	int cpus;
+	int cpu;
+	int ret;
+
+	ret = tracecmd_init_data(handle);
+	if (ret < 0)
+		die("failed to init data");
+
+	if (ret > 0)
+		die("trace-cmd mem does not work with latency traces\n");
+
+	cpus = tracecmd_cpus(handle);
+
+	/* Need to get any event */
+	for (cpu = 0; cpu < cpus; cpu++) {
+		record = tracecmd_peek_data(handle, cpu);
+		if (record)
+			break;
+	}
+	if (!record)
+		die("No records found in file");
+
+	ret = tep_data_type(pevent, record);
+	event = tep_find_event(pevent, ret);
+	if (!event)
+		die("Can't find the event of the first record");
+
+	common_type_mem = tep_find_common_field(event, "common_type");
+	if (!common_type_mem)
+		die("Can't find a 'type' field?");
+
+	update_kmalloc(pevent);
+	update_kmalloc_node(pevent);
+	update_kfree(pevent);
+	update_kmem_cache_alloc(pevent);
+	update_kmem_cache_alloc_node(pevent);
+	update_kmem_cache_free(pevent);
+
+	while ((record = tracecmd_read_next_data(handle, &cpu))) {
+		process_record(pevent, record);
+		tracecmd_free_record(record);
+	}
+
+	sort_list();
+	print_list();
+}
+
+/*
+ * Entry point of the "mem" sub-command.
+ *
+ * The input file comes from -i or from the first non-option argument
+ * (giving both is a usage error) and defaults to DEFAULT_INPUT_FILE.
+ * The file is opened, its headers are read and the memory report is
+ * produced.  The input handle is closed on every return path.
+ */
+void trace_mem(int argc, char **argv)
+{
+	struct tracecmd_input *handle;
+	const char *input_file = NULL;
+	int ret;
+
+	for (;;) {
+		int c;
+
+		c = getopt(argc-1, argv+1, "+hi:");
+		if (c == -1)
+			break;
+		switch (c) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'i':
+			if (input_file)
+				die("Only one input for mem");
+			input_file = optarg;
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	if ((argc - optind) >= 2) {
+		if (input_file)
+			usage(argv);
+		input_file = argv[optind + 1];
+	}
+
+	if (!input_file)
+		input_file = DEFAULT_INPUT_FILE;
+
+	handle = tracecmd_alloc(input_file, 0);
+	if (!handle)
+		die("can't open %s\n", input_file);
+
+	ret = tracecmd_read_headers(handle, 0);
+	if (ret) {
+		/* do not leak the handle when the headers are unreadable */
+		tracecmd_close(handle);
+		return;
+	}
+
+	do_trace_mem(handle);
+
+	tracecmd_close(handle);
+}
diff --git a/tracecmd/trace-profile.c b/tracecmd/trace-profile.c
new file mode 100644
index 00000000..6a2cc3d0
--- /dev/null
+++ b/tracecmd/trace-profile.c
@@ -0,0 +1,2455 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+/** FIXME: Convert numbers based on machine and file */
+#define _LARGEFILE64_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef NO_AUDIT
+#include <libaudit.h>
+#endif
+#include "trace-local.h"
+#include "trace-hash.h"
+#include "trace-hash-local.h"
+#include "list.h"
+
+#include <linux/time64.h>
+
+#ifdef WARN_NO_AUDIT
+# warning "lib audit not found, using raw syscalls " \
+ "(install audit-libs-devel(for fedora) or libaudit-dev(for debian/ubuntu) and try again)"
+#endif
+
+/* Characters sched_switch_print() uses to render the bits of a task state */
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
+/* handle_sched_switch_event() masks prev_state with (TASK_STATE_MAX - 1) */
+#define TASK_STATE_MAX 1024
+
+/*
+ * Each helper converts a pointer to the trace_hash_item embedded as the
+ * "hash" member back into a pointer to the structure that contains it.
+ */
+#define task_from_item(item) container_of(item, struct task_data, hash)
+#define start_from_item(item) container_of(item, struct start_data, hash)
+#define event_from_item(item) container_of(item, struct event_hash, hash)
+#define stack_from_item(item) container_of(item, struct stack_data, hash)
+#define group_from_item(item) container_of(item, struct group_data, hash)
+#define event_data_from_item(item) container_of(item, struct event_data, hash)
+
+/* Return the number of whole seconds in the nanosecond value @ts. */
+static unsigned long long nsecs_per_sec(unsigned long long ts)
+{
+	unsigned long long secs = ts / NSEC_PER_SEC;
+
+	return secs;
+}
+
+/*
+ * Return the sub-second part of the nanosecond value @ts, converted to
+ * microseconds and rounded to the nearest microsecond.
+ */
+static unsigned long long mod_to_usec(unsigned long long ts)
+{
+	unsigned long long remainder = ts % NSEC_PER_SEC;
+
+	/* Adding half a microsecond makes the division round to nearest */
+	return (remainder + NSEC_PER_USEC / 2) / NSEC_PER_USEC;
+}
+
+/* Forward declarations needed by the callback types below */
+struct handle_data;
+struct event_hash;
+struct event_data;
+
+/* Callback that writes a description of @hash into the trace_seq @s */
+typedef void (*event_data_print)(struct trace_seq *s, struct event_hash *hash);
+/*
+ * Per-event handler called from trace_profile_record() in place of the
+ * generic handle_event_data() when event_data->handle_event is set.
+ */
+typedef int (*handle_event_func)(struct handle_data *h, unsigned long long pid,
+ struct event_data *data,
+ struct tep_record *record, int cpu);
+
+/* Category of a profiled event; stored in event_data.type by add_event() */
+enum event_data_type {
+ EVENT_TYPE_UNDEFINED,
+ EVENT_TYPE_STACK,
+ EVENT_TYPE_SCHED_SWITCH,
+ EVENT_TYPE_WAKEUP,
+ EVENT_TYPE_FUNC,
+ EVENT_TYPE_SYSCALL,
+ EVENT_TYPE_IRQ,
+ EVENT_TYPE_SOFTIRQ,
+ EVENT_TYPE_SOFTIRQ_RAISE,
+ EVENT_TYPE_PROCESS_EXEC,
+ EVENT_TYPE_USER_MATE, /* used by tracecmd_mate_events() */
+};
+
+/*
+ * Description of one event being profiled on a handle.  Entries are created
+ * by add_event() and hashed by event id in handle_data.events.
+ */
+struct event_data {
+ struct trace_hash_item hash;
+ int id; /* copy of event->id */
+ int trace;
+ struct tep_event *event;
+
+ /* Mates set by mate_events(): start->end and end->start */
+ struct event_data *end;
+ struct event_data *start;
+
+ struct tep_format_field *pid_field;
+ struct tep_format_field *start_match_field; /* match with start */
+ struct tep_format_field *end_match_field; /* match with end */
+ struct tep_format_field *data_field; /* optional */
+
+ event_data_print print_func;
+ handle_event_func handle_event; /* overrides handle_event_data() if set */
+ void *private;
+ int migrate; /* start/end pairs can migrate cpus */
+ int global; /* use global tasks */
+ enum event_data_type type;
+};
+
+/*
+ * One distinct stack trace recorded for an event_hash, together with the
+ * statistics add_event_stack() accumulates for it.
+ */
+struct stack_data {
+ struct trace_hash_item hash;
+ unsigned long long count; /* number of times this stack was added */
+ unsigned long long time; /* sum of the times passed in */
+ unsigned long long time_min;
+ unsigned long long ts_min; /* timestamp passed with time_min */
+ unsigned long long time_max;
+ unsigned long long ts_max; /* timestamp passed with time_max */
+ unsigned long long time_avg;
+ unsigned long size; /* number of bytes stored in caller[] */
+ char caller[];
+};
+
+/*
+ * Stack trace attached to a pending start.  @caller points into the data of
+ * @record, which is released with tracecmd_free_record() once consumed.
+ */
+struct stack_holder {
+ unsigned long size;
+ void *caller;
+ struct tep_record *record;
+};
+
+/*
+ * A start event that is waiting for its matching end event.  Created by
+ * add_start() and released by free_start().
+ */
+struct start_data {
+ struct trace_hash_item hash; /* linked into task->start_hash */
+ struct event_data *event_data;
+ struct list_head list; /* on a per-cpu or the migrate start list */
+ struct task_data *task;
+ unsigned long long timestamp; /* record->ts of the start event */
+ unsigned long long search_val; /* value the end event is matched on */
+ unsigned long long val;
+ int cpu;
+
+ struct stack_holder stack;
+};
+
+/*
+ * Statistics for one (event_data, search_val, val) combination of a task;
+ * looked up or created by find_event_hash().
+ */
+struct event_hash {
+ struct trace_hash_item hash;
+ struct event_data *event_data;
+ unsigned long long search_val;
+ unsigned long long val;
+ unsigned long long count;
+ unsigned long long time_total;
+ unsigned long long time_avg;
+ unsigned long long time_max;
+ unsigned long long ts_max;
+ unsigned long long time_min;
+ unsigned long long ts_min;
+ unsigned long long time_std;
+ unsigned long long last_time; /* most recent delta, see add_and_free_start() */
+
+ struct trace_hash stacks; /* stack_data entries for this event */
+};
+
+/* A group of events keyed by command name (see match_group()) */
+struct group_data {
+ struct trace_hash_item hash;
+ char *comm;
+ struct trace_hash event_hash;
+};
+
+/* Per-task profiling state; tasks are hashed by pid in handle_data.task_hash */
+struct task_data {
+ struct trace_hash_item hash;
+ int pid;
+ int sleeping; /* set from prev_state in handle_sched_switch_event() */
+
+ char *comm;
+
+ struct trace_hash start_hash; /* pending start_data entries */
+ struct trace_hash event_hash; /* event_hash statistics */
+
+ struct task_data *proxy; /* task that following stack traces belong to */
+ struct start_data *last_start;
+ struct event_hash *last_event;
+ struct tep_record *last_stack;
+ struct handle_data *handle;
+ struct group_data *group;
+};
+
+/* Per-CPU bookkeeping referenced from handle_data.cpu_data */
+struct cpu_info {
+ int current;
+};
+
+/* sched_switch matching data kept per handle (blocked and preempt variants) */
+struct sched_switch_data {
+ struct tep_format_field *prev_state;
+ int match_state;
+};
+
+/*
+ * Profiling state for one input handle.  All handles are chained through
+ * @next on the file-scope "handles" list.
+ */
+struct handle_data {
+ struct handle_data *next;
+ struct tracecmd_input *handle;
+ struct tep_handle *pevent;
+
+ struct trace_hash events; /* event_data entries keyed by event id */
+ struct trace_hash group_hash;
+
+ struct cpu_info **cpu_data;
+
+ struct tep_format_field *common_pid; /* looked up once in add_event() */
+ struct tep_format_field *wakeup_comm;
+ struct tep_format_field *switch_prev_comm;
+ struct tep_format_field *switch_next_comm;
+
+ struct sched_switch_data sched_switch_blocked;
+ struct sched_switch_data sched_switch_preempt;
+
+ struct trace_hash task_hash; /* task_data entries keyed by pid */
+ struct list_head *cpu_starts; /* array indexed by cpu, see add_start() */
+ struct list_head migrate_starts; /* starts of events that can migrate */
+
+ struct task_data *global_task;
+ struct task_data *global_percpu_tasks; /* array indexed by record->cpu */
+
+ int cpus;
+};
+
+/* Head of the list of all handles being profiled */
+static struct handle_data *handles;
+/* event_data of the stack trace event; read by handle_fgraph_entry_event() */
+static struct event_data *stacktrace_event;
+static bool merge_like_comms = false;
+
+/* Turn on the file-scope merge_like_comms flag. */
+void trace_profile_set_merge_like_comms(void)
+{
+ merge_like_comms = true;
+}
+
+/*
+ * Record that @event_data started for @task.
+ *
+ * The new entry is keyed by @search_val in the task's start hash and queued
+ * on the handle's migrate list or on the list of the CPU the record came
+ * from, depending on event_data->migrate.
+ *
+ * Returns the new entry, or NULL if it could not be allocated.
+ */
+static struct start_data *
+add_start(struct task_data *task,
+	  struct event_data *event_data, struct tep_record *record,
+	  unsigned long long search_val, unsigned long long val)
+{
+	struct list_head *queue;
+	struct start_data *entry;
+
+	entry = calloc(1, sizeof(*entry));
+	if (!entry)
+		return NULL;
+
+	entry->task = task;
+	entry->event_data = event_data;
+	entry->cpu = record->cpu;
+	entry->timestamp = record->ts;
+	entry->val = val;
+	entry->search_val = search_val;
+	entry->hash.key = trace_hash(search_val);
+	trace_hash_add(&task->start_hash, &entry->hash);
+
+	queue = event_data->migrate ? &task->handle->migrate_starts
+				    : &task->handle->cpu_starts[record->cpu];
+	list_add(&entry->list, queue);
+
+	return entry;
+}
+
+/* Lookup key handed to match_start() and match_event() via trace_hash_find() */
+struct event_data_match {
+ struct event_data *event_data;
+ unsigned long long search_val;
+ unsigned long long val; /* compared by match_event() only */
+};
+
+/*
+ * trace_hash_find() callback: true when the start behind @item has the same
+ * event_data and search_val as the event_data_match in @data.
+ */
+static int match_start(struct trace_hash_item *item, void *data)
+{
+	struct event_data_match *wanted = data;
+	struct start_data *candidate = start_from_item(item);
+
+	if (candidate->event_data != wanted->event_data)
+		return 0;
+
+	return candidate->search_val == wanted->search_val;
+}
+
+/*
+ * trace_hash_find() callback: true when the event_hash behind @item has the
+ * same event_data, search_val and val as the event_data_match in @data.
+ */
+static int match_event(struct trace_hash_item *item, void *data)
+{
+	struct event_hash *candidate = event_from_item(item);
+	struct event_data_match *wanted = data;
+
+	if (candidate->event_data != wanted->event_data)
+		return 0;
+	if (candidate->search_val != wanted->search_val)
+		return 0;
+
+	return candidate->val == wanted->val;
+}
+
+/*
+ * Look up the statistics entry of @task described by @edata, creating and
+ * inserting a zeroed one if none exists yet.
+ *
+ * Returns the entry, or NULL if a new one could not be allocated.
+ */
+static struct event_hash *
+find_event_hash(struct task_data *task, struct event_data_match *edata)
+{
+	struct trace_hash_item *found;
+	struct event_hash *entry;
+	unsigned long long key;
+
+	/* The key mixes all three members that match_event() compares */
+	key = (unsigned long)edata->event_data +
+		(unsigned long)edata->search_val +
+		(unsigned long)edata->val;
+	key = trace_hash(key);
+
+	found = trace_hash_find(&task->event_hash, key, match_event, edata);
+	if (found)
+		return event_from_item(found);
+
+	entry = calloc(1, sizeof(*entry));
+	if (!entry)
+		return NULL;
+
+	entry->hash.key = key;
+	entry->val = edata->val;
+	entry->search_val = edata->search_val;
+	entry->event_data = edata->event_data;
+	trace_hash_init(&entry->stacks, 32);
+
+	trace_hash_add(&task->event_hash, &entry->hash);
+
+	return entry;
+}
+
+/*
+ * Find (or create) the statistics entry of @task for @event_data, using the
+ * search_val and val stored in @start.
+ */
+static struct event_hash *
+find_start_event_hash(struct task_data *task, struct event_data *event_data,
+		      struct start_data *start)
+{
+	struct event_data_match edata = {
+		.event_data = event_data,
+		.search_val = start->search_val,
+		.val = start->val,
+	};
+
+	return find_event_hash(task, &edata);
+}
+
+/*
+ * Find the pending start of @task that belongs to @event_data and was
+ * stored under @search_val.  Returns NULL when there is none.
+ */
+static struct start_data *
+find_start(struct task_data *task, struct event_data *event_data,
+	   unsigned long long search_val)
+{
+	struct event_data_match edata;
+	struct trace_hash_item *found;
+
+	/* match_start() only looks at these two members */
+	edata.event_data = event_data;
+	edata.search_val = search_val;
+
+	found = trace_hash_find(&task->start_hash, trace_hash(search_val),
+				match_start, &edata);
+
+	return found ? start_from_item(found) : NULL;
+}
+
+/* Lookup key handed to match_stack(): @size bytes starting at @caller */
+struct stack_match {
+ void *caller;
+ unsigned long size;
+};
+
+/*
+ * trace_hash_find() callback: true when the stack behind @item has the same
+ * size and the same bytes as the stack_match in @data.
+ */
+static int match_stack(struct trace_hash_item *item, void *data)
+{
+	struct stack_match *wanted = data;
+	struct stack_data *candidate = stack_from_item(item);
+
+	if (candidate->size == wanted->size &&
+	    memcmp(candidate->caller, wanted->caller, candidate->size) == 0)
+		return 1;
+
+	return 0;
+}
+
+
+/*
+ * Account one occurrence of a stack trace against @event_hash.
+ * @event_hash: statistics entry the stack belongs to
+ * @caller: start of the raw stack data
+ * @size: number of bytes at @caller; must be at least sizeof(int)
+ * @time: time to add to the stack's total and to compare with min/max
+ * @ts: timestamp remembered when @time becomes the new min or max
+ *
+ * Identical stacks (same size and bytes) share one stack_data entry; a new
+ * entry holding a copy of the bytes is created on first sight.  Dies if
+ * @size is smaller than sizeof(int), warns and returns if the entry cannot
+ * be allocated.
+ */
+static void add_event_stack(struct event_hash *event_hash,
+			    void *caller, unsigned long size,
+			    unsigned long long time, unsigned long long ts)
+{
+	const char *bytes = caller;
+	unsigned long long key = 0;
+	struct stack_data *stack;
+	struct stack_match match;
+	struct trace_hash_item *item;
+	unsigned long i;
+
+	match.caller = caller;
+	match.size = size;
+
+	if (size < sizeof(int))
+		die("Stack size of less than sizeof(int)??");
+
+	/*
+	 * Hash the stack one int at a time.  The words are fetched with
+	 * memcpy() so that no alignment is assumed for @caller and no
+	 * arithmetic is done on a void pointer.
+	 */
+	for (i = 0; i <= size - sizeof(int); i += sizeof(int)) {
+		int word;
+
+		memcpy(&word, bytes + i, sizeof(word));
+		key += trace_hash(word);
+	}
+
+	item = trace_hash_find(&event_hash->stacks, key, match_stack, &match);
+	if (!item) {
+		stack = malloc(sizeof(*stack) + size);
+		if (!stack) {
+			warning("Could not allocate stack");
+			return;
+		}
+		/* Only the header is zeroed; caller[] is filled right below */
+		memset(stack, 0, sizeof(*stack));
+		memcpy(stack->caller, caller, size);
+		stack->size = size;
+		stack->hash.key = key;
+		trace_hash_add(&event_hash->stacks, &stack->hash);
+	} else
+		stack = stack_from_item(item);
+
+	stack->count++;
+	stack->time += time;
+	/* The first sample always initializes the minimum */
+	if (stack->count == 1 || time < stack->time_min) {
+		stack->time_min = time;
+		stack->ts_min = ts;
+	}
+	if (time > stack->time_max) {
+		stack->time_max = time;
+		stack->ts_max = ts;
+	}
+}
+
+/*
+ * Release a pending start: drop the owning task's reference to it, free an
+ * attached stack record, unlink it from its hash and list, and free it.
+ */
+static void free_start(struct start_data *start)
+{
+	struct task_data *owner = start->task;
+
+	/* Do not leave the task pointing at memory that is about to go away */
+	if (owner->last_start == start)
+		owner->last_start = NULL;
+
+	if (start->stack.record)
+		tracecmd_free_record(start->stack.record);
+
+	trace_hash_del(&start->hash);
+	list_del(&start->list);
+
+	free(start);
+}
+
+/*
+ * Close a pending start at time @ts and fold the elapsed time into the
+ * statistics of @task.
+ * @task: task whose event hash receives the sample
+ * @start: pending start; always consumed (freed) by this function
+ * @event_data: event the statistics are stored under
+ * @ts: timestamp of the event that ends the start
+ *
+ * Returns the updated statistics entry, or NULL if it could not be
+ * allocated.  In the failure case the start is still freed so that a stale
+ * entry cannot be matched by a later end event.
+ */
+static struct event_hash *
+add_and_free_start(struct task_data *task, struct start_data *start,
+		   struct event_data *event_data, unsigned long long ts)
+{
+	struct event_hash *event_hash;
+	long long delta;
+
+	delta = ts - start->timestamp;
+
+	/*
+	 * It's possible on a live trace, because of timestamps being
+	 * different on different CPUs, we can go back in time. When
+	 * that happens, just zero out the delta.
+	 */
+	if (delta < 0)
+		delta = 0;
+
+	event_hash = find_start_event_hash(task, event_data, start);
+	if (!event_hash) {
+		/* free_start() also releases an attached stack record */
+		free_start(start);
+		return NULL;
+	}
+	event_hash->count++;
+	event_hash->time_total += delta;
+	event_hash->last_time = delta;
+
+	if (delta > event_hash->time_max) {
+		event_hash->time_max = delta;
+		event_hash->ts_max = ts;
+	}
+
+	/* The first sample always initializes the minimum */
+	if (event_hash->count == 1 || delta < event_hash->time_min) {
+		event_hash->time_min = delta;
+		event_hash->ts_min = ts;
+	}
+
+	if (start->stack.record) {
+		unsigned long size;
+		void *caller;
+
+		size = start->stack.size;
+		caller = start->stack.caller;
+
+		add_event_stack(event_hash, caller, size, delta,
+				start->stack.record->ts);
+		/* Clear the pointer so free_start() does not free it again */
+		tracecmd_free_record(start->stack.record);
+		start->stack.record = NULL;
+	}
+
+	free_start(start);
+
+	return event_hash;
+}
+
+/*
+ * Look for a pending start of @event_data stored under @search_val and, if
+ * there is one, close it at time @ts.  Returns the updated statistics
+ * entry, or NULL when no start was found (or the update failed).
+ */
+static struct event_hash *
+find_and_update_start(struct task_data *task, struct event_data *event_data,
+		      unsigned long long ts, unsigned long long search_val)
+{
+	struct start_data *pending = find_start(task, event_data, search_val);
+
+	if (pending)
+		return add_and_free_start(task, pending, event_data, ts);
+
+	return NULL;
+}
+
+/*
+ * trace_hash_find() callback: true when the task behind @item has the pid
+ * that @data points to.
+ *
+ * find_task() passes the address of an int, so @data must be read as an
+ * int; reading it as an unsigned long would access bytes beyond the
+ * object and yield the wrong value on 64-bit big-endian machines.
+ */
+static int match_task(struct trace_hash_item *item, void *data)
+{
+	struct task_data *task = task_from_item(item);
+	int pid = *(int *)data;
+
+	return task->pid == pid;
+}
+
+/* Bind @task to handle @h and set up its (empty) start and event hashes. */
+static void init_task(struct handle_data *h, struct task_data *task)
+{
+	struct trace_hash *starts = &task->start_hash;
+	struct trace_hash *events = &task->event_hash;
+
+	task->handle = h;
+
+	trace_hash_init(starts, 16);
+	trace_hash_init(events, 32);
+}
+
+/*
+ * Allocate a zeroed task for @pid, insert it into the task hash of @h and
+ * initialize it.  Warns and returns NULL if the allocation fails.
+ */
+static struct task_data *
+add_task(struct handle_data *h, int pid)
+{
+	struct task_data *created;
+
+	created = calloc(1, sizeof(*created));
+	if (!created) {
+		warning("Could not allocate task");
+		return NULL;
+	}
+
+	created->pid = pid;
+	created->hash.key = trace_hash(pid);
+	trace_hash_add(&h->task_hash, &created->hash);
+
+	init_task(h, created);
+
+	return created;
+}
+
+/*
+ * Return the task with @pid on handle @h, creating it if it does not exist.
+ * Returns NULL only if a new task could not be allocated.
+ *
+ * The most recently returned task is cached.  The cache is only used when
+ * it belongs to @h: pids are per handle, so a task cached for another
+ * handle must not be returned.
+ */
+static struct task_data *
+find_task(struct handle_data *h, int pid)
+{
+	unsigned long long key = trace_hash(pid);
+	struct trace_hash_item *item;
+	static struct task_data *last_task;
+	void *data = &pid;
+
+	if (last_task && last_task->handle == h && last_task->pid == pid)
+		return last_task;
+
+	item = trace_hash_find(&h->task_hash, key, match_task, data);
+
+	if (item)
+		last_task = task_from_item(item);
+	else
+		last_task = add_task(h, pid);
+
+	return last_task;
+}
+
+/*
+ * trace_hash_find() callback: true when the comm of the group behind @item
+ * equals the string passed in @data.
+ */
+static int match_group(struct trace_hash_item *item, void *data)
+{
+	const char *wanted = data;
+	struct group_data *candidate = group_from_item(item);
+
+	if (strcmp(candidate->comm, wanted) != 0)
+		return 0;
+
+	return 1;
+}
+
+
+/*
+ * Set task->comm to a freshly allocated, NUL-terminated copy of the bytes
+ * that @field occupies in @record.  On allocation failure a warning is
+ * printed and task->comm is left NULL.
+ */
+static void
+add_task_comm(struct task_data *task, struct tep_format_field *field,
+	      struct tep_record *record)
+{
+	task->comm = malloc(field->size + 1);
+	if (!task->comm) {
+		warning("Could not allocate task comm");
+		return;
+	}
+
+	memcpy(task->comm, record->data + field->offset, field->size);
+	/* The field is copied in full, so terminate the copy explicitly */
+	task->comm[field->size] = 0;
+}
+
+/*
+ * Account for tasks that don't have starts: count one occurrence of
+ * @event_data for @task (or for the task named by the event's pid_field)
+ * and remember the statistics entry in task->last_event.
+ */
+static void account_task(struct task_data *task, struct event_data *event_data,
+ struct tep_record *record)
+{
+ struct event_data_match edata;
+ struct event_hash *event_hash;
+ struct task_data *proxy = NULL;
+ unsigned long long search_val = 0;
+ unsigned long long val = 0;
+ unsigned long long pid;
+
+ /*
+ * If an event has the pid_field set, then find that task for
+ * this event instead. Let this task proxy for it to handle
+ * stack traces on this event.
+ */
+ if (event_data->pid_field) {
+ tep_read_number_field(event_data->pid_field,
+ record->data, &pid);
+ proxy = task;
+ task = find_task(task->handle, pid);
+ if (!task)
+ return;
+ proxy->proxy = task;
+ }
+
+ /*
+ * If data_field is defined, use that for val,
+ * if the start_field is defined, use that for search_val.
+ */
+ if (event_data->data_field) {
+ tep_read_number_field(event_data->data_field,
+ record->data, &val);
+ }
+ if (event_data->start_match_field) {
+ tep_read_number_field(event_data->start_match_field,
+ record->data, &search_val);
+ }
+
+ /*
+ * NOTE(review): search_val read above is not used; the key's search_val
+ * is set to val. Confirm whether this is intended.
+ */
+ edata.event_data = event_data;
+ edata.search_val = val;
+ edata.val = val;
+
+ event_hash = find_event_hash(task, &edata);
+ if (!event_hash) {
+ warning("failed to allocate event_hash");
+ return;
+ }
+
+ event_hash->count++;
+ task->last_event = event_hash;
+}
+
+/*
+ * Pick the task an occurrence of @event_data is accounted to.
+ *
+ * Global events use the handle's global task when they can migrate and
+ * the per-CPU global task of the record's CPU otherwise.  For all other
+ * events the task is looked up by pid, where the event's pid_field (when
+ * defined) takes precedence over the @pid passed in.
+ */
+static struct task_data *
+find_event_task(struct handle_data *h, struct event_data *event_data,
+		struct tep_record *record, unsigned long long pid)
+{
+	if (event_data->global)
+		return event_data->migrate ?
+			h->global_task : &h->global_percpu_tasks[record->cpu];
+
+	if (event_data->pid_field) {
+		tep_read_number_field(event_data->pid_field,
+				      record->data, &pid);
+	}
+
+	return find_task(h, pid);
+}
+
+/*
+ * Handle the end event of a start/end pair: read the value of the event's
+ * start_match_field and close the start of the mate event that was stored
+ * under it.  The resulting statistics entry (NULL if no start was found)
+ * becomes the task's last_event.
+ *
+ * Returns the task the event was accounted to, or NULL if none was found.
+ */
+static struct task_data *
+handle_end_event(struct handle_data *h, struct event_data *event_data,
+		 struct tep_record *record, int pid)
+{
+	struct task_data *owner;
+	unsigned long long match_val;
+
+	owner = find_event_task(h, event_data, record, pid);
+	if (!owner)
+		return NULL;
+
+	tep_read_number_field(event_data->start_match_field, record->data,
+			      &match_val);
+
+	owner->last_event = find_and_update_start(owner, event_data->start,
+						  record->ts, match_val);
+	owner->last_start = NULL;
+
+	return owner;
+}
+
+/*
+ * Handle the start event of a start/end pair: read the value of the
+ * event's end_match_field and queue a pending start under it.  The new
+ * start becomes the task's last_start and its last_event is cleared.
+ *
+ * Returns the task the event was accounted to, or NULL on failure.
+ */
+static struct task_data *
+handle_start_event(struct handle_data *h, struct event_data *event_data,
+		   struct tep_record *record, unsigned long long pid)
+{
+	struct task_data *owner;
+	struct start_data *pending;
+	unsigned long long match_val;
+
+	owner = find_event_task(h, event_data, record, pid);
+	if (!owner)
+		return NULL;
+
+	tep_read_number_field(event_data->end_match_field, record->data,
+			      &match_val);
+
+	pending = add_start(owner, event_data, record, match_val, match_val);
+	if (!pending) {
+		warning("Failed to allocate start of task");
+		return NULL;
+	}
+
+	owner->last_event = NULL;
+	owner->last_start = pending;
+
+	return owner;
+}
+
+/*
+ * Generic event handler, used by trace_profile_record() for events that do
+ * not define their own handle_event callback.
+ *
+ * An event may be the end of one pair and the start of another; both cases
+ * are processed, end first. Events that are neither (or whose end handling
+ * found no task) are simply counted for the task of @pid via account_task().
+ *
+ * Returns 0 on success, -1 if a task or start could not be set up.
+ */
+static int handle_event_data(struct handle_data *h,
+ unsigned long long pid,
+ struct event_data *event_data,
+ struct tep_record *record, int cpu)
+{
+ struct task_data *task = NULL;
+
+ /* If this is the end of a event pair (start is set) */
+ if (event_data->start)
+ task = handle_end_event(h, event_data, record, pid);
+
+ /* If this is the start of a event pair (end is set) */
+ if (event_data->end) {
+ task = handle_start_event(h, event_data, record, pid);
+ /* handle_start_event only returns NULL on error */
+ if (!task)
+ return -1;
+ }
+
+ if (!task) {
+ task = find_task(h, pid);
+ if (!task)
+ return -1;
+ /* Reset the stack-trace bookkeeping before accounting the event */
+ task->proxy = NULL;
+ task->last_start = NULL;
+ task->last_event = NULL;
+ account_task(task, event_data, record);
+ }
+
+ return 0;
+}
+
+/* Free every pending start queued on @head. */
+static void drop_pending_starts(struct list_head *head)
+{
+	struct start_data *start;
+	struct start_data *next;
+
+	list_for_each_entry_safe(start, next, head, list)
+		free_start(start);
+}
+
+/*
+ * Events were lost on @cpu, so pending starts can no longer be trusted:
+ * drop all starts queued on that CPU and all starts of events that can
+ * migrate between CPUs.
+ */
+static void handle_missed_events(struct handle_data *h, int cpu)
+{
+	drop_pending_starts(&h->cpu_starts[cpu]);
+	drop_pending_starts(&h->migrate_starts);
+}
+
+/*
+ * trace_hash_find() callback: true when the event_data behind @item has the
+ * event id that is encoded in the pointer value of @data.
+ */
+static int match_event_data(struct trace_hash_item *item, void *data)
+{
+	int wanted = (int)(unsigned long)data;
+
+	return event_data_from_item(item)->id == wanted;
+}
+
+/*
+ * Return the event_data registered on @h for event id @id, or NULL if the
+ * event is not being profiled.
+ */
+static struct event_data *
+find_event_data(struct handle_data *h, int id)
+{
+	struct trace_hash_item *found;
+
+	/* The id itself travels to match_event_data() as the pointer value */
+	found = trace_hash_find(&h->events, trace_hash(id), match_event_data,
+				(void *)(unsigned long)id);
+	if (!found)
+		return NULL;
+
+	return event_data_from_item(found);
+}
+
+/*
+ * Per-record callback (registered with tracecmd_set_show_data_func() in
+ * trace_init_profile()): dispatch @record read from @handle to the handler
+ * of its event. Records of events that are not being profiled are ignored.
+ */
+static void trace_profile_record(struct tracecmd_input *handle,
+ struct tep_record *record)
+{
+ static struct handle_data *last_handle;
+ struct tep_record *stack_record;
+ struct event_data *event_data;
+ struct task_data *task;
+ struct handle_data *h;
+ struct tep_handle *pevent;
+ unsigned long long pid;
+ int cpu = record->cpu;
+ int id;
+
+ /* Reuse the handle_data of the previous call when the handle matches */
+ if (last_handle && last_handle->handle == handle)
+ h = last_handle;
+ else {
+ for (h = handles; h; h = h->next) {
+ if (h->handle == handle)
+ break;
+ }
+ if (!h)
+ die("Handle not found?");
+ last_handle = h;
+ }
+
+ if (record->missed_events)
+ handle_missed_events(h, cpu);
+
+ pevent = h->pevent;
+
+ id = tep_data_type(pevent, record);
+
+ event_data = find_event_data(h, id);
+
+ if (!event_data)
+ return;
+
+
+ /* Get this current PID */
+ tep_read_number_field(h->common_pid, record->data, &pid);
+
+ task = find_task(h, pid);
+ if (!task)
+ return;
+ /* Remember the saved stack so we can tell below whether it was used */
+ stack_record = task->last_stack;
+
+ if (event_data->handle_event)
+ event_data->handle_event(h, pid, event_data, record, cpu);
+ else
+ handle_event_data(h, pid, event_data, record, cpu);
+
+ /* If the last stack hasn't changed, free it */
+ if (stack_record && task->last_stack == stack_record) {
+ tracecmd_free_record(stack_record);
+ task->last_stack = NULL;
+ }
+}
+
+/*
+ * Register event @system/@event_name of handle @h for profiling.
+ *
+ * The first registered event is also used to look up the "common_pid"
+ * field of the handle; the program dies if that field does not exist.
+ *
+ * Returns the new event_data, or NULL if the event does not exist or the
+ * allocation failed.
+ */
+static struct event_data *
+add_event(struct handle_data *h, const char *system, const char *event_name,
+	  enum event_data_type type)
+{
+	struct tep_event *event;
+	struct event_data *entry;
+
+	event = tep_find_event_by_name(h->pevent, system, event_name);
+	if (!event)
+		return NULL;
+
+	if (!h->common_pid) {
+		h->common_pid = tep_find_common_field(event, "common_pid");
+		if (!h->common_pid)
+			die("No 'common_pid' found in event");
+	}
+
+	entry = calloc(1, sizeof(*entry));
+	if (!entry) {
+		warning("Could not allocate event_data");
+		return NULL;
+	}
+
+	entry->type = type;
+	entry->event = event;
+	entry->id = event->id;
+	entry->hash.key = trace_hash(event->id);
+	trace_hash_add(&h->events, &entry->hash);
+
+	return entry;
+}
+
+/* Look up field @name of @event_data's event; die if it does not exist. */
+static struct tep_format_field *
+require_field(struct event_data *event_data, const char *name)
+{
+	struct tep_format_field *field;
+
+	field = tep_find_field(event_data->event, name);
+	if (!field)
+		die("Event: %s does not have field %s",
+		    event_data->event->name, name);
+	return field;
+}
+
+/*
+ * Pair @start with @end so that the time between them can be profiled.
+ * @pid_field: optional field of the start event that holds the pid
+ * @end_match_field: field of the start event that is matched with the end
+ * @start_match_field: field of the end event that is matched with the start
+ * @migrate, @global: copied into both events
+ *
+ * Dies if any of the named fields does not exist.
+ */
+static void
+mate_events(struct handle_data *h, struct event_data *start,
+	    const char *pid_field, const char *end_match_field,
+	    struct event_data *end, const char *start_match_field,
+	    int migrate, int global)
+{
+	start->end = end;
+	end->start = start;
+
+	if (pid_field)
+		start->pid_field = require_field(start, pid_field);
+
+	/* Field to match with end */
+	start->end_match_field = require_field(start, end_match_field);
+
+	/* Field to match with start */
+	end->start_match_field = require_field(end, start_match_field);
+
+	start->migrate = end->migrate = migrate;
+	start->global = end->global = global;
+}
+
+/**
+ * tracecmd_mate_events - pair two events so the time between them is profiled
+ * @handle: The input handle the events belong to.
+ * @start_event: The event that opens the transaction
+ * @pid_field: Field to take the pid from (NULL to use common_pid)
+ * @end_match_field: Field of @start_event matched against @start_match_field
+ * @end_event: The event that closes the transaction
+ * @start_match_field: Field of @end_event matched against @end_match_field
+ * @migrate: Can the transaction switch CPUs? 1 for yes, 0 for no
+ * @global: The events are global and not per task
+ *
+ * Dies if @handle was not set up for profiling. Nothing is paired if either
+ * event cannot be registered.
+ */
+void tracecmd_mate_events(struct tracecmd_input *handle,
+			  struct tep_event *start_event,
+			  const char *pid_field, const char *end_match_field,
+			  struct tep_event *end_event,
+			  const char *start_match_field,
+			  int migrate, int global)
+{
+	struct handle_data *h = handles;
+	struct event_data *start;
+	struct event_data *end;
+
+	while (h && h->handle != handle)
+		h = h->next;
+	if (!h)
+		die("Handle not found for trace profile");
+
+	/* Both events are registered before either result is checked */
+	start = add_event(h, start_event->system, start_event->name,
+			  EVENT_TYPE_USER_MATE);
+	end = add_event(h, end_event->system, end_event->name,
+			EVENT_TYPE_USER_MATE);
+
+	if (start && end)
+		mate_events(h, start, pid_field, end_match_field, end,
+			    start_match_field, migrate, global);
+}
+
+/*
+ * Print the function a function event refers to: by name when the address
+ * in event_hash->val resolves, as a hex address otherwise.
+ */
+static void func_print(struct trace_seq *s, struct event_hash *event_hash)
+{
+	struct tep_handle *tep = event_hash->event_data->event->tep;
+	const char *name = tep_find_function(tep, event_hash->val);
+
+	if (!name) {
+		trace_seq_printf(s, "func: 0x%llx", event_hash->val);
+		return;
+	}
+
+	trace_seq_printf(s, "func: %s()", name);
+}
+
+/*
+ * Print the system call stored in event_hash->val.
+ *
+ * When built with libaudit (NO_AUDIT not defined) the number is translated
+ * to a name as "syscall:<name>". Without libaudit, or when the machine type
+ * or the name cannot be determined, "<event name>:<number>" is printed.
+ */
+static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
+{
+#ifndef NO_AUDIT
+ const char *name = NULL;
+ int machine;
+
+ machine = audit_detect_machine();
+ if (machine < 0)
+ goto fail;
+ name = audit_syscall_to_name(event_hash->val, machine);
+ if (!name)
+ goto fail;
+ trace_seq_printf(s, "syscall:%s", name);
+ return;
+fail:
+#endif
+ /* Fallback: raw syscall number */
+ trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
+ (int)event_hash->val);
+}
+
+/*
+ * From Linux include/linux/interrupt.h
+ *
+ * The list is expanded twice with different definitions of C(): once to
+ * declare the enumerators HI_SOFTIRQ ... NR_SOFTIRQ and once to build the
+ * table of their names. NR is the last entry, so NR_SOFTIRQ equals the
+ * number of real softirqs and softirq_map[] ends with the string "NR".
+ */
+#define SOFTIRQS \
+ C(HI), \
+ C(TIMER), \
+ C(NET_TX), \
+ C(NET_RX), \
+ C(BLOCK), \
+ C(BLOCK_IOPOLL), \
+ C(TASKLET), \
+ C(SCHED), \
+ C(HRTIMER), \
+ C(RCU), \
+ C(NR),
+
+/* First expansion: enumerators named <X>_SOFTIRQ */
+#undef C
+#define C(a) a##_SOFTIRQ
+
+enum { SOFTIRQS };
+
+/* Second expansion: the same names as strings, indexed by the enumerators */
+#undef C
+#define C(a) #a
+
+static const char *softirq_map[] = { SOFTIRQS };
+
+/*
+ * Print a softirq event as "<event name>:<softirq name>".  Vectors that are
+ * not in softirq_map[] (negative, or NR_SOFTIRQ and above) are printed as
+ * "<event name>:<number>" instead of indexing outside the table.
+ */
+static void softirq_print(struct trace_seq *s, struct event_hash *event_hash)
+{
+	int softirq = (int)event_hash->val;
+
+	/* val is 64 bit; the conversion to int can yield a negative number */
+	if (softirq >= 0 && softirq < NR_SOFTIRQ)
+		trace_seq_printf(s, "%s:%s", event_hash->event_data->event->name,
+				 softirq_map[softirq]);
+	else
+		trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
+				 softirq);
+}
+
+/*
+ * Print a sched_switch entry as "<event name>:<states>", where <states> has
+ * one character per bit set in event_hash->val: bit n maps to character
+ * n + 1 of TASK_STATE_TO_CHAR_STR.  A value of zero is printed as 'R'.
+ *
+ * Bits that have no character in the string are ignored, so the string's
+ * NUL terminator is never written to the output.
+ */
+static void sched_switch_print(struct trace_seq *s, struct event_hash *event_hash)
+{
+	const char states[] = TASK_STATE_TO_CHAR_STR;
+	size_t i;
+
+	trace_seq_printf(s, "%s:", event_hash->event_data->event->name);
+
+	if (event_hash->val) {
+		/* Unsigned, so the right shift below is well defined */
+		unsigned int val = event_hash->val;
+
+		/* sizeof(states) - 1 is the string length; stay below it */
+		for (i = 0; val && i + 1 < sizeof(states) - 1; i++, val >>= 1) {
+			if (val & 1)
+				trace_seq_putc(s, states[i+1]);
+		}
+	} else
+		trace_seq_putc(s, 'R');
+}
+
+/*
+ * Handler for the sched_switch event.
+ *
+ * Queues a start for the task being scheduled out (so the time it stays
+ * off the CPU can be measured) and closes any pending wakeup or earlier
+ * sched_switch start of the task being scheduled in. The comm of both
+ * tasks is recorded if not known yet.
+ *
+ * Returns 0 on success, -1 if a task could not be found or created.
+ */
+static int handle_sched_switch_event(struct handle_data *h,
+ unsigned long long pid,
+ struct event_data *event_data,
+ struct tep_record *record, int cpu)
+{
+ struct task_data *task;
+ unsigned long long prev_pid;
+ unsigned long long prev_state;
+ unsigned long long next_pid;
+ struct start_data *start;
+
+ /* pid_field holds prev_pid, data_field holds prev_state */
+ tep_read_number_field(event_data->pid_field,
+ record->data, &prev_pid);
+
+ tep_read_number_field(event_data->data_field,
+ record->data, &prev_state);
+
+ /* only care about real states */
+ prev_state &= TASK_STATE_MAX - 1;
+
+ /* end_match_field holds next_pid */
+ tep_read_number_field(event_data->end_match_field,
+ record->data, &next_pid);
+
+ task = find_task(h, prev_pid);
+ if (!task)
+ return -1;
+ if (!task->comm)
+ add_task_comm(task, h->switch_prev_comm, record);
+
+ /* A non-zero prev_state means the task did not leave in running state */
+ if (prev_state)
+ task->sleeping = 1;
+ else
+ task->sleeping = 0;
+
+ /* task is being scheduled out. prev_state tells why */
+ start = add_start(task, event_data, record, prev_pid, prev_state);
+ task->last_start = start;
+ task->last_event = NULL;
+
+ task = find_task(h, next_pid);
+ if (!task)
+ return -1;
+
+ if (!task->comm)
+ add_task_comm(task, h->switch_next_comm, record);
+
+ /*
+ * If the next task was blocked, it required a wakeup to
+ * restart, and there should be one.
+ * But if it was preempted, we look for the previous sched switch.
+ * Unfortunately, we have to look for both types of events as
+ * we do not know why next_pid scheduled out.
+ *
+ * event_data->start holds the sched_wakeup event data.
+ */
+ find_and_update_start(task, event_data->start, record->ts, next_pid);
+
+ /* Look for this task if it was preempted (no wakeup found). */
+ find_and_update_start(task, event_data, record->ts, next_pid);
+
+ return 0;
+}
+
+/*
+ * Handler for the kernel stack trace event.
+ *
+ * A stack trace belongs to the event that preceded it for the same task
+ * (or for that task's proxy, if one is set). Depending on the state of
+ * the task the stack is:
+ * - saved in task->last_stack when there is neither a pending start nor
+ * a last event (a later function graph entry may pick it up),
+ * - attached to the pending start, to be accounted when its end is seen,
+ * - or added right away to the statistics of the last event.
+ *
+ * Returns 0 on success, -1 if the task could not be found or created.
+ */
+static int handle_stacktrace_event(struct handle_data *h,
+ unsigned long long pid,
+ struct event_data *event_data,
+ struct tep_record *record, int cpu)
+{
+ struct task_data *orig_task;
+ struct task_data *proxy;
+ struct task_data *task;
+ unsigned long long size;
+ struct event_hash *event_hash;
+ struct start_data *start;
+ void *caller;
+
+ task = find_task(h, pid);
+ if (!task)
+ return -1;
+
+ /* Any previously saved stack is superseded by this one */
+ if (task->last_stack) {
+ tracecmd_free_record(task->last_stack);
+ task->last_stack = NULL;
+ }
+
+ /* orig_task is only assigned (and only used below) when a proxy is set */
+ if ((proxy = task->proxy)) {
+ task->proxy = NULL;
+ orig_task = task;
+ task = proxy;
+ }
+
+ if (!task->last_start && !task->last_event) {
+ /*
+ * Save this stack in case function graph needs it.
+ * Need the original task, not a proxy.
+ */
+ if (proxy)
+ task = orig_task;
+ tracecmd_record_ref(record);
+ task->last_stack = record;
+ return 0;
+ }
+
+ /*
+ * start_match_field holds the size.
+ * data_field holds the caller location.
+ */
+ size = record->size - event_data->data_field->offset;
+ caller = record->data + event_data->data_field->offset;
+
+ /*
+ * If there's a "start" then don't add the stack until
+ * it finds a matching "end".
+ */
+ if ((start = task->last_start)) {
+ /* caller points into the record, so keep a reference to it */
+ tracecmd_record_ref(record);
+ start->stack.record = record;
+ start->stack.size = size;
+ start->stack.caller = caller;
+ task->last_start = NULL;
+ task->last_event = NULL;
+ return 0;
+ }
+
+ /* last_event is non-NULL here: the "neither" case returned above */
+ event_hash = task->last_event;
+ task->last_event = NULL;
+
+ add_event_stack(event_hash, caller, size, event_hash->last_time,
+ record->ts);
+
+ return 0;
+}
+
+/*
+ * Handler for the function graph entry event: queue a start for the
+ * function and, if a stack trace was saved for the task just before,
+ * attach that stack to the new start.
+ *
+ * Returns 0 on success, -1 if the start could not be set up.
+ */
+static int handle_fgraph_entry_event(struct handle_data *h,
+ unsigned long long pid,
+ struct event_data *event_data,
+ struct tep_record *record, int cpu)
+{
+ unsigned long long size;
+ struct start_data *start;
+ struct task_data *task;
+ void *caller;
+
+ task = handle_start_event(h, event_data, record, pid);
+ if (!task)
+ return -1;
+
+ /*
+ * If a stack trace hasn't been used for a previous task,
+ * then it could be a function trace that we can use for
+ * the function graph. But stack traces come before the function
+ * graph events (unfortunately). So we need to attach the previous
+ * stack trace (if there is one) to this start event.
+ */
+ if (task->last_stack) {
+ /* last_start was just set by handle_start_event() */
+ start = task->last_start;
+ /* From here on "record" is the saved stack trace record */
+ record = task->last_stack;
+ size = record->size - stacktrace_event->data_field->offset;
+ caller = record->data + stacktrace_event->data_field->offset;
+ /* The reference held by last_stack moves to the start */
+ start->stack.record = record;
+ start->stack.size = size;
+ start->stack.caller = caller;
+ task->last_stack = NULL;
+ task->last_event = NULL;
+ }
+
+ /* Do not map stacks after this event to this event */
+ task->last_start = NULL;
+
+ return 0;
+}
+
+/*
+ * Handler for the function graph exit event: close the matching entry and
+ * make sure a following stack trace is not attributed to this event.
+ *
+ * Returns 0 on success, -1 if no task could be found for the event.
+ */
+static int handle_fgraph_exit_event(struct handle_data *h,
+				    unsigned long long pid,
+				    struct event_data *event_data,
+				    struct tep_record *record, int cpu)
+{
+	struct task_data *owner = handle_end_event(h, event_data, record, pid);
+
+	if (owner) {
+		/* Do not match stacks with function graph exit events */
+		owner->last_event = NULL;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Handler for the process exec event: the task runs a new program, so its
+ * remembered comm is dropped.  The pid is taken from the event's
+ * data_field when one is defined, from @pid otherwise.
+ *
+ * Returns 0 on success, -1 if the task could not be found or created.
+ */
+static int handle_process_exec(struct handle_data *h,
+			       unsigned long long pid,
+			       struct event_data *event_data,
+			       struct tep_record *record, int cpu)
+{
+	struct task_data *execed;
+
+	if (event_data->data_field) {
+		unsigned long long field_pid;
+
+		tep_read_number_field(event_data->data_field,
+				      record->data, &field_pid);
+		pid = field_pid;
+	}
+
+	execed = find_task(h, pid);
+	if (!execed)
+		return -1;
+
+	/* Task has execed, remove the comm for it */
+	free(execed->comm);
+	execed->comm = NULL;
+
+	return 0;
+}
+
+/*
+ * Handler for the sched_wakeup event.
+ *
+ * @pid is the task doing the wakeup (the waker); the woken task is read
+ * from the event's pid_field. For a sleeping task the pending start found
+ * via event_data->start is closed and a new start is queued to time the
+ * wakeup itself. The waker is set up as proxy so that a following stack
+ * trace is attributed to the woken task.
+ *
+ * Returns 0 on success (including ignored wakeups), -1 if a task could not
+ * be found or created.
+ */
+static int handle_sched_wakeup_event(struct handle_data *h,
+ unsigned long long pid,
+ struct event_data *event_data,
+ struct tep_record *record, int cpu)
+{
+ struct task_data *proxy;
+ struct task_data *task = NULL;
+ struct start_data *start;
+ unsigned long long success;
+
+ proxy = find_task(h, pid);
+ if (!proxy)
+ return -1;
+
+ /* If present, data_field holds "success" */
+ if (event_data->data_field) {
+ tep_read_number_field(event_data->data_field,
+ record->data, &success);
+
+ /* If not a successful wakeup, ignore this */
+ if (!success)
+ return 0;
+ }
+
+ /* From here on pid is the task being woken up */
+ tep_read_number_field(event_data->pid_field,
+ record->data, &pid);
+
+ task = find_task(h, pid);
+ if (!task)
+ return -1;
+
+ if (!task->comm)
+ add_task_comm(task, h->wakeup_comm, record);
+
+ /* if the task isn't sleeping, then ignore the wake up */
+ if (!task->sleeping) {
+ /* Ignore any following stack traces */
+ proxy->proxy = NULL;
+ proxy->last_start = NULL;
+ proxy->last_event = NULL;
+ return 0;
+ }
+
+ /* It's being woken up */
+ task->sleeping = 0;
+
+ /*
+ * We need the stack trace to be hooked to the woken up
+ * task, not the waker.
+ */
+ proxy->proxy = task;
+
+ /* There should be a blocked schedule out of this task */
+ find_and_update_start(task, event_data->start, record->ts, pid);
+
+ /* Set this up for timing how long the wakeup takes */
+ start = add_start(task, event_data, record, pid, pid);
+ task->last_event = NULL;
+ task->last_start = start;
+
+ return 0;
+}
+
+/*
+ * trace_init_profile - set up the profiler state for one trace input
+ * @handle: the trace input that will be profiled
+ * @hook:   list of user supplied start/end event pairs to mate (may be NULL)
+ * @global: forwarded to mate_events() for the irq and softirq pairs
+ *
+ * Allocates a handle_data, links it at the front of the "handles" list,
+ * creates the "Global Events" task and one global task per CPU, registers
+ * the well known events together with their handlers and print callbacks,
+ * mates the user hooks, and finally registers every remaining event known
+ * to the tep handle as EVENT_TYPE_UNDEFINED.
+ *
+ * Failure to allocate the handle, the per-CPU start lists, the per-CPU data
+ * or the global task only produces a warning (the handle is unlinked and
+ * freed again).  Later allocation failures and missing mandatory event
+ * fields call die().
+ */
+void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook,
+			int global)
+{
+	struct tep_handle *pevent = tracecmd_get_tep(handle);
+	struct tep_format_field **fields;
+	struct handle_data *h;
+	struct event_data *event_data;
+	struct event_data *sched_switch;
+	struct event_data *sched_wakeup;
+	struct event_data *irq_entry;
+	struct event_data *irq_exit;
+	struct event_data *softirq_entry;
+	struct event_data *softirq_exit;
+	struct event_data *softirq_raise;
+	struct event_data *fgraph_entry;
+	struct event_data *fgraph_exit;
+	struct event_data *syscall_enter;
+	struct event_data *syscall_exit;
+	struct event_data *process_exec;
+	struct event_data *start_event;
+	struct event_data *end_event;
+	struct tep_event **events;
+	int ret;
+	int i;
+
+	tracecmd_set_show_data_func(handle, trace_profile_record);
+	h = calloc(1, sizeof(*h));
+	if (!h) {
+		warning("Could not allocate handle");
+		return;
+	}
+	h->next = handles;
+	handles = h;
+
+	trace_hash_init(&h->task_hash, 1024);
+	trace_hash_init(&h->events, 1024);
+	trace_hash_init(&h->group_hash, 512);
+
+	h->handle = handle;
+	h->pevent = pevent;
+
+	h->cpus = tracecmd_cpus(handle);
+
+	/*
+	 * For streaming profiling, cpus will not be set up yet.
+	 * In this case, we simply use the number of cpus on the
+	 * system.
+	 */
+	if (!h->cpus)
+		h->cpus = tracecmd_count_cpus();
+
+	list_head_init(&h->migrate_starts);
+	h->cpu_starts = malloc(sizeof(*h->cpu_starts) * h->cpus);
+	if (!h->cpu_starts)
+		goto free_handle;
+
+	for (i = 0; i < h->cpus; i++)
+		list_head_init(&h->cpu_starts[i]);
+
+	/*
+	 * Zeroed per-CPU data.  The element size must be that of the
+	 * pointed-to object; clearing "cpus * sizeof(pointer)" bytes either
+	 * leaves part of the array uninitialized or writes past its end.
+	 */
+	h->cpu_data = calloc(h->cpus, sizeof(*h->cpu_data));
+	if (!h->cpu_data)
+		goto free_starts;
+
+	h->global_task = malloc(sizeof(struct task_data));
+	if (!h->global_task)
+		goto free_data;
+
+	memset(h->global_task, 0, sizeof(struct task_data));
+	init_task(h, h->global_task);
+	h->global_task->comm = strdup("Global Events");
+	if (!h->global_task->comm)
+		die("malloc");
+	h->global_task->pid = -1;
+
+	h->global_percpu_tasks = calloc(h->cpus, sizeof(struct task_data));
+	if (!h->global_percpu_tasks)
+		die("malloc");
+	for (i = 0; i < h->cpus; i++) {
+		init_task(h, &h->global_percpu_tasks[i]);
+		ret = asprintf(&h->global_percpu_tasks[i].comm,
+			       "Global CPU[%d] Events", i);
+		if (ret < 0)
+			die("malloc");
+		/* per-CPU global tasks get the negative pids -1, -2, ... */
+		h->global_percpu_tasks[i].pid = -1 - i;
+	}
+
+	irq_entry = add_event(h, "irq", "irq_handler_entry", EVENT_TYPE_IRQ);
+	irq_exit = add_event(h, "irq", "irq_handler_exit", EVENT_TYPE_IRQ);
+	softirq_entry = add_event(h, "irq", "softirq_entry", EVENT_TYPE_SOFTIRQ);
+	softirq_exit = add_event(h, "irq", "softirq_exit", EVENT_TYPE_SOFTIRQ);
+	softirq_raise = add_event(h, "irq", "softirq_raise", EVENT_TYPE_SOFTIRQ_RAISE);
+	sched_wakeup = add_event(h, "sched", "sched_wakeup", EVENT_TYPE_WAKEUP);
+	sched_switch = add_event(h, "sched", "sched_switch", EVENT_TYPE_SCHED_SWITCH);
+	fgraph_entry = add_event(h, "ftrace", "funcgraph_entry", EVENT_TYPE_FUNC);
+	fgraph_exit = add_event(h, "ftrace", "funcgraph_exit", EVENT_TYPE_FUNC);
+	syscall_enter = add_event(h, "raw_syscalls", "sys_enter", EVENT_TYPE_SYSCALL);
+	syscall_exit = add_event(h, "raw_syscalls", "sys_exit", EVENT_TYPE_SYSCALL);
+
+	process_exec = add_event(h, "sched", "sched_process_exec",
+				 EVENT_TYPE_PROCESS_EXEC);
+
+	stacktrace_event = add_event(h, "ftrace", "kernel_stack", EVENT_TYPE_STACK);
+	if (stacktrace_event) {
+		stacktrace_event->handle_event = handle_stacktrace_event;
+
+		stacktrace_event->data_field = tep_find_field(stacktrace_event->event,
+							      "caller");
+		if (!stacktrace_event->data_field)
+			die("Event: %s does not have field caller",
+			    stacktrace_event->event->name);
+	}
+
+	if (process_exec) {
+		process_exec->handle_event = handle_process_exec;
+		process_exec->data_field = tep_find_field(process_exec->event,
+							  "old_pid");
+	}
+
+	if (sched_switch) {
+		sched_switch->handle_event = handle_sched_switch_event;
+		sched_switch->data_field = tep_find_field(sched_switch->event,
+							  "prev_state");
+		if (!sched_switch->data_field)
+			die("Event: %s does not have field prev_state",
+			    sched_switch->event->name);
+
+		h->switch_prev_comm = tep_find_field(sched_switch->event,
+						     "prev_comm");
+		if (!h->switch_prev_comm)
+			die("Event: %s does not have field prev_comm",
+			    sched_switch->event->name);
+
+		h->switch_next_comm = tep_find_field(sched_switch->event,
+						     "next_comm");
+		if (!h->switch_next_comm)
+			die("Event: %s does not have field next_comm",
+			    sched_switch->event->name);
+
+		sched_switch->print_func = sched_switch_print;
+	}
+
+	if (sched_switch && sched_wakeup) {
+		mate_events(h, sched_switch, "prev_pid", "next_pid",
+			    sched_wakeup, "pid", 1, 0);
+		mate_events(h, sched_wakeup, "pid", "pid",
+			    sched_switch, "prev_pid", 1, 0);
+		sched_wakeup->handle_event = handle_sched_wakeup_event;
+
+		/* The 'success' field may or may not be present */
+		sched_wakeup->data_field = tep_find_field(sched_wakeup->event,
+							  "success");
+
+		h->wakeup_comm = tep_find_field(sched_wakeup->event, "comm");
+		if (!h->wakeup_comm)
+			die("Event: %s does not have field comm",
+			    sched_wakeup->event->name);
+	}
+
+	if (irq_entry && irq_exit)
+		mate_events(h, irq_entry, NULL, "irq", irq_exit, "irq", 0, global);
+
+	if (softirq_entry)
+		softirq_entry->print_func = softirq_print;
+
+	if (softirq_exit)
+		softirq_exit->print_func = softirq_print;
+
+	if (softirq_raise)
+		softirq_raise->print_func = softirq_print;
+
+	if (softirq_entry && softirq_exit)
+		mate_events(h, softirq_entry, NULL, "vec", softirq_exit, "vec",
+			    0, global);
+
+	if (softirq_entry && softirq_raise)
+		mate_events(h, softirq_raise, NULL, "vec", softirq_entry, "vec",
+			    0, global);
+
+	if (fgraph_entry && fgraph_exit) {
+		mate_events(h, fgraph_entry, NULL, "func", fgraph_exit, "func", 1, 0);
+		fgraph_entry->handle_event = handle_fgraph_entry_event;
+		fgraph_exit->handle_event = handle_fgraph_exit_event;
+		fgraph_entry->print_func = func_print;
+	}
+
+	if (syscall_enter && syscall_exit) {
+		mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0);
+		syscall_enter->print_func = syscall_print;
+		syscall_exit->print_func = syscall_print;
+	}
+
+	events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
+	if (!events)
+		die("malloc");
+
+	/* Add some other events */
+	event_data = add_event(h, "ftrace", "function", EVENT_TYPE_FUNC);
+	if (event_data) {
+		event_data->data_field =
+			tep_find_field(event_data->event, "ip");
+	}
+
+	/* Add any user defined hooks */
+	for (; hook; hook = hook->next) {
+		start_event = add_event(h, hook->start_system, hook->start_event,
+					EVENT_TYPE_USER_MATE);
+		end_event = add_event(h, hook->end_system, hook->end_event,
+				      EVENT_TYPE_USER_MATE);
+		if (!start_event) {
+			warning("Event %s not found", hook->start_event);
+			continue;
+		}
+		if (!end_event) {
+			warning("Event %s not found", hook->end_event);
+			continue;
+		}
+		mate_events(h, start_event, hook->pid, hook->start_match,
+			    end_event, hook->end_match, hook->migrate,
+			    hook->global);
+	}
+
+	/* Now add any defined event that we haven't processed */
+	for (i = 0; events[i]; i++) {
+		event_data = find_event_data(h, events[i]->id);
+		if (event_data)
+			continue;
+
+		event_data = add_event(h, events[i]->system, events[i]->name,
+				       EVENT_TYPE_UNDEFINED);
+		/*
+		 * Every other add_event() result in this function is checked
+		 * for NULL; do the same here instead of dereferencing it.
+		 */
+		if (!event_data)
+			continue;
+
+		fields = tep_event_fields(events[i]);
+		if (!fields)
+			die("malloc");
+
+		/* the first field, when there is one, becomes the data field */
+		if (fields[0])
+			event_data->data_field = fields[0];
+
+		free(fields);
+	}
+	return;
+
+ free_data:
+	free(h->cpu_data);
+ free_starts:
+	free(h->cpu_starts);
+ free_handle:
+	/* h was pushed on the front of the list above; pop it again */
+	handles = h->next;
+	free(h);
+	warning("Failed handle allocations");
+}
+
+/*
+ * Print the statistics line of one recorded stack followed by one line per
+ * caller address.  Addresses are read "long size" bytes at a time and the
+ * walk ends at the first all-ones value or at the end of the recorded data.
+ * As a side effect stack->time_avg is refreshed when stack->count is set.
+ */
+static void output_event_stack(struct tep_handle *pevent, struct stack_data *stack)
+{
+	const int longsize = tep_get_long_size(pevent);
+	unsigned long long end_marker = -1ULL;
+	int off;
+
+	/* narrow the all-ones terminator to the width of a traced long */
+	if (longsize < 8)
+		end_marker &= (1ULL << (longsize * 8)) - 1;
+
+	if (stack->count)
+		stack->time_avg = stack->time / stack->count;
+
+	printf(" <stack> %lld total:%lld min:%lld(ts:%lld.%06lld) max:%lld(ts:%lld.%06lld) avg=%lld\n",
+	       stack->count, stack->time, stack->time_min,
+	       nsecs_per_sec(stack->ts_min), mod_to_usec(stack->ts_min),
+	       stack->time_max,
+	       nsecs_per_sec(stack->ts_max), mod_to_usec(stack->ts_max),
+	       stack->time_avg);
+
+	for (off = 0; off < stack->size; off += longsize) {
+		void *slot = &stack->caller[off];
+		unsigned long long addr;
+		const char *name;
+
+		/* todo, read value from pevent */
+		if (longsize == 4)
+			addr = *(unsigned int *)slot;
+		else if (longsize == 8)
+			addr = *(unsigned long long *)slot;
+		else
+			die("Strange long size %d", longsize);
+
+		if (addr == end_marker)
+			break;
+
+		name = tep_find_function(pevent, addr);
+		if (!name)
+			printf(" => 0x%llx\n", addr);
+		else
+			printf(" => %s (0x%llx)\n", name, addr);
+	}
+}
+
+struct stack_chain { /* one node of the call-chain tree built by make_stack_chain() */
+ struct stack_chain *children; /* array of nr_children nodes for the next stack level, or NULL */
+ unsigned long long val; /* address shared at this level by the stacks grouped here */
+ unsigned long long time; /* sum of the grouped stacks' time */
+ unsigned long long time_min; /* smallest time_min among the grouped stacks */
+ unsigned long long ts_min; /* ts_min of the stack that supplied time_min */
+ unsigned long long time_max; /* largest time_max among the grouped stacks */
+ unsigned long long ts_max; /* ts_max of the stack that supplied time_max */
+ unsigned long long time_avg; /* time divided by count */
+ unsigned long long count; /* sum of the grouped stacks' count */
+ int percent; /* rounded share of the siblings' total time, or of their count when no time was recorded */
+ int nr_children; /* number of entries in children */
+};
+
+/*
+ * qsort() comparator for an array of struct stack_chain: biggest time
+ * first, and for equal times biggest count first.
+ */
+static int compare_chains(const void *a, const void *b)
+{
+	const struct stack_chain *lhs = a;
+	const struct stack_chain *rhs = b;
+
+	if (lhs->time != rhs->time)
+		return lhs->time > rhs->time ? -1 : 1;
+
+	/* If stacks don't use time, then use count */
+	if (lhs->count != rhs->count)
+		return lhs->count > rhs->count ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Return val as a percentage of total, rounded to the nearest integer.
+ * total must not be zero.
+ */
+static int calc_percent(unsigned long long val, unsigned long long total)
+{
+	/* adding half the divisor turns the truncating division into rounding */
+	unsigned long long scaled = val * 100 + total / 2;
+
+	return scaled / total;
+}
+
+/*
+ * Return non-zero when entry number "level" (each entry being longsize
+ * bytes) does not fit completely inside the stack's recorded size.
+ */
+static int stack_overflows(struct stack_data *stack, int longsize, int level)
+{
+	if (longsize * level > stack->size - longsize)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Fetch entry number "level" of the stack: a 64 bit read when longsize is
+ * 8, an unsigned int read otherwise.  No bounds check is done here.
+ */
+static unsigned long long
+stack_value(struct stack_data *stack, int longsize, int level)
+{
+	void *slot = &stack->caller[longsize * level];
+
+	if (longsize == 8)
+		return *(u64 *)slot;
+
+	return *(unsigned *)slot;
+}
+
+/*
+ * make_stack_chain - build one level of the call-chain tree
+ * @stacks:      array of stacks; runs of equal values at @level must be adjacent
+ * @cnt:         number of entries in @stacks
+ * @longsize:    size in bytes of one stack entry
+ * @level:       index of the stack entry this level groups by
+ * @nr_children: set to the number of nodes returned
+ *
+ * Consecutive stacks that share the same value at @level are folded into
+ * one node whose count/time statistics are accumulated from those stacks.
+ * Stacks that are too short for @level, or that hold the all-ones
+ * terminator there, are skipped.  Each node's children are built by
+ * recursing on its run of stacks at @level + 1.  The nodes are sorted with
+ * compare_chains() and given a percentage of the level's total time (or
+ * total count when no time was recorded).
+ *
+ * Returns a zeroed-then-filled array to be released with free_chain(), or
+ * NULL with *@nr_children set to 0 when there is nothing at this level or
+ * the allocation failed.
+ */
+static struct stack_chain *
+make_stack_chain(struct stack_data **stacks, int cnt, int longsize, int level,
+		 int *nr_children)
+{
+	struct stack_chain *chain;
+	unsigned long long total_time = 0;
+	unsigned long long total_count = 0;
+	unsigned long long time = 0;
+	unsigned long long time_min = 0;
+	unsigned long long time_max = 0;
+	/*
+	 * ts_max is only assigned when a stack has a non-zero time_max, so
+	 * both timestamps start out defined instead of indeterminate.
+	 */
+	unsigned long long ts_min = 0;
+	unsigned long long ts_max = 0;
+	unsigned long long count = 0;
+	unsigned long long stop = -1ULL;
+	int nr_chains = 0;
+	u64 last = 0;
+	u64 val;
+	int start = 0;
+	int i;
+	int x = 0;
+
+	/*
+	 * Callers use *nr_children as the length of the returned array, so
+	 * it has to be valid on every return path, including failures.
+	 */
+	*nr_children = 0;
+
+	if (longsize < 8)
+		stop &= (1ULL << (longsize * 8)) - 1;
+
+	/* First find out how many diffs there are */
+	for (i = 0; i < cnt; i++) {
+		if (stack_overflows(stacks[i], longsize, level))
+			continue;
+
+		val = stack_value(stacks[i], longsize, level);
+
+		if (val == stop)
+			continue;
+
+		if (!nr_chains || val != last)
+			nr_chains++;
+		last = val;
+	}
+
+	if (!nr_chains)
+		return NULL;
+
+	chain = calloc(nr_chains, sizeof(*chain));
+	if (!chain) {
+		warning("Could not allocate chain");
+		return NULL;
+	}
+
+	for (i = 0; i < cnt; i++) {
+		/* skipped stacks can not be part of the run being collected */
+		if (stack_overflows(stacks[i], longsize, level)) {
+			start = i+1;
+			continue;
+		}
+
+		val = stack_value(stacks[i], longsize, level);
+
+		if (val == stop) {
+			start = i+1;
+			continue;
+		}
+
+		count += stacks[i]->count;
+		time += stacks[i]->time;
+		if (stacks[i]->time_max > time_max) {
+			time_max = stacks[i]->time_max;
+			ts_max = stacks[i]->ts_max;
+		}
+		/* the first stack of a run always seeds the minimum */
+		if (i == start || stacks[i]->time_min < time_min) {
+			time_min = stacks[i]->time_min;
+			ts_min = stacks[i]->ts_min;
+		}
+
+		/* the run ends at the last stack or when the next one differs */
+		if (i == cnt - 1 ||
+		    stack_overflows(stacks[i+1], longsize, level) ||
+		    val != stack_value(stacks[i+1], longsize, level)) {
+
+			total_time += time;
+			total_count += count;
+			chain[x].val = val;
+			chain[x].time_avg = time / count;
+			chain[x].count = count;
+			chain[x].time = time;
+			chain[x].time_min = time_min;
+			chain[x].ts_min = ts_min;
+			chain[x].time_max = time_max;
+			chain[x].ts_max = ts_max;
+			chain[x].children =
+				make_stack_chain(&stacks[start], (i - start) + 1,
+						 longsize, level+1,
+						 &chain[x].nr_children);
+			x++;
+			start = i + 1;
+			count = 0;
+			time = 0;
+			time_min = 0;
+			time_max = 0;
+		}
+	}
+
+	qsort(chain, nr_chains, sizeof(*chain), compare_chains);
+
+	*nr_children = nr_chains;
+
+	/* Should never happen */
+	if (!total_time && !total_count)
+		return chain;
+
+	/* Now calculate percentage */
+	for (i = 0; i < nr_chains; i++) {
+		if (total_time)
+			chain[i].percent = calc_percent(chain[i].time, total_time);
+		/* In case stacks don't have time */
+		else if (total_count)
+			chain[i].percent = calc_percent(chain[i].count, total_count);
+	}
+
+	return chain;
+}
+
+/*
+ * Release an array of nr_chains chain nodes together with everything
+ * hanging below them.  A NULL chain is accepted and ignored.
+ */
+static void free_chain(struct stack_chain *chain, int nr_chains)
+{
+	int idx = 0;
+
+	if (!chain)
+		return;
+
+	/* children first, then the array that holds the nodes themselves */
+	while (idx < nr_chains) {
+		free_chain(chain[idx].children, chain[idx].nr_children);
+		idx++;
+	}
+
+	free(chain);
+}
+
+#define INDENT 5
+
+/*
+ * Print level + 1 indentation columns.  Column p shows a '|' when bit p
+ * of mask is set and a blank otherwise; each column is INDENT characters
+ * wide and followed by a space.
+ */
+static void print_indent(int level, unsigned long long mask)
+{
+	int col = 0;
+
+	while (col <= level) {
+		char mark = ' ';
+
+		if (mask & (1ULL << col))
+			mark = '|';
+		printf("%*c ", INDENT, mark);
+		col++;
+	}
+}
+
+/*
+ * Print the address of a chain node on its own line, preceded by the
+ * function name when tep_find_function() knows one for it.
+ */
+static void print_chain_func(struct tep_handle *pevent, struct stack_chain *chain)
+{
+	const char *name = tep_find_function(pevent, chain->val);
+
+	if (!name) {
+		printf("0x%llx\n", chain->val);
+		return;
+	}
+
+	printf("%s (0x%llx)\n", name, chain->val);
+}
+
+/*
+ * Print nr_chains sibling nodes of the call-chain tree at the given depth,
+ * recursing into their children.
+ *
+ * *mask carries one bit per indentation column; a set bit makes
+ * print_indent() draw a '|' in that column.  Bit level + 1 is set while
+ * the siblings of this level are being printed and is cleared when the
+ * last sibling is reached.
+ *
+ * A run of descendants that each are an only child is printed inline
+ * below its ancestor instead of opening a new indentation level.
+ */
+static void output_chain(struct tep_handle *pevent, struct stack_chain *chain, int level,
+			 int nr_chains, unsigned long long *mask)
+{
+	const unsigned long long column = 1ULL << (level + 1);
+	struct stack_chain *node;
+	struct stack_chain *child;
+	char connector = '|';
+	int nr_children;
+	int idx;
+
+	if (!nr_chains)
+		return;
+
+	*mask |= column;
+	print_indent(level + 1, *mask);
+	printf("\n");
+
+	for (idx = 0; idx < nr_chains; idx++) {
+		node = &chain[idx];
+
+		print_indent(level, *mask);
+
+		printf("%*c ", INDENT, '+');
+
+		/* the last sibling no longer needs its column drawn */
+		if (idx == nr_chains - 1) {
+			*mask &= ~column;
+			connector = ' ';
+		}
+
+		print_chain_func(pevent, node);
+
+		print_indent(level, *mask);
+
+		printf("%*c ", INDENT, connector);
+		printf(" %d%% (%lld)", node->percent, node->count);
+		if (node->time)
+			printf(" time:%lld max:%lld(ts:%lld.%06lld) min:%lld(ts:%lld.%06lld) avg:%lld",
+			       node->time, node->time_max,
+			       nsecs_per_sec(node->ts_max),
+			       mod_to_usec(node->ts_max),
+			       node->time_min,
+			       nsecs_per_sec(node->ts_min),
+			       mod_to_usec(node->ts_min),
+			       node->time_avg);
+		printf("\n");
+
+		/* follow the line of only children and print it inline */
+		child = node->children;
+		nr_children = node->nr_children;
+		while (child && nr_children == 1) {
+			print_indent(level, *mask);
+			printf("%*c ", INDENT, connector);
+			printf(" ");
+			print_chain_func(pevent, child);
+
+			nr_children = child->nr_children;
+			child = child->children;
+		}
+
+		/* whatever is left has several branches: one level deeper */
+		if (child)
+			output_chain(pevent, child, level+1, nr_children, mask);
+
+		print_indent(level + 1, *mask);
+		printf("\n");
+	}
+
+	*mask &= ~column;
+	print_indent(level, *mask);
+	printf("\n");
+}
+
+/*
+ * qsort() comparator for an array of struct stack_data pointers.  The
+ * caller data of both stacks is compared one unsigned int at a time over
+ * the length they have in common; if that part is identical the shorter
+ * stack sorts first.
+ */
+static int compare_stacks(const void *a, const void *b)
+{
+	struct stack_data *lhs = *(struct stack_data * const *)a;
+	struct stack_data *rhs = *(struct stack_data * const *)b;
+	unsigned int wl, wr;
+	int common;
+	int off;
+
+	/* only compare up to the smaller size of the two */
+	common = lhs->size > rhs->size ? rhs->size : lhs->size;
+
+	for (off = 0; off < common; off += sizeof(wl)) {
+		wl = *(unsigned *)&lhs->caller[off];
+		wr = *(unsigned *)&rhs->caller[off];
+		if (wl != wr)
+			return wl > wr ? 1 : -1;
+	}
+
+	/* They are the same up to size. Then bigger size wins */
+	if (lhs->size > rhs->size)
+		return 1;
+	if (lhs->size < rhs->size)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * output_stacks - print the call-chain tree of all stacks in a hash
+ * @pevent:     tep handle used for the long size and function lookups
+ * @stack_hash: hash holding the struct stack_data items to report
+ *
+ * The stacks are collected into an array, sorted with compare_stacks(),
+ * folded into a tree by make_stack_chain() and printed by output_chain().
+ * Nothing is printed for an empty hash.  The hash is not modified.
+ */
+static void output_stacks(struct tep_handle *pevent, struct trace_hash *stack_hash)
+{
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+	struct stack_data **stacks;
+	struct stack_chain *chain;
+	unsigned long long mask = 0;
+	/* stays 0 should make_stack_chain() bail out without setting it */
+	int nr_chains = 0;
+	int longsize = tep_get_long_size(pevent);
+	int nr_stacks;
+	int i;
+
+	nr_stacks = 0;
+	trace_hash_for_each_bucket(bucket, stack_hash) {
+		trace_hash_for_each_item(item, bucket) {
+			nr_stacks++;
+		}
+	}
+
+	/*
+	 * Nothing to report.  Returning here also avoids malloc(0), which
+	 * may legitimately return NULL and would be reported as a failure.
+	 */
+	if (!nr_stacks)
+		return;
+
+	stacks = malloc(sizeof(*stacks) * nr_stacks);
+	if (!stacks) {
+		warning("Could not allocate stacks");
+		return;
+	}
+
+	nr_stacks = 0;
+	trace_hash_for_each_bucket(bucket, stack_hash) {
+		trace_hash_for_each_item(item, bucket) {
+			stacks[nr_stacks++] = stack_from_item(item);
+		}
+	}
+
+	qsort(stacks, nr_stacks, sizeof(*stacks), compare_stacks);
+
+	chain = make_stack_chain(stacks, nr_stacks, longsize, 0, &nr_chains);
+
+	output_chain(pevent, chain, 0, nr_chains, &mask);
+
+	/* disabled flat dump of every stack, kept for debugging */
+	if (0)
+		for (i = 0; i < nr_stacks; i++)
+			output_event_stack(pevent, stacks[i]);
+
+	free(stacks);
+	free_chain(chain, nr_chains);
+}
+
+/*
+ * Print the summary line of one event hash entry (description, hit count
+ * and, when time was accumulated, the timing statistics), then the call
+ * chains recorded for it.  event_hash->time_avg is refreshed on the way.
+ */
+static void output_event(struct event_hash *event_hash)
+{
+	struct event_data *edata = event_hash->event_data;
+	struct tep_handle *tep = edata->event->tep;
+	struct trace_seq seq;
+
+	trace_seq_init(&seq);
+
+	/* an event specific printer wins, function events come second */
+	if (edata->print_func) {
+		edata->print_func(&seq, event_hash);
+	} else if (edata->type != EVENT_TYPE_FUNC) {
+		trace_seq_printf(&seq, "%s:0x%llx",
+				 edata->event->name,
+				 event_hash->val);
+	} else {
+		func_print(&seq, event_hash);
+	}
+	trace_seq_terminate(&seq);
+
+	printf(" Event: %s (%lld)",
+	       seq.buffer, event_hash->count);
+
+	trace_seq_destroy(&seq);
+
+	if (event_hash->time_total) {
+		event_hash->time_avg = event_hash->time_total / event_hash->count;
+		printf(" Total: %lld Avg: %lld Max: %lld(ts:%lld.%06lld) Min:%lld(ts:%lld.%06lld)",
+		       event_hash->time_total, event_hash->time_avg,
+		       event_hash->time_max,
+		       nsecs_per_sec(event_hash->ts_max),
+		       mod_to_usec(event_hash->ts_max),
+		       event_hash->time_min,
+		       nsecs_per_sec(event_hash->ts_min),
+		       mod_to_usec(event_hash->ts_min));
+	}
+	printf("\n");
+
+	output_stacks(tep, &event_hash->stacks);
+}
+
+/*
+ * qsort() comparator for an array of struct event_hash pointers.
+ *
+ * Order: sched switch entries first (ascending val among themselves),
+ * then wakeups (all equal among themselves), then everything else by
+ * ascending event id and, within one id, by descending time_total.
+ */
+static int compare_events(const void *a, const void *b)
+{
+	const struct event_hash *ea = *(struct event_hash * const *)a;
+	const struct event_hash *eb = *(struct event_hash * const *)b;
+	const struct event_data *da = ea->event_data;
+	const struct event_data *db = eb->event_data;
+	int a_is, b_is;
+
+	/* Schedule switch goes first */
+	a_is = da->type == EVENT_TYPE_SCHED_SWITCH;
+	b_is = db->type == EVENT_TYPE_SCHED_SWITCH;
+	if (a_is || b_is) {
+		if (!b_is)
+			return -1;
+		if (!a_is)
+			return 1;
+		/* lower the state the better */
+		if (ea->val != eb->val)
+			return ea->val > eb->val ? 1 : -1;
+		return 0;
+	}
+
+	/* Wakeups are next */
+	a_is = da->type == EVENT_TYPE_WAKEUP;
+	b_is = db->type == EVENT_TYPE_WAKEUP;
+	if (a_is || b_is) {
+		if (a_is && b_is)
+			return 0;
+		return a_is ? -1 : 1;
+	}
+
+	if (da->id != db->id)
+		return da->id > db->id ? 1 : -1;
+
+	if (ea->time_total != eb->time_total)
+		return ea->time_total > eb->time_total ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Print the report of one task: a header line followed by all of its
+ * events in compare_events() order.  Tasks that belong to a group print
+ * nothing here.  Tasks with a negative pid get only their comm as header.
+ */
+static void output_task(struct handle_data *h, struct task_data *task)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+	struct event_hash **sorted;
+	const char *name;
+	int total = 0;
+	int filled = 0;
+	int n;
+
+	if (task->group)
+		return;
+
+	/* fall back to the comm the tep handle knows for this pid */
+	name = task->comm ? task->comm :
+		tep_data_comm_from_pid(h->pevent, task->pid);
+
+	if (task->pid >= 0)
+		printf("\ntask: %s-%d\n", name, task->pid);
+	else
+		printf("%s\n", task->comm);
+
+	/* first pass counts the events, second pass collects them */
+	trace_hash_for_each_bucket(bkt, &task->event_hash) {
+		trace_hash_for_each_item(it, bkt) {
+			total++;
+		}
+	}
+
+	sorted = malloc(sizeof(*sorted) * total);
+	if (!sorted) {
+		warning("Could not allocate events");
+		return;
+	}
+
+	trace_hash_for_each_bucket(bkt, &task->event_hash) {
+		trace_hash_for_each_item(it, bkt) {
+			sorted[filled] = event_from_item(it);
+			filled++;
+		}
+	}
+
+	qsort(sorted, total, sizeof(*sorted), compare_events);
+
+	for (n = 0; n < total; n++)
+		output_event(sorted[n]);
+
+	free(sorted);
+}
+
+/*
+ * Print the report of one group: a "group:" header with its comm followed
+ * by all of the group's events in compare_events() order.
+ */
+static void output_group(struct handle_data *h, struct group_data *group)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+	struct event_hash **sorted;
+	int total = 0;
+	int filled = 0;
+	int n;
+
+	printf("\ngroup: %s\n", group->comm);
+
+	/* first pass counts the events, second pass collects them */
+	trace_hash_for_each_bucket(bkt, &group->event_hash) {
+		trace_hash_for_each_item(it, bkt) {
+			total++;
+		}
+	}
+
+	sorted = malloc(sizeof(*sorted) * total);
+	if (!sorted) {
+		warning("Could not allocate events");
+		return;
+	}
+
+	trace_hash_for_each_bucket(bkt, &group->event_hash) {
+		trace_hash_for_each_item(it, bkt) {
+			sorted[filled] = event_from_item(it);
+			filled++;
+		}
+	}
+
+	qsort(sorted, total, sizeof(*sorted), compare_events);
+
+	for (n = 0; n < total; n++)
+		output_event(sorted[n]);
+
+	free(sorted);
+}
+
+/*
+ * qsort() comparator for an array of struct task_data pointers: ascending
+ * pid order.
+ */
+static int compare_tasks(const void *a, const void *b)
+{
+	const struct task_data *lhs = *(struct task_data * const *)a;
+	const struct task_data *rhs = *(struct task_data * const *)b;
+
+	if (lhs->pid == rhs->pid)
+		return 0;
+
+	return lhs->pid > rhs->pid ? 1 : -1;
+}
+
+/*
+ * qsort() comparator for an array of struct group_data pointers: orders
+ * the groups by their comm string.
+ *
+ * qsort() hands the comparator pointers to the array elements, and the
+ * elements are themselves pointers to struct group_data, so each argument
+ * has to be dereferenced once before the comm can be reached.  Passing
+ * the arguments straight to strcmp() would compare the raw bytes of the
+ * pointer values instead of the names.
+ */
+static int compare_groups(const void *a, const void *b)
+{
+	struct group_data * const *A = a;
+	struct group_data * const *B = b;
+
+	return strcmp((*A)->comm, (*B)->comm);
+}
+
+/*
+ * Free an event hash entry together with every stack stored in its
+ * stacks hash.  The entry itself must already be unlinked by the caller.
+ */
+static void free_event_hash(struct event_hash *event_hash)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+
+	trace_hash_for_each_bucket(bkt, &event_hash->stacks) {
+		/* keep taking the first item until the bucket is drained */
+		trace_hash_while_item(it, bkt) {
+			struct stack_data *victim = stack_from_item(it);
+
+			trace_hash_del(&victim->hash);
+			free(victim);
+		}
+	}
+
+	trace_hash_free(&event_hash->stacks);
+	free(event_hash);
+}
+
+/*
+ * Release everything a task owns (comm, pending starts with their stack
+ * records, event hash entries, the last stack record) but not the
+ * task_data structure itself.
+ */
+static void __free_task(struct task_data *task)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+
+	free(task->comm);
+
+	trace_hash_for_each_bucket(bkt, &task->start_hash) {
+		trace_hash_while_item(it, bkt) {
+			struct start_data *pending = start_from_item(it);
+
+			if (pending->stack.record)
+				tracecmd_free_record(pending->stack.record);
+			/* a start sits on a list as well as in the hash */
+			list_del(&pending->list);
+			trace_hash_del(it);
+			free(pending);
+		}
+	}
+	trace_hash_free(&task->start_hash);
+
+	trace_hash_for_each_bucket(bkt, &task->event_hash) {
+		trace_hash_while_item(it, bkt) {
+			struct event_hash *entry = event_from_item(it);
+
+			trace_hash_del(it);
+			free_event_hash(entry);
+		}
+	}
+	trace_hash_free(&task->event_hash);
+
+	if (task->last_stack)
+		tracecmd_free_record(task->last_stack);
+}
+
+/*
+ * Release a heap allocated task: first everything it owns, then the
+ * task_data structure itself.
+ */
+static void free_task(struct task_data *task)
+{
+	__free_task(task);
+
+	free(task);
+}
+
+/*
+ * Release a group: its comm, every entry of its event hash, the hash
+ * itself and finally the group_data structure.
+ */
+static void free_group(struct group_data *group)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+
+	free(group->comm);
+
+	trace_hash_for_each_bucket(bkt, &group->event_hash) {
+		/* keep taking the first item until the bucket is drained */
+		trace_hash_while_item(it, bkt) {
+			struct event_hash *entry = event_from_item(it);
+
+			trace_hash_del(it);
+			free_event_hash(entry);
+		}
+	}
+
+	trace_hash_free(&group->event_hash);
+	free(group);
+}
+
+/*
+ * Print the report of a global task, but only if it recorded any event.
+ */
+static void show_global_task(struct handle_data *h,
+			     struct task_data *task)
+{
+	if (!trace_hash_empty(&task->event_hash))
+		output_task(h, task);
+}
+
+/*
+ * output_tasks - report and release every task of a handle
+ * @h: the handle whose task hash is to be reported
+ *
+ * All tasks are removed from h->task_hash, sorted by pid, printed with
+ * output_task() and then freed.  On return the hash holds no items (unless
+ * the temporary array could not be allocated, in which case only a warning
+ * is printed and the hash is left alone).
+ */
+static void output_tasks(struct handle_data *h)
+{
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+	struct task_data **tasks;
+	int nr_tasks = 0;
+	int i;
+
+	trace_hash_for_each_bucket(bucket, &h->task_hash) {
+		trace_hash_for_each_item(item, bucket) {
+			nr_tasks++;
+		}
+	}
+
+	/*
+	 * Same guard as output_groups(): with nothing to print, do not ask
+	 * for a zero sized allocation, which may return NULL and would then
+	 * be reported as an allocation failure.
+	 */
+	if (nr_tasks == 0)
+		return;
+
+	tasks = malloc(sizeof(*tasks) * nr_tasks);
+	if (!tasks) {
+		warning("Could not allocate tasks");
+		return;
+	}
+
+	nr_tasks = 0;
+
+	/* move the tasks out of the hash; the array owns them from here on */
+	trace_hash_for_each_bucket(bucket, &h->task_hash) {
+		trace_hash_while_item(item, bucket) {
+			tasks[nr_tasks++] = task_from_item(item);
+			trace_hash_del(item);
+		}
+	}
+
+	qsort(tasks, nr_tasks, sizeof(*tasks), compare_tasks);
+
+	for (i = 0; i < nr_tasks; i++) {
+		output_task(h, tasks[i]);
+		free_task(tasks[i]);
+	}
+
+	free(tasks);
+}
+
+/*
+ * Report and release every group of a handle: the groups are taken out of
+ * h->group_hash, sorted with compare_groups(), printed and freed.  Does
+ * nothing when the handle has no groups.
+ */
+static void output_groups(struct handle_data *h)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+	struct group_data **sorted;
+	int total = 0;
+	int filled = 0;
+	int n;
+
+	trace_hash_for_each_bucket(bkt, &h->group_hash) {
+		trace_hash_for_each_item(it, bkt) {
+			total++;
+		}
+	}
+
+	if (!total)
+		return;
+
+	sorted = malloc(sizeof(*sorted) * total);
+	if (!sorted) {
+		warning("Could not allocate groups");
+		return;
+	}
+
+	/* move the groups out of the hash; the array owns them from here on */
+	trace_hash_for_each_bucket(bkt, &h->group_hash) {
+		trace_hash_while_item(it, bkt) {
+			sorted[filled] = group_from_item(it);
+			filled++;
+			trace_hash_del(it);
+		}
+	}
+
+	qsort(sorted, filled, sizeof(*sorted), compare_groups);
+
+	n = 0;
+	while (n < filled) {
+		output_group(h, sorted[n]);
+		free_group(sorted[n]);
+		n++;
+	}
+
+	free(sorted);
+}
+
+/*
+ * Print the full report of one handle: the global task, the per-CPU
+ * global tasks, then the groups and finally the individual tasks.
+ */
+static void output_handle(struct handle_data *h)
+{
+	int cpu = 0;
+
+	show_global_task(h, h->global_task);
+
+	while (cpu < h->cpus) {
+		show_global_task(h, &h->global_percpu_tasks[cpu]);
+		cpu++;
+	}
+
+	output_groups(h);
+	output_tasks(h);
+}
+
+/*
+ * Fold one stack into the stacks hash of an event.  If the event has no
+ * matching stack yet, the given stack is inserted as is; otherwise its
+ * count, time and min/max values are merged into the existing stack and
+ * the given stack is freed.  Either way the caller gives up ownership.
+ */
+static void merge_event_stack(struct event_hash *event,
+			      struct stack_data *stack)
+{
+	struct trace_hash_item *found;
+	struct stack_data *target;
+	struct stack_match match;
+
+	match.caller = stack->caller;
+	match.size = stack->size;
+
+	found = trace_hash_find(&event->stacks, stack->hash.key, match_stack,
+				&match);
+	if (found) {
+		target = stack_from_item(found);
+		target->count += stack->count;
+		target->time += stack->time;
+
+		if (target->time_max < stack->time_max) {
+			target->time_max = stack->time_max;
+			target->ts_max = stack->ts_max;
+		}
+		if (target->time_min > stack->time_min) {
+			target->time_min = stack->time_min;
+			target->ts_min = stack->ts_min;
+		}
+
+		free(stack);
+		return;
+	}
+
+	/* first time this stack is seen for the event: adopt it */
+	trace_hash_add(&event->stacks, &stack->hash);
+}
+
+/*
+ * Move every stack of "event" over to "exist", merging stacks that
+ * "exist" already has.  The stacks hash of "event" is left without items.
+ */
+static void merge_stacks(struct event_hash *exist, struct event_hash *event)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+
+	trace_hash_for_each_bucket(bkt, &event->stacks) {
+		/* each pass unlinks the first item, so this drains the bucket */
+		trace_hash_while_item(it, bkt) {
+			struct stack_data *moving = stack_from_item(it);
+
+			trace_hash_del(&moving->hash);
+			merge_event_stack(exist, moving);
+		}
+	}
+}
+
+/*
+ * Fold one event hash entry of a task into a group.
+ *
+ * Wakeup entries have their val and search_val cleared and are keyed by
+ * their event_data alone; sched switch entries are keyed by event_data
+ * plus val; everything else keeps its current hash key.  If the group has
+ * no matching entry the given one is inserted under that key, otherwise
+ * its counters and stacks are merged into the existing entry and it is
+ * freed.  The entry must already be unlinked from the task's hash.
+ */
+static void merge_event_into_group(struct group_data *group,
+				   struct event_hash *event)
+{
+	struct event_data_match wanted;
+	struct trace_hash_item *found;
+	struct event_hash *target;
+	unsigned long long key;
+
+	switch (event->event_data->type) {
+	case EVENT_TYPE_WAKEUP:
+		event->search_val = 0;
+		event->val = 0;
+		key = trace_hash((unsigned long)event->event_data);
+		break;
+	case EVENT_TYPE_SCHED_SWITCH:
+		event->search_val = event->val;
+		key = (unsigned long)event->event_data +
+			((unsigned long)event->val * 2);
+		key = trace_hash(key);
+		break;
+	default:
+		key = event->hash.key;
+		break;
+	}
+
+	/* filled in after the adjustments above so the lookup sees them */
+	wanted.event_data = event->event_data;
+	wanted.search_val = event->search_val;
+	wanted.val = event->val;
+
+	found = trace_hash_find(&group->event_hash, key, match_event, &wanted);
+	if (!found) {
+		event->hash.key = key;
+		trace_hash_add(&group->event_hash, &event->hash);
+		return;
+	}
+
+	target = event_from_item(found);
+	target->count += event->count;
+	target->time_total += event->time_total;
+
+	if (target->time_max < event->time_max) {
+		target->time_max = event->time_max;
+		target->ts_max = event->ts_max;
+	}
+	if (target->time_min > event->time_min) {
+		target->time_min = event->time_min;
+		target->ts_min = event->ts_min;
+	}
+
+	merge_stacks(target, event);
+	free_event_hash(event);
+}
+
+/*
+ * Attach a task to the group that carries its comm, creating the group
+ * on first use, and move all of the task's event hash entries into the
+ * group.  Tasks without a comm are left untouched.  If a new group can
+ * not be allocated a warning is printed and the task stays ungrouped.
+ */
+static void add_group(struct handle_data *h, struct task_data *task)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+	struct group_data *group;
+	unsigned long long key;
+	void *name = task->comm;
+
+	if (!name)
+		return;
+
+	key = trace_hash_str(task->comm);
+
+	it = trace_hash_find(&h->group_hash, key, match_group, name);
+	if (!it) {
+		group = malloc(sizeof(*group));
+		if (!group) {
+			warning("Could not allocate group");
+			return;
+		}
+		memset(group, 0, sizeof(*group));
+
+		group->comm = strdup(task->comm);
+		if (!group->comm)
+			die("strdup");
+		group->hash.key = key;
+		trace_hash_add(&h->group_hash, &group->hash);
+		trace_hash_init(&group->event_hash, 32);
+	} else {
+		group = group_from_item(it);
+	}
+
+	task->group = group;
+
+	/* drain the task's events into the group */
+	trace_hash_for_each_bucket(bkt, &task->event_hash) {
+		trace_hash_while_item(it, bkt) {
+			struct event_hash *entry = event_from_item(it);
+
+			trace_hash_del(&entry->hash);
+			merge_event_into_group(group, entry);
+		}
+	}
+}
+
+/*
+ * When merge_like_comms is set, put every task of the handle into the
+ * group that matches its comm.  Does nothing otherwise.
+ */
+static void merge_tasks(struct handle_data *h)
+{
+	struct trace_hash_item **bkt;
+	struct trace_hash_item *it;
+
+	if (merge_like_comms) {
+		trace_hash_for_each_bucket(bkt, &h->task_hash) {
+			trace_hash_for_each_item(it, bkt) {
+				add_group(h, task_from_item(it));
+			}
+		}
+	}
+}
+
+/*
+ * Print the profile of every handle on the "handles" list, merging tasks
+ * with the same comm first when merge_like_comms is set, and free each
+ * handle's task hash afterwards.  Always returns 0.
+ */
+int do_trace_profile(void)
+{
+	struct handle_data *cur = handles;
+
+	while (cur) {
+		if (merge_like_comms)
+			merge_tasks(cur);
+
+		output_handle(cur);
+		trace_hash_free(&cur->task_hash);
+
+		cur = cur->next;
+	}
+
+	return 0;
+}
diff --git a/tracecmd/trace-read.c b/tracecmd/trace-read.c
new file mode 100644
index 00000000..df559d2a
--- /dev/null
+++ b/tracecmd/trace-read.c
@@ -0,0 +1,1984 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#define _LARGEFILE64_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "trace-local.h"
+#include "trace-hash.h"
+#include "trace-hash-local.h"
+#include "kbuffer.h"
+#include "list.h"
+
+/*
+ * tep_func_repeat_format is defined as a weak variable in the
+ * libtraceevent library function plugin, to allow applications
+ * to override the format of the timestamp it prints for the
+ * last function that repeated.
+ */
+const char *tep_func_repeat_format;
+
+/*
+ * Filter strings queued by add_filter(), kept in the order they were
+ * added. "neg" selects which per-handle list the compiled filter ends up
+ * on in process_filters() (event_filter_out when set).
+ */
+static struct filter_str {
+ struct filter_str *next;
+ char *filter;
+ int neg;
+} *filter_strings;
+/* Tail pointer of filter_strings: where add_filter() links the next entry */
+static struct filter_str **filter_next = &filter_strings;
+
+/* One compiled libtraceevent filter, chained per handle */
+struct filter {
+ struct filter *next;
+ struct tep_event_filter *filter;
+};
+
+/* Singly linked list node holding an event name string */
+struct event_str {
+ struct event_str *next;
+ const char *event;
+};
+
+/*
+ * Per input handle state.
+ *  file           - base name set by add_handle() (points into the
+ *                   caller's string, not a copy)
+ *  cpus           - CPU count, filled in by read_data_info()
+ *  done           - set once get_next_record() runs out of records
+ *  record         - record fetched by get_next_record() but not yet freed
+ *  event_filters / event_filter_out
+ *                 - compiled filters built by process_filters()
+ *  last_timestamp - per-CPU timestamp of the last record shown
+ */
+struct handle_list {
+ struct list_head list;
+ struct tracecmd_input *handle;
+ const char *file;
+ int cpus;
+ int done;
+ struct tep_record *record;
+ struct filter *event_filters;
+ struct filter *event_filter_out;
+ unsigned long long *last_timestamp;
+};
+/* List of all struct handle_list entries, appended to by add_handle() */
+static struct list_head handle_list;
+
+/*
+ * One input file queued by add_input(). Only "file" is set there; the
+ * remaining fields start out zeroed.
+ */
+struct input_files {
+ struct list_head list;
+ const char *file;
+ long long tsoffset;
+ unsigned long long ts2secs;
+};
+/* All queued inputs, in the order they were added */
+static struct list_head input_files;
+/* Most recent entry added by add_input() */
+static struct input_files *last_input_file;
+
+/*
+ * List of pid (or comm) strings built by __add_filter(). Several nodes
+ * may point into one strdup()ed buffer; "free" is set only on the node
+ * whose pid pointer must be passed to free().
+ */
+struct pid_list {
+ struct pid_list *next;
+ char *pid;
+ int free;
+} *pid_list;
+
+/* Same node type, holding comm names until convert_comm_filter() runs */
+struct pid_list *comm_list;
+
+static unsigned int page_size;
+/* File descriptor opened by read_trace_header() */
+static int input_fd;
+static const char *default_input_file = DEFAULT_INPUT_FILE;
+static const char *input_file;
+static int multi_inputs;
+/* Longest base name seen by add_handle(); used as a print column width */
+static int max_file_size;
+
+/* Number of buffer instances in the last file processed by read_data_info() */
+static int instances;
+
+/* CPU filter array; get_next_record() stops at the first negative entry */
+static int *filter_cpus;
+static int nr_filter_cpus;
+/* When set, process_filters() exits after compiling at least one filter */
+static int test_filters_mode;
+
+/* Wakeup latency tracking: enable flag and event ids (see init_wakeup()) */
+static int show_wakeup;
+static int wakeup_id;
+static int wakeup_new_id;
+static int sched_id;
+/* Id of the ftrace "kernel_stack" event, 0 when not found */
+static int stacktrace_id;
+
+static int profile;
+
+static int buffer_breaks = 0;
+
+/* Drop events whose flags have TRACE_FLAG_HARDIRQ / TRACE_FLAG_SOFTIRQ set */
+static int no_irqs;
+static int no_softirqs;
+
+static int tsdiff;
+static int tscheck;
+
+static int latency_format;
+static bool raw_format;
+static const char *format_type = TEP_PRINT_INFO;
+
+/* Event fields looked up once by init_wakeup() */
+static struct tep_format_field *wakeup_task;
+static struct tep_format_field *wakeup_success;
+static struct tep_format_field *wakeup_new_task;
+static struct tep_format_field *wakeup_new_success;
+static struct tep_format_field *sched_task;
+static struct tep_format_field *sched_prio;
+
+/* Running totals accumulated by add_sched() */
+static unsigned long long total_wakeup_lat;
+static unsigned long wakeup_lat_count;
+
+static unsigned long long total_wakeup_rt_lat;
+static unsigned long wakeup_rt_lat_count;
+
+/* A pending wakeup: hash node plus the timestamp the wakeup was seen at */
+struct wakeup_info {
+ struct trace_hash_item hash;
+ unsigned long long start;
+ int pid;
+};
+
+static struct hook_list *hooks;
+static struct hook_list *last_hook;
+
+#define WAKEUP_HASH_SIZE 1024
+static struct trace_hash wakeup_hash;
+
+/*
+ * Append " <name>: " to @s and pad names shorter than 20 characters with
+ * spaces so that the text following the event name lines up.
+ *
+ * @s:     sequence buffer to append to
+ * @event: event whose name is printed; NULL is shown as "(NULL)"
+ */
+static void print_event_name(struct trace_seq *s, struct tep_event *event)
+{
+	const char *name;
+	int len;
+
+	name = event ? event->name : "(NULL)";
+
+	trace_seq_printf(s, " %s: ", name);
+
+	/*
+	 * Space out the event names evenly. The padding is produced with a
+	 * field width on an empty string rather than by slicing a literal
+	 * run of spaces, so it does not depend on that literal really being
+	 * 20 characters long.
+	 */
+	len = strlen(name);
+	if (len < 20)
+		trace_seq_printf(s, "%*s", 20 - len, "");
+}
+
+enum time_fmt {
+ TIME_FMT_LAT = 1,
+ TIME_FMT_NORMAL = 2,
+};
+
+/*
+ * Pick the tep_print_event() format string for @handle.
+ *
+ * TIME_FMT_LAT selects the comm/pid/cpu prefix (narrow form when
+ * latency_format is set). Any other value selects the timestamp format,
+ * which depends on whether the handle reports in usecs and whether
+ * nanosecond output is enabled on its tep handle.
+ */
+static const char *time_format(struct tracecmd_input *handle, enum time_fmt tf)
+{
+	struct tep_handle *tep = tracecmd_get_tep(handle);
+
+	if (tf == TIME_FMT_LAT)
+		return latency_format ? "%8.8s-%-5d %3d" : "%16s-%-5d [%03d]";
+
+	if (!(tracecmd_get_flags(handle) & TRACECMD_FL_IN_USECS))
+		return "%12d:";
+
+	if (tep_test_flag(tep, TEP_NSEC_OUTPUT))
+		return " %9.1d:";
+
+	return " %6.1000d:";
+}
+
+/*
+ * Render @record into @s: comm/pid/cpu prefix, timestamp, padded event
+ * name, then the event body in the current format_type.
+ */
+static void print_event(struct trace_seq *s, struct tracecmd_input *handle,
+			struct tep_record *record)
+{
+	struct tep_handle *tep = tracecmd_get_tep(handle);
+	const char *prefix_fmt = time_format(handle, TIME_FMT_LAT);
+	const char *ts_fmt = time_format(handle, TIME_FMT_NORMAL);
+	struct tep_event *event = tep_find_event_by_record(tep, record);
+
+	tep_print_event(tep, s, record, prefix_fmt,
+			TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU);
+	tep_print_event(tep, s, record, ts_fmt, TEP_PRINT_TIME);
+	print_event_name(s, event);
+	tep_print_event(tep, s, record, "%s", format_type);
+}
+
+/*
+ * Debug variables for testing tracecmd_read_at
+ *
+ * Compiled out unless TEST_READ_AT is changed to non-zero. When enabled,
+ * test_save() remembers the offset of the record at index
+ * test_read_at_copy and show_test() re-reads and prints it.
+ */
+#define TEST_READ_AT 0
+#if TEST_READ_AT
+#define DO_TEST
+static off64_t test_read_at_offset;
+static int test_read_at_copy = 100;
+static int test_read_at_index;
+static void show_test(struct tracecmd_input *handle)
+{
+ struct tep_record *record;
+ struct trace_seq s;
+
+ if (!test_read_at_offset) {
+ printf("\nNO RECORD COPIED\n");
+ return;
+ }
+
+ /* NOTE(review): the result is not checked for NULL before printing */
+ record = tracecmd_read_at(handle, test_read_at_offset, NULL);
+ printf("\nHERE'S THE COPY RECORD\n");
+ trace_seq_init(&s);
+ print_event(&s, handle, record);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf("\n");
+
+ tracecmd_free_record(record);
+}
+
+static void test_save(struct tep_record *record, int cpu)
+{
+ if (test_read_at_index++ == test_read_at_copy) {
+ test_read_at_offset = record->offset;
+ printf("\nUSING THIS RECORD\n");
+ }
+}
+#endif /* TEST_READ_AT */
+
+/*
+ * Debug variables for testing tracecmd_set_cpu_at_timestamp
+ *
+ * Compiled out unless TEST_AT_TIMESTAMP is changed to non-zero. When
+ * enabled, test_save() remembers the timestamp and CPU of the record at
+ * index test_at_timestamp_copy and show_test() seeks back to it.
+ */
+#define TEST_AT_TIMESTAMP 0
+#if TEST_AT_TIMESTAMP
+#define DO_TEST
+static unsigned long long test_at_timestamp_ts;
+static int test_at_timestamp_copy = 100;
+static int test_at_timestamp_cpu = -1;
+static int test_at_timestamp_index;
+static void show_test(struct tracecmd_input *handle)
+{
+ struct tep_record *record;
+ struct trace_seq s;
+ int cpu = test_at_timestamp_cpu;
+
+ if (!test_at_timestamp_ts) {
+ printf("\nNO RECORD COPIED\n");
+ return;
+ }
+
+ if (tracecmd_set_cpu_to_timestamp(handle, cpu, test_at_timestamp_ts))
+ return;
+
+ /* NOTE(review): record is dereferenced below without a NULL check */
+ record = tracecmd_read_data(handle, cpu);
+ printf("\nHERE'S THE COPY RECORD with page %p offset=%p\n",
+ (void *)(record->offset & ~(page_size - 1)),
+ (void *)record->offset);
+ trace_seq_init(&s);
+ print_event(&s, handle, record);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf("\n");
+
+ tracecmd_free_record(record);
+}
+
+static void test_save(struct tep_record *record, int cpu)
+{
+ if (test_at_timestamp_index++ == test_at_timestamp_copy) {
+ test_at_timestamp_ts = record->ts;
+ test_at_timestamp_cpu = cpu;
+ printf("\nUSING THIS RECORD page=%p offset=%p\n",
+ (void *)(record->offset & ~(page_size - 1)),
+ (void *)record->offset);
+ }
+}
+#endif /* TEST_AT_TIMESTAMP */
+
+/*
+ * Debug code for testing tracecmd_read_cpu_first/last: when enabled,
+ * show_test() prints the first and the last record of CPU 0.
+ * Compiled out unless TEST_FIRST_LAST is changed to non-zero.
+ */
+#define TEST_FIRST_LAST 0
+#if TEST_FIRST_LAST
+#define DO_TEST
+static void show_test(struct tracecmd_input *handle)
+{
+ struct tep_record *record;
+ struct trace_seq s;
+ int cpu = 0;
+
+ record = tracecmd_read_cpu_first(handle, cpu);
+ if (!record) {
+ printf("No first record?\n");
+ return;
+ }
+
+ printf("\nHERE'S THE FIRST RECORD with offset %p\n",
+ (void *)record->offset);
+ trace_seq_init(&s);
+ print_event(&s, handle, record);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf("\n");
+
+ tracecmd_free_record(record);
+
+ record = tracecmd_read_cpu_last(handle, cpu);
+ if (!record) {
+ printf("No last record?\n");
+ return;
+ }
+
+ printf("\nHERE'S THE LAST RECORD with offset %p\n",
+ (void *)record->offset);
+ trace_seq_init(&s);
+ print_event(&s, handle, record);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf("\n");
+
+ tracecmd_free_record(record);
+}
+/* Nothing to remember for this test */
+static void test_save(struct tep_record *record, int cpu)
+{
+}
+#endif /* TEST_FIRST_LAST */
+
+/*
+ * No debug test selected: provide do-nothing show_test()/test_save() so
+ * callers need no conditional compilation.
+ */
+#ifndef DO_TEST
+static void show_test(struct tracecmd_input *handle)
+{
+ /* quiet the compiler */
+ /* (never executed; only references print_event() so it counts as used) */
+ if (0)
+ print_event(NULL, NULL, NULL);
+}
+static void test_save(struct tep_record *record, int cpu)
+{
+}
+#endif
+
+/*
+ * Queue @file as an input: allocate a zeroed entry, append it to
+ * input_files and remember it as last_input_file. The string is not
+ * copied. Dies on allocation failure.
+ */
+static void add_input(const char *file)
+{
+	struct input_files *entry = calloc(1, sizeof(*entry));
+
+	if (!entry)
+		die("Failed to allocate for %s", file);
+
+	entry->file = file;
+	list_add_tail(&entry->list, &input_files);
+	last_input_file = entry;
+}
+
+/*
+ * Append a new entry for @handle to handle_list.
+ *
+ * @handle: input handle to track
+ * @file:   path or name the handle came from, may be NULL. Only the part
+ *          after the last '/' is kept; it points into @file, which must
+ *          therefore outlive the list entry.
+ *
+ * Also grows max_file_size to the longest base name seen so far.
+ * Dies on allocation failure.
+ */
+static void add_handle(struct tracecmd_input *handle, const char *file)
+{
+	struct handle_list *item;
+
+	item = calloc(1, sizeof(*item));
+	if (!item)
+		die("Failed to allocate for %s", file ? file : "(null)");
+	item->handle = handle;
+	if (file) {
+		/*
+		 * We want just the base name. strrchr() avoids walking a
+		 * pointer backwards past the start of the string.
+		 */
+		const char *slash = strrchr(file, '/');
+		int len;
+
+		item->file = slash ? slash + 1 : file;
+		len = strlen(item->file);
+		if (len > max_file_size)
+			max_file_size = len;
+	}
+	list_add_tail(&item->list, &handle_list);
+}
+
+/* Unlink and free every entry queued on input_files. */
+static void free_inputs(void)
+{
+	struct list_head *node;
+
+	while (!list_empty(&input_files)) {
+		node = input_files.next;
+		list_del(node);
+		free(container_of(node, struct input_files, list));
+	}
+}
+
+/*
+ * Unlink and free every entry on handle_list. Only the list entries
+ * themselves are released here.
+ */
+static void free_handles(void)
+{
+	struct list_head *node;
+
+	while (!list_empty(&handle_list)) {
+		node = handle_list.next;
+		list_del(node);
+		free(container_of(node, struct handle_list, list));
+	}
+}
+
+/*
+ * Queue a copy of @filter at the tail of filter_strings.
+ *
+ * @filter: filter expression; duplicated, so the caller keeps ownership
+ * @neg:    stored as-is in the new node
+ *
+ * Dies on allocation failure.
+ */
+static void add_filter(const char *filter, int neg)
+{
+	struct filter_str *node = malloc(sizeof(*node));
+
+	if (!node)
+		die("Failed to allocate for filter %s", filter);
+
+	node->filter = strdup(filter);
+	if (!node->filter)
+		die("malloc");
+	node->neg = neg;
+	node->next = NULL;
+
+	/* must maintain order of command line */
+	*filter_next = node;
+	filter_next = &node->next;
+}
+
+/*
+ * Split the comma separated @arg and push one node per token onto *@head.
+ *
+ * All tokens live in a single strdup()ed buffer. Only the node holding
+ * the first token is marked with ->free, and the code releasing the list
+ * passes that node's ->pid to free(). The first token must therefore
+ * start exactly at the beginning of the buffer, which is why leading
+ * commas are skipped before duplicating.
+ *
+ * Dies on allocation failure.
+ */
+static void __add_filter(struct pid_list **head, const char *arg)
+{
+	const char *start = arg + strspn(arg, ",");
+	struct pid_list *list;
+	char *pids = strdup(start);
+	char *pid;
+	char *sav;
+	int owner = 1;
+
+	if (!pids)
+		die("malloc");
+
+	pid = strtok_r(pids, ",", &sav);
+	if (!pid) {
+		/* Nothing but separators: no node will own the buffer */
+		free(pids);
+		return;
+	}
+
+	while (pid) {
+		list = malloc(sizeof(*list));
+		if (!list)
+			die("Failed to allocate for arg %s", arg);
+		list->pid = pid;
+		list->free = owner;
+		list->next = *head;
+		*head = list;
+		/* The first pid needs to be freed */
+		owner = 0;
+		pid = strtok_r(NULL, ",", &sav);
+	}
+}
+
+/* Add the comma separated names in @arg to comm_list. */
+static void add_comm_filter(const char *arg)
+{
+ __add_filter(&comm_list, arg);
+}
+
+/* Add the comma separated pids in @arg to pid_list. */
+static void add_pid_filter(const char *arg)
+{
+ __add_filter(&pid_list, arg);
+}
+
+/*
+ * Extend a filter string with a clause matching @pid in the common_pid,
+ * pid and next_pid fields.
+ *
+ * @curr_filter: heap string built by earlier calls, or NULL to start a
+ *               new one. It is realloc()ed, so the caller must use the
+ *               returned pointer from then on.
+ * @pid:         pid as a string
+ *
+ * Returns the new filter string. Dies on allocation failure.
+ */
+static char *append_pid_filter(char *curr_filter, char *pid)
+{
+	char *filter;
+	int len, curr_len;
+
+#define FILTER_FMT "(common_pid==" __STR ")||(pid==" __STR ")||(next_pid==" __STR ")"
+
+	/*
+	 * FILTER_FMT is expanded twice: with __STR empty to measure the
+	 * fixed text, then with __STR as "%s" to build the format.
+	 */
+#undef __STR
+#define __STR ""
+
+	/* strlen(".*:") > strlen("||") */
+	len = strlen(".*:" FILTER_FMT) + strlen(pid) * 3 + 1;
+
+#undef __STR
+#define __STR "%s"
+
+	if (!curr_filter) {
+		filter = malloc(len);
+		if (!filter)
+			/* curr_filter is NULL here, so report the pid */
+			die("Failed to allocate for filter %s", pid);
+		sprintf(filter, ".*:" FILTER_FMT, pid, pid, pid);
+	} else {
+		curr_len = strlen(curr_filter);
+		len += curr_len;
+
+		filter = realloc(curr_filter, len);
+		if (!filter)
+			die("realloc");
+		sprintf(filter + curr_len, "||" FILTER_FMT, pid, pid, pid);
+	}
+
+	return filter;
+}
+
+/*
+ * Turn every comm name on comm_list into pid filters: each pid that
+ * @handle's cmdline table reports for the name is passed to
+ * add_pid_filter(). Names with no match only produce a warning.
+ * comm_list is emptied and freed afterwards.
+ */
+static void convert_comm_filter(struct tracecmd_input *handle)
+{
+	struct tep_cmdline *cmdline;
+	struct tep_handle *pevent;
+	struct pid_list *node;
+	char pidstr[100];
+
+	if (!comm_list)
+		return;
+
+	pevent = tracecmd_get_tep(handle);
+
+	/* Search for comm names and get their pids */
+	for (node = comm_list; node; node = node->next) {
+		cmdline = tep_data_pid_from_comm(pevent, node->pid, NULL);
+		if (!cmdline) {
+			warning("comm: %s not in cmdline list", node->pid);
+			continue;
+		}
+		/* The same comm may belong to several pids */
+		while (cmdline) {
+			sprintf(pidstr, "%d", tep_cmdline_pid(pevent, cmdline));
+			add_pid_filter(pidstr);
+			cmdline = tep_data_pid_from_comm(pevent, node->pid,
+							 cmdline);
+		}
+	}
+
+	while ((node = comm_list)) {
+		comm_list = node->next;
+		if (node->free)
+			free(node->pid);
+		free(node);
+	}
+}
+
+/*
+ * Build one filter string covering every pid on pid_list (after comm
+ * names have been converted to pids), queue it with add_filter() and
+ * release pid_list. Does nothing when no pids were requested.
+ */
+static void make_pid_filter(struct tracecmd_input *handle)
+{
+	struct pid_list *node;
+	char *expr = NULL;
+
+	convert_comm_filter(handle);
+
+	if (!pid_list)
+		return;
+
+	/* First do all common pids */
+	node = pid_list;
+	while (node) {
+		expr = append_pid_filter(expr, node->pid);
+		node = node->next;
+	}
+
+	/* add_filter() keeps its own copy */
+	add_filter(expr, 0);
+	free(expr);
+
+	while ((node = pid_list)) {
+		pid_list = node->next;
+		if (node->free)
+			free(node->pid);
+		free(node);
+	}
+}
+
+/*
+ * Compile every queued filter string against @handles' tep handle.
+ *
+ * The pid/comm filters are first folded into the queue by
+ * make_pid_filter(). Each string is then turned into a tep event filter
+ * and appended to handles->event_filter_out when it was queued with
+ * "neg" set, or to handles->event_filters otherwise. The queue is
+ * consumed: every filter_str node is freed.
+ *
+ * Dies on allocation failure or on a filter that does not parse. Exits
+ * with status 0 when test_filters_mode is set and at least one filter
+ * was compiled.
+ */
+static void process_filters(struct handle_list *handles)
+{
+	struct filter **match_tail = &handles->event_filters;
+	struct filter **out_tail = &handles->event_filter_out;
+	struct filter *event_filter;
+	struct filter_str *filter;
+	struct tep_handle *pevent;
+	char errstr[200];
+	int filters = 0;
+	int ret;
+
+	pevent = tracecmd_get_tep(handles->handle);
+
+	make_pid_filter(handles->handle);
+
+	while (filter_strings) {
+		filter = filter_strings;
+		filter_strings = filter->next;
+
+		event_filter = malloc(sizeof(*event_filter));
+		if (!event_filter)
+			die("Failed to allocate for event filter");
+		event_filter->next = NULL;
+		event_filter->filter = tep_filter_alloc(pevent);
+		if (!event_filter->filter)
+			die("malloc");
+
+		ret = tep_filter_add_filter_str(event_filter->filter,
+						filter->filter);
+		if (ret < 0) {
+			tep_strerror(pevent, ret, errstr, sizeof(errstr));
+			die("Error filtering: %s\n%s",
+			    filter->filter, errstr);
+		}
+
+		if (filter->neg) {
+			*out_tail = event_filter;
+			out_tail = &event_filter->next;
+		} else {
+			*match_tail = event_filter;
+			match_tail = &event_filter->next;
+		}
+		filters++;
+		free(filter->filter);
+		free(filter);
+	}
+
+	/*
+	 * The queue is empty now and its nodes are freed. Point the global
+	 * tail back at the head so a later add_filter() does not write
+	 * through a pointer into a freed node.
+	 */
+	filter_next = &filter_strings;
+
+	if (filters && test_filters_mode)
+		exit(0);
+}
+
+/*
+ * Prepare wakeup latency tracking for @handle (only when show_wakeup is
+ * set): initialize wakeup_hash and look up the ids and fields of the
+ * sched_wakeup and sched_switch events. sched_wakeup_new is optional;
+ * wakeup_new_id stays -1 when it is missing.
+ *
+ * If a required event or field is missing, show_wakeup is cleared so the
+ * feature is switched off.
+ */
+static void init_wakeup(struct tracecmd_input *handle)
+{
+ struct tep_handle *pevent;
+ struct tep_event *event;
+
+ if (!show_wakeup)
+ return;
+
+ pevent = tracecmd_get_tep(handle);
+
+ trace_hash_init(&wakeup_hash, WAKEUP_HASH_SIZE);
+
+ event = tep_find_event_by_name(pevent, "sched", "sched_wakeup");
+ if (!event)
+ goto fail;
+ wakeup_id = event->id;
+ wakeup_task = tep_find_field(event, "pid");
+ if (!wakeup_task)
+ goto fail;
+ /* Not checked: a missing "success" field is not treated as an error */
+ wakeup_success = tep_find_field(event, "success");
+
+ event = tep_find_event_by_name(pevent, "sched", "sched_switch");
+ if (!event)
+ goto fail;
+ sched_id = event->id;
+ sched_task = tep_find_field(event, "next_pid");
+ if (!sched_task)
+ goto fail;
+
+ sched_prio = tep_find_field(event, "next_prio");
+ if (!sched_prio)
+ goto fail;
+
+
+ wakeup_new_id = -1;
+
+ event = tep_find_event_by_name(pevent, "sched", "sched_wakeup_new");
+ if (!event)
+ goto skip;
+ wakeup_new_id = event->id;
+ wakeup_new_task = tep_find_field(event, "pid");
+ if (!wakeup_new_task)
+ goto fail;
+ wakeup_new_success = tep_find_field(event, "success");
+
+ skip:
+ return;
+
+ fail:
+ /* NOTE(review): wakeup_hash was already initialized and is not freed here */
+ show_wakeup = 0;
+}
+
+/*
+ * Record that the task identified by @val was woken at time @start.
+ *
+ * If an entry with the same hash key is already pending, only its start
+ * time is replaced. Otherwise a new wakeup_info is allocated and added
+ * to wakeup_hash. Dies on allocation failure.
+ */
+static void add_wakeup(unsigned int val, unsigned long long start)
+{
+	unsigned int key = trace_hash(val);
+	struct wakeup_info *info;
+	struct trace_hash_item *item;
+
+	item = trace_hash_find(&wakeup_hash, key, NULL, NULL);
+	if (item) {
+		info = container_of(item, struct wakeup_info, hash);
+		/* Hmm, double wakeup? */
+		info->start = start;
+		return;
+	}
+
+	info = malloc(sizeof(*info));
+	if (!info)
+		die("Failed to allocate wakeup info");
+	info->hash.key = key;
+	info->start = start;
+	/* Do not leave the pid field holding uninitialized heap data */
+	info->pid = val;
+	trace_hash_add(&wakeup_hash, &info->hash);
+}
+
+/*
+ * Extremes seen by add_sched(), for all tasks and for RT tasks only.
+ * The *_time values hold the timestamp passed to add_sched() when the
+ * extreme was recorded. The minimums start at -1, i.e. the largest
+ * unsigned value, so the first sample always replaces them.
+ */
+static unsigned long long max_lat = 0;
+static unsigned long long max_time;
+static unsigned long long min_lat = -1;
+static unsigned long long min_time;
+
+static unsigned long long max_rt_lat = 0;
+static unsigned long long max_rt_time;
+static unsigned long long min_rt_lat = -1;
+static unsigned long long min_rt_time;
+
+/*
+ * Complete a pending wakeup for the task identified by @val, which got
+ * scheduled in at time @end.
+ *
+ * The latency (@end minus the recorded wakeup time) is printed, folded
+ * into the overall min/max/total statistics and, when @rt is set, into
+ * the RT ones as well. The pending entry is then removed and freed.
+ * Nothing happens when no wakeup is pending for @val.
+ */
+static void add_sched(unsigned int val, unsigned long long end, int rt)
+{
+	unsigned int key = trace_hash(val);
+	struct trace_hash_item *item;
+	struct wakeup_info *info;
+	unsigned long long latency;
+
+	item = trace_hash_find(&wakeup_hash, key, NULL, NULL);
+	if (!item)
+		return;
+
+	info = container_of(item, struct wakeup_info, hash);
+	latency = end - info->start;
+
+	if (latency > max_lat) {
+		max_lat = latency;
+		max_time = end;
+	}
+	if (latency < min_lat) {
+		min_lat = latency;
+		min_time = end;
+	}
+	if (rt && latency > max_rt_lat) {
+		max_rt_lat = latency;
+		max_rt_time = end;
+	}
+	if (rt && latency < min_rt_lat) {
+		min_rt_lat = latency;
+		min_rt_time = end;
+	}
+
+	printf(" Latency: %llu.%03llu usecs", latency / 1000, latency % 1000);
+
+	total_wakeup_lat += latency;
+	wakeup_lat_count++;
+	if (rt) {
+		total_wakeup_rt_lat += latency;
+		wakeup_rt_lat_count++;
+	}
+
+	trace_hash_del(item);
+	free(info);
+}
+
+/*
+ * Feed @record into the wakeup latency tracking (when show_wakeup is
+ * set).
+ *
+ * Wakeup events (sched_wakeup / sched_wakeup_new) register a pending
+ * wakeup for their pid, unless their "success" field can be read and is
+ * zero. sched_switch events close the pending wakeup of the next task;
+ * a next_prio above 99 is counted as non-RT.
+ */
+static void process_wakeup(struct tep_handle *pevent, struct tep_record *record)
+{
+	unsigned long long val;
+	int id;
+
+	if (!show_wakeup)
+		return;
+
+	id = tep_data_type(pevent, record);
+
+	if (id == wakeup_id || id == wakeup_new_id) {
+		struct tep_format_field *success;
+		struct tep_format_field *task;
+
+		if (id == wakeup_id) {
+			success = wakeup_success;
+			task = wakeup_task;
+		} else {
+			success = wakeup_new_success;
+			task = wakeup_new_task;
+		}
+
+		/* Skip wakeups that explicitly report failure */
+		if (tep_read_number_field(success, record->data, &val) == 0 &&
+		    !val)
+			return;
+		if (tep_read_number_field(task, record->data, &val))
+			return;
+		add_wakeup(val, record->ts);
+		return;
+	}
+
+	if (id == sched_id) {
+		int rt;
+
+		if (tep_read_number_field(sched_prio, record->data, &val))
+			return;
+		rt = val > 99 ? 0 : 1;
+		if (tep_read_number_field(sched_task, record->data, &val))
+			return;
+		add_sched(val, record->ts, rt);
+	}
+}
+
+/*
+ * Print the average, maximum and minimum wakeup latency.
+ *
+ * @total:    sum of all latencies, in nanoseconds
+ * @count:    number of samples in @total; nothing is printed when zero
+ * @lat_max:  largest latency, in nanoseconds
+ * @time_max: timestamp (nanoseconds) at which @lat_max was recorded
+ * @lat_min:  smallest latency, in nanoseconds
+ * @time_min: timestamp (nanoseconds) at which @lat_min was recorded
+ *
+ * Timestamps are shown as seconds.microseconds, rounded to the nearest
+ * microsecond.
+ */
+static void
+show_wakeup_timings(unsigned long long total, unsigned long count,
+		    unsigned long long lat_max, unsigned long long time_max,
+		    unsigned long long lat_min, unsigned long long time_min)
+{
+	/* An average over zero samples is meaningless (and divides by zero) */
+	if (!count)
+		return;
+
+	total /= count;
+
+	/*
+	 * Round before splitting into seconds and microseconds, so that a
+	 * carry out of the fraction also reaches the seconds.
+	 */
+	time_max += 500;
+	time_min += 500;
+
+	printf("\nAverage wakeup latency: %llu.%03llu usecs\n",
+	       total / 1000,
+	       total % 1000);
+	printf("Maximum Latency: %llu.%03llu usecs at ", lat_max / 1000, lat_max % 1000);
+	printf("timestamp: %llu.%06llu\n",
+	       time_max / 1000000000, (time_max % 1000000000) / 1000);
+	printf("Minimum Latency: %llu.%03llu usecs at ", lat_min / 1000, lat_min % 1000);
+	printf("timestamp: %llu.%06llu\n\n", time_min / 1000000000,
+	       (time_min % 1000000000) / 1000);
+}
+
+/*
+ * Print the wakeup latency summary (overall, and for RT tasks when any
+ * were seen), then free all still-pending wakeups and the hash itself.
+ * Does nothing when wakeup tracking is off or no latency was measured.
+ */
+static void finish_wakeup(void)
+{
+	struct trace_hash_item **bucket;
+	struct trace_hash_item *item;
+	struct wakeup_info *pending;
+
+	if (!show_wakeup || !wakeup_lat_count)
+		return;
+
+	show_wakeup_timings(total_wakeup_lat, wakeup_lat_count,
+			    max_lat, max_time, min_lat, min_time);
+
+	if (wakeup_rt_lat_count) {
+		printf("RT task timings:\n");
+		show_wakeup_timings(total_wakeup_rt_lat, wakeup_rt_lat_count,
+				    max_rt_lat, max_rt_time,
+				    min_rt_lat, min_rt_time);
+	}
+
+	/* Drop wakeups that never saw their matching sched_switch */
+	trace_hash_for_each_bucket(bucket, &wakeup_hash) {
+		trace_hash_while_item(item, bucket) {
+			pending = container_of(item, struct wakeup_info, hash);
+			trace_hash_del(item);
+			free(pending);
+		}
+	}
+
+	trace_hash_free(&wakeup_hash);
+}
+
+/*
+ * Print one record of @handle to stdout, followed by a newline.
+ *
+ * If the handle has its own show-data callback, that callback does all
+ * the work. Otherwise the line is built in a trace_seq: notices about
+ * dropped events and sub-buffer starts, comm/pid/cpu, optional latency
+ * flags, timestamp, optional delta to the previously shown record
+ * (tsdiff), the event name and the event body. With debugging enabled,
+ * ring buffer details are appended. Finally the record is handed to
+ * process_wakeup().
+ */
+void trace_show_data(struct tracecmd_input *handle, struct tep_record *record)
+{
+ tracecmd_show_data_func func = tracecmd_get_show_data_func(handle);
+ const char *tfmt = time_format(handle, TIME_FMT_NORMAL);
+ const char *cfmt = latency_format ? "%8.8s-%-5d %3d" : "%16s-%-5d [%03d]";
+ struct tep_handle *pevent;
+ struct tep_event *event;
+ struct trace_seq s;
+ int cpu = record->cpu;
+ bool use_trace_clock;
+ /* Timestamp of the previous record shown; shared by all handles */
+ static unsigned long long last_ts;
+ unsigned long long diff_ts;
+ /* Shadows the file-scope page_size */
+ unsigned long page_size;
+ char buf[50];
+
+ page_size = tracecmd_page_size(handle);
+
+ test_save(record, cpu);
+
+ if (func) {
+ func(handle, record);
+ return;
+ }
+
+ pevent = tracecmd_get_tep(handle);
+ event = tep_find_event_by_record(pevent, record);
+ use_trace_clock = tracecmd_get_use_trace_clock(handle);
+
+ trace_seq_init(&s);
+ /* Positive: number of lost events is known; negative: it is not */
+ if (record->missed_events > 0)
+ trace_seq_printf(&s, "CPU:%d [%lld EVENTS DROPPED]\n",
+ cpu, record->missed_events);
+ else if (record->missed_events < 0)
+ trace_seq_printf(&s, "CPU:%d [EVENTS DROPPED]\n", cpu);
+ if (buffer_breaks || tracecmd_get_debug()) {
+ if (tracecmd_record_at_buffer_start(handle, record)) {
+ trace_seq_printf(&s, "CPU:%d [SUBBUFFER START]", cpu);
+ if (tracecmd_get_debug())
+ trace_seq_printf(&s, " [%lld:0x%llx]",
+ tracecmd_page_ts(handle, record),
+ record->offset & ~(page_size - 1));
+ trace_seq_putc(&s, '\n');
+ }
+ }
+
+ tep_print_event(pevent, &s, record, cfmt,
+ TEP_PRINT_COMM,
+ TEP_PRINT_PID,
+ TEP_PRINT_CPU);
+
+ if (latency_format) {
+ if (raw_format)
+ trace_seq_printf(&s, "-0x%x",
+ tep_data_flags(pevent, record));
+ else
+ tep_print_event(pevent, &s, record, "%s",
+ TEP_PRINT_LATENCY);
+ }
+
+ tep_print_event(pevent, &s, record, tfmt, TEP_PRINT_TIME);
+
+ if (tsdiff) {
+ unsigned long long rec_ts = record->ts;
+
+ buf[0] = 0;
+ /* Scale by 1000 with rounding unless nanosecond output is on */
+ if (use_trace_clock && !tep_test_flag(pevent, TEP_NSEC_OUTPUT))
+ rec_ts = (rec_ts + 500) / 1000;
+ /* No delta for the first record (or while last_ts is 0) */
+ if (last_ts) {
+ diff_ts = rec_ts - last_ts;
+ snprintf(buf, 50, "(+%lld)", diff_ts);
+ buf[49] = 0;
+ }
+ last_ts = rec_ts;
+ trace_seq_printf(&s, " %-8s", buf);
+ }
+
+ print_event_name(&s, event);
+ tep_print_event(pevent, &s, record, "%s", format_type);
+
+ /* Drop one trailing newline; a newline is printed at the very end */
+ if (s.len && *(s.buffer + s.len - 1) == '\n')
+ s.len--;
+ if (tracecmd_get_debug()) {
+ struct kbuffer *kbuf;
+ struct kbuffer_raw_info info;
+ void *page;
+ void *offset;
+
+ trace_seq_printf(&s, " [%d:0x%llx:%d]",
+ tracecmd_record_ts_delta(handle, record),
+ record->offset & (page_size - 1), record->size);
+ kbuf = tracecmd_record_kbuf(handle, record);
+ page = tracecmd_record_page(handle, record);
+ offset = tracecmd_record_offset(handle, record);
+
+ if (kbuf && page && offset) {
+ struct kbuffer_raw_info *pi = &info;
+
+ /* We need to get the record raw data to get next */
+ pi->next = offset;
+ pi = kbuffer_raw_get(kbuf, page, pi);
+ /* Show the entries that follow, up to the first with a type below PADDING */
+ while ((pi = kbuffer_raw_get(kbuf, page, pi))) {
+ if (pi->type < KBUFFER_TYPE_PADDING)
+ break;
+ switch (pi->type) {
+ case KBUFFER_TYPE_PADDING:
+ trace_seq_printf(&s, "\n PADDING: ");
+ break;
+ case KBUFFER_TYPE_TIME_EXTEND:
+ trace_seq_printf(&s, "\n TIME EXTEND: ");
+ break;
+ case KBUFFER_TYPE_TIME_STAMP:
+ trace_seq_printf(&s, "\n TIME STAMP: ");
+ break;
+ }
+ if (pi->type == KBUFFER_TYPE_TIME_STAMP)
+ trace_seq_printf(&s, "timestamp:%lld length:%d",
+ pi->delta,
+ pi->length);
+ else
+ trace_seq_printf(&s, "delta:%lld length:%d",
+ pi->delta,
+ pi->length);
+ }
+ }
+ }
+
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+
+ /* May print a " Latency: ..." suffix, so it runs before the newline */
+ process_wakeup(pevent, record);
+
+ printf("\n");
+}
+
+/*
+ * Copy the latency trace text of @handle to stdout, one chunk at a time,
+ * until the reader reports nothing more, then print a final newline.
+ */
+static void read_latency(struct tracecmd_input *handle)
+{
+	size_t size = 0;
+	char *buf = NULL;
+	int r;
+
+	while ((r = tracecmd_latency_data_read(handle, &buf, &size)) > 0)
+		printf("%.*s", r, buf);
+
+	printf("\n");
+	free(buf);
+}
+
+/*
+ * Run @record through the @event_filters list.
+ *
+ * Records from hard/soft irq context are rejected up front with
+ * FILTER_MISS when no_irqs / no_softirqs are set. Otherwise the result
+ * of the last filter evaluated is returned, or FILTER_NONE for an empty
+ * list. With @neg clear, evaluation stops at the first filter returning
+ * FILTER_NONE or FILTER_MATCH; with @neg set every filter is evaluated.
+ */
+static int
+test_filters(struct tep_handle *pevent, struct filter *event_filters,
+	     struct tep_record *record, int neg)
+{
+	struct filter *f;
+	int ret = FILTER_NONE;
+	int found = 0;
+	int flags;
+
+	if (no_irqs || no_softirqs) {
+		flags = tep_data_flags(pevent, record);
+		if (no_irqs && (flags & TRACE_FLAG_HARDIRQ))
+			return FILTER_MISS;
+		if (no_softirqs && (flags & TRACE_FLAG_SOFTIRQ))
+			return FILTER_MISS;
+	}
+
+	for (f = event_filters; f; f = f->next) {
+		ret = tep_filter_match(f->filter, record);
+		if (ret == FILTER_NONE || ret == FILTER_MATCH)
+			found = 1;
+		/* We need to test all negative filters */
+		if (found && !neg)
+			break;
+	}
+
+	return ret;
+}
+
+/* Per-CPU state kept by test_stacktrace() */
+struct stack_info_cpu {
+ int cpu;
+ /* last_printed value passed for the latest non-stacktrace event on this CPU */
+ int last_printed;
+};
+
+/*
+ * Per-handle stack trace bookkeeping, built on the first call to
+ * test_stacktrace(). stacktrace_id is the id of the ftrace kernel_stack
+ * event for that handle, or 0 when the handle has no such event.
+ */
+struct stack_info {
+ struct stack_info *next;
+ struct handle_list *handles;
+ struct stack_info_cpu *cpus;
+ int stacktrace_id;
+ int nr_cpus;
+};
+
+/*
+ * Track whether a kernel stack trace should be shown.
+ *
+ * A stack trace event is worth printing only if the event before it on
+ * the same CPU was printed. For a non-stacktrace @record this function
+ * just remembers @last_printed for the record's CPU and returns 0. For a
+ * stacktrace @record it returns the remembered value, unless the
+ * negative filters match the stack trace, in which case it returns 0.
+ *
+ * The per-handle, per-CPU state is allocated on the first call, for
+ * every handle that is on handle_list at that time.
+ *
+ * Returns non-zero when the stack trace should be printed.
+ */
+static int
+test_stacktrace(struct handle_list *handles, struct tep_record *record,
+		int last_printed)
+{
+	static struct stack_info *infos;
+	struct stack_info *info;
+	struct stack_info_cpu *cpu_info;
+	struct handle_list *h;
+	struct tep_handle *pevent;
+	struct tep_event *event;
+	static int init;
+	int ret;
+	int id;
+
+	if (!init) {
+		init = 1;
+
+		list_for_each_entry(h, &handle_list, list) {
+			info = malloc(sizeof(*info));
+			if (!info)
+				die("Failed to allocate handle");
+			info->handles = h;
+			info->nr_cpus = tracecmd_cpus(h->handle);
+
+			/* calloc() so that every CPU slot starts zeroed */
+			info->cpus = calloc(info->nr_cpus, sizeof(*info->cpus));
+			if (!info->cpus)
+				die("Failed to allocate for %d cpus", info->nr_cpus);
+
+			pevent = tracecmd_get_tep(h->handle);
+			event = tep_find_event_by_name(pevent, "ftrace",
+						       "kernel_stack");
+			if (event)
+				info->stacktrace_id = event->id;
+			else
+				info->stacktrace_id = 0;
+
+			info->next = infos;
+			infos = info;
+		}
+	}
+
+	pevent = tracecmd_get_tep(handles->handle);
+
+	for (info = infos; info; info = info->next)
+		if (info->handles == handles)
+			break;
+
+	/* Unknown handle, or one without a kernel_stack event */
+	if (!info || !info->stacktrace_id)
+		return 0;
+
+	/* Never index outside the per-CPU array */
+	if (record->cpu < 0 || record->cpu >= info->nr_cpus)
+		return 0;
+
+	cpu_info = &info->cpus[record->cpu];
+
+	id = tep_data_type(pevent, record);
+
+	/*
+	 * Print the stack trace if the previous event was printed.
+	 * But do not print the stack trace if it is explicitly
+	 * being filtered out.
+	 */
+	if (id == info->stacktrace_id) {
+		ret = test_filters(pevent, handles->event_filter_out, record, 1);
+		if (ret != FILTER_MATCH)
+			return cpu_info->last_printed;
+		return 0;
+	}
+
+	cpu_info->last_printed = last_printed;
+	return 0;
+}
+
+/*
+ * Return the next record of @handles that passes the filters, or NULL
+ * when the handle is exhausted.
+ *
+ * The record is cached in handles->record, so repeated calls return the
+ * same record until free_handle_record() releases it. Once NULL has been
+ * returned, handles->done is set and every later call returns NULL.
+ */
+static struct tep_record *get_next_record(struct handle_list *handles)
+{
+ struct tep_record *record;
+ struct tep_handle *pevent;
+ int found = 0;
+ int cpu;
+ int ret;
+
+ if (handles->record)
+ return handles->record;
+
+ if (handles->done)
+ return NULL;
+
+ pevent = tracecmd_get_tep(handles->handle);
+
+ do {
+ if (filter_cpus) {
+ long long last_stamp = -1;
+ struct tep_record *precord;
+ int first_record = 1;
+ int next_cpu = -1;
+ int i;
+
+ /* Among the selected CPUs, pick the one whose next record is oldest */
+ for (i = 0; (cpu = filter_cpus[i]) >= 0; i++) {
+ precord = tracecmd_peek_data(handles->handle, cpu);
+ if (precord &&
+ (first_record || precord->ts < last_stamp)) {
+ next_cpu = cpu;
+ last_stamp = precord->ts;
+ first_record = 0;
+ }
+ }
+ if (!first_record)
+ record = tracecmd_read_data(handles->handle, next_cpu);
+ else
+ record = NULL;
+ } else
+ record = tracecmd_read_next_data(handles->handle, &cpu);
+
+ if (record) {
+ ret = test_filters(pevent, handles->event_filters, record, 0);
+ switch (ret) {
+ case FILTER_NOEXIST:
+ /* Stack traces may still filter this */
+ if (stacktrace_id &&
+ test_stacktrace(handles, record, 0))
+ found = 1;
+ else
+ tracecmd_free_record(record);
+ break;
+ case FILTER_NONE:
+ case FILTER_MATCH:
+ /* Test the negative filters (-v) */
+ ret = test_filters(pevent, handles->event_filter_out,
+ record, 1);
+ if (ret != FILTER_MATCH) {
+ found = 1;
+ break;
+ }
+ /* fall through */
+ default:
+ tracecmd_free_record(record);
+ }
+ }
+ /* A rejected record was freed above; its pointer is only tested here */
+ } while (record && !found);
+
+ /* Tell the stack trace tracking that this record gets printed */
+ if (record && stacktrace_id)
+ test_stacktrace(handles, record, 1);
+
+ handles->record = record;
+ if (!record)
+ handles->done = 1;
+
+ return record;
+}
+
+/* Release the record cached in @handles, if any, and clear the cache. */
+static void free_handle_record(struct handle_list *handles)
+{
+	if (handles->record) {
+		tracecmd_free_record(handles->record);
+		handles->record = NULL;
+	}
+}
+
+/*
+ * Print the file name column for @handles, padded to max_file_size.
+ * Handles without a (non-empty) name get blank padding instead. Nothing
+ * is printed unless several inputs or buffer instances are in use.
+ */
+static void print_handle_file(struct handle_list *handles)
+{
+	const char *name = handles->file;
+
+	/* Only print file names if more than one file is read */
+	if (!(multi_inputs || instances))
+		return;
+
+	if (!name || *name == '\0') {
+		printf("%*s ", max_file_size, "");
+		return;
+	}
+
+	printf("%*s: ", max_file_size, name);
+}
+
+/* Free a whole list of compiled filters, including the tep filters. */
+static void free_filters(struct filter *event_filter)
+{
+	struct filter *next;
+
+	for (; event_filter; event_filter = next) {
+		next = event_filter->next;
+		tep_filter_free(event_filter->filter);
+		free(event_filter);
+	}
+}
+
+/*
+ * What read_data_info() should print: the trace itself (NORMAL), or only
+ * the buffer statistics, the uname or the version of each input.
+ */
+enum output_type {
+ OUTPUT_NORMAL,
+ OUTPUT_STAT_ONLY,
+ OUTPUT_UNAME_ONLY,
+ OUTPUT_VERSION_ONLY,
+};
+
+/*
+ * Main report loop.
+ *
+ * First pass over @handle_list: initialize each handle, print its CPU
+ * count, handle the "info only" output types, set up stack trace,
+ * wakeup, hook, profile and filter state, and add a handle for every
+ * buffer instance found in the file.
+ *
+ * Second pass (OUTPUT_NORMAL only): repeatedly take the record with the
+ * lowest timestamp across all handles and print it, until every handle
+ * is exhausted. Then run the profiler if requested and release the
+ * per-handle filters and timestamp arrays.
+ *
+ * @handle_list: list of struct handle_list (shadows the file-scope list)
+ * @otype:       what to print
+ * @global:      passed through to trace_init_profile()
+ * @align_ts:    when set, shift all handles so the earliest first
+ *               timestamp becomes zero
+ */
+static void read_data_info(struct list_head *handle_list, enum output_type otype,
+ int global, int align_ts)
+{
+ unsigned long long ts, first_ts;
+ struct handle_list *handles;
+ struct handle_list *last_handle;
+ struct tep_record *record;
+ struct tep_record *last_record;
+ struct tep_handle *pevent;
+ struct tep_event *event;
+ int first = 1;
+ int ret;
+
+ list_for_each_entry(handles, handle_list, list) {
+ int cpus;
+
+ if (!tracecmd_is_buffer_instance(handles->handle)) {
+ ret = tracecmd_init_data(handles->handle);
+ if (ret < 0)
+ die("failed to init data");
+ }
+ cpus = tracecmd_cpus(handles->handle);
+ handles->cpus = cpus;
+ handles->last_timestamp = calloc(cpus, sizeof(*handles->last_timestamp));
+ if (!handles->last_timestamp)
+ die("allocating timestamps");
+
+ /* Don't process instances that we added here */
+ if (tracecmd_is_buffer_instance(handles->handle))
+ continue;
+
+ if (align_ts) {
+ ts = tracecmd_get_first_ts(handles->handle);
+ if (first || first_ts > ts)
+ first_ts = ts;
+ first = 0;
+ }
+ print_handle_file(handles);
+ printf("cpus=%d\n", cpus);
+
+ /* Latency trace is just all ASCII */
+ if (ret > 0) {
+ if (multi_inputs)
+ die("latency traces do not work with multiple inputs");
+ read_latency(handles->handle);
+ return;
+ }
+
+ switch (otype) {
+ case OUTPUT_NORMAL:
+ break;
+ case OUTPUT_STAT_ONLY:
+ printf("\nKernel buffer statistics:\n"
+ " Note: \"entries\" are the entries left in the kernel ring buffer and are not\n"
+ " recorded in the trace data. They should all be zero.\n\n");
+ tracecmd_print_stats(handles->handle);
+ continue;
+ case OUTPUT_UNAME_ONLY:
+ tracecmd_print_uname(handles->handle);
+ /* NOTE(review): no break, so the version is printed too - confirm this is intended */
+ case OUTPUT_VERSION_ONLY:
+ tracecmd_print_version(handles->handle);
+ continue;
+ }
+
+ /* Find the kernel_stacktrace if available */
+ pevent = tracecmd_get_tep(handles->handle);
+ event = tep_find_event_by_name(pevent, "ftrace", "kernel_stack");
+ if (event)
+ stacktrace_id = event->id;
+
+ init_wakeup(handles->handle);
+ if (last_hook)
+ last_hook->next = tracecmd_hooks(handles->handle);
+ else
+ hooks = tracecmd_hooks(handles->handle);
+ if (profile)
+ trace_init_profile(handles->handle, hooks, global);
+
+ process_filters(handles);
+
+ /* If this file has buffer instances, get the handles for them */
+ instances = tracecmd_buffer_instances(handles->handle);
+ if (instances) {
+ struct tracecmd_input *new_handle;
+ const char *name;
+ int i;
+
+ for (i = 0; i < instances; i++) {
+ name = tracecmd_buffer_instance_name(handles->handle, i);
+ if (!name)
+ die("error in reading buffer instance");
+ new_handle = tracecmd_buffer_instance_handle(handles->handle, i);
+ if (!new_handle) {
+ warning("could not retrieve handle %s", name);
+ continue;
+ }
+ /* Appends to the file-scope handle_list while this loop is iterating */
+ add_handle(new_handle, name);
+ }
+ }
+ }
+
+ if (otype != OUTPUT_NORMAL)
+ return;
+
+ if (align_ts) {
+ list_for_each_entry(handles, handle_list, list) {
+ tracecmd_add_ts_offset(handles->handle, -first_ts);
+ }
+ }
+
+ /* Merge the handles: always show the oldest pending record next */
+ do {
+ last_handle = NULL;
+ last_record = NULL;
+
+ list_for_each_entry(handles, handle_list, list) {
+ record = get_next_record(handles);
+ if (!record)
+ continue;
+ if (!last_record ||
+ (record && record->ts < last_record->ts)) {
+ last_record = record;
+ last_handle = handles;
+ }
+ }
+ if (last_record) {
+ int cpu = last_record->cpu;
+ if (cpu >= last_handle->cpus)
+ die("cpu %d greater than %d\n", cpu, last_handle->cpus);
+ if (tscheck &&
+ last_handle->last_timestamp[cpu] > last_record->ts) {
+ errno = 0;
+ warning("WARNING: Record on cpu %d went backwards: %lld to %lld delta: -%lld\n",
+ cpu, last_handle->last_timestamp[cpu],
+ last_record->ts,
+ last_handle->last_timestamp[cpu] - last_record->ts);
+ }
+ last_handle->last_timestamp[cpu] = last_record->ts;
+ print_handle_file(last_handle);
+ trace_show_data(last_handle->handle, last_record);
+ free_handle_record(last_handle);
+ }
+ } while (last_record);
+
+ if (profile)
+ do_trace_profile();
+
+ list_for_each_entry(handles, handle_list, list) {
+ free_filters(handles->event_filters);
+ free_filters(handles->event_filter_out);
+ free(handles->last_timestamp);
+
+ show_test(handles->handle);
+ }
+}
+
+/*
+ * Open @file read-only, remember the descriptor in input_fd and wrap it
+ * in a tracecmd input handle created with @flags. Dies if the file
+ * cannot be opened; returns whatever tracecmd_alloc_fd() returns.
+ */
+struct tracecmd_input *read_trace_header(const char *file, int flags)
+{
+	int fd = open(file, O_RDONLY);
+
+	input_fd = fd;
+	if (fd < 0)
+		die("opening '%s'\n", file);
+
+	return tracecmd_alloc_fd(fd, flags);
+}
+
+/*
+ * Signal handler (installed for SIGINT by trace_report()): announce the
+ * signal on stderr, close every handle on handle_list and exit with
+ * status 0.  @sig is not examined.
+ */
+static void sig_end(int sig)
+{
+	struct handle_list *entry;
+
+	fputs("trace-cmd: Received SIGINT\n", stderr);
+
+	list_for_each_entry(entry, &handle_list, list) {
+		tracecmd_close(entry->handle);
+	}
+
+	exit(0);
+}
+
+/*
+ * Skip leading white space in @p and make sure a decimal digit follows.
+ *
+ * @p:       current position inside the cpu list string
+ * @cpu_str: the complete cpu list string, only used for the error message
+ *
+ * Returns the position of the first non-space character, which is
+ * guaranteed to be a digit.  Dies if it is anything else (including the
+ * terminating NUL).
+ */
+static const char *skip_space_and_test_digit(const char *p, const char *cpu_str)
+{
+	/*
+	 * The <ctype.h> classifiers are only defined for EOF and values
+	 * representable as unsigned char, so a plain (possibly negative)
+	 * char must be converted first.
+	 */
+	while (isspace((unsigned char)*p))
+		p++;
+	if (!isdigit((unsigned char)*p))
+		die("invalid character '%c' in cpu string '%s'",
+		    *p, cpu_str);
+	return p;
+}
+
+/*
+ * Add @cpu to the filter_cpus list through tracecmd_add_id(), using the
+ * current nr_filter_cpus as the count argument, then bump the counter.
+ */
+static void __add_cpu(int cpu)
+{
+	filter_cpus = tracecmd_add_id(filter_cpus, cpu, nr_filter_cpus);
+	nr_filter_cpus++;
+}
+
+/*
+ * Parse a cpu list such as "0,2-4:7" and add every cpu it names to the
+ * cpu filter via __add_cpu().
+ *
+ * Items are a single number or a "low-high" range; items may be
+ * separated by ',' or ':'.  Dies on a non-digit where a number is
+ * expected or on a range whose first number is larger than the second.
+ */
+static void parse_cpulist(const char *cpu_str)
+{
+	const char *cur = cpu_str;
+	unsigned first, last, cpu;
+	char *end;
+
+	do {
+		cur = skip_space_and_test_digit(cur, cpu_str);
+		first = strtoul(cur, &end, 10);
+		last = first;
+		cur = end;
+
+		/* Optional upper bound of a range */
+		if (*cur == '-') {
+			cur = skip_space_and_test_digit(cur + 1, cpu_str);
+			last = strtoul(cur, &end, 10);
+			cur = end;
+		}
+
+		if (first > last)
+			die("range of cpu numbers must be lower to greater");
+
+		for (cpu = first; cpu <= last; cpu++)
+			__add_cpu(cpu);
+
+		if (*cur == ',' || *cur == ':')
+			cur++;
+	} while (*cur != '\0');
+}
+
+/*
+ * Read from @fd into @dst until @len bytes have been stored or read()
+ * returns zero or an error.  Nothing is reported back to the caller, so
+ * a short read is silent.
+ */
+static void read_file_fd(int fd, char *dst, int len)
+{
+	char *pos = dst;
+	int got;
+
+	while ((got = read(fd, pos, len)) > 0) {
+		pos += got;
+		len -= got;
+	}
+}
+
+/*
+ * Load the contents of @file and feed them to tep_parse_kallsyms() so
+ * that @pevent can resolve function addresses.
+ *
+ * The buffer is sized from fstat()'s st_size.  Dies if the file cannot
+ * be opened or stat'ed, or if the buffer cannot be allocated.
+ *
+ * NOTE(review): a file that reports st_size == 0 yields an empty buffer;
+ * confirm callers always pass a regular file.
+ */
+static void add_functions(struct tep_handle *pevent, const char *file)
+{
+	struct stat st;
+	char *buf;
+	int ret;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		die("Can't read file %s", file);
+
+	ret = fstat(fd, &st);
+	if (ret < 0)
+		die("Can't stat file %s", file);
+
+	/*
+	 * read_file_fd() does not report how much it read.  Zero the whole
+	 * buffer so that a short read still leaves a properly terminated
+	 * string instead of uninitialized bytes before the final NUL.
+	 */
+	buf = calloc(1, st.st_size + 1);
+	if (!buf)
+		die("Failed to allocate for function buffer");
+	read_file_fd(fd, buf, st.st_size);
+	close(fd);
+	tep_parse_kallsyms(pevent, buf);
+	free(buf);
+}
+
+/*
+ * Register a plugin option given as "name" or "name=value".
+ *
+ * @option is split in place at the first '=': the part before it is the
+ * option name, the part after it the value.  Without an '=' the value
+ * passed to tep_plugin_add_option() is NULL.
+ */
+static void process_plugin_option(char *option)
+{
+	char *eq = strchr(option, '=');
+
+	if (!eq) {
+		tep_plugin_add_option(option, NULL);
+		return;
+	}
+
+	*eq = '\0';
+	tep_plugin_add_option(option, eq + 1);
+}
+
+/*
+ * Set @flag on every event of @pevent whose name or system matches one
+ * of the patterns in @list.
+ *
+ * Each entry's ->event string is compiled as a case-insensitive regular
+ * expression anchored at both ends ("^...$").  Does nothing when @list
+ * is NULL.  Dies on allocation failure, if the events cannot be listed,
+ * or if a pattern does not compile.
+ */
+static void set_event_flags(struct tep_handle *pevent, struct event_str *list,
+			    unsigned int flag)
+{
+	struct tep_event **events;
+	struct tep_event *event;
+	struct event_str *str;
+	regex_t regex;
+	int ret;
+	int i;
+
+	if (!list)
+		return;
+
+	events = tep_list_events(pevent, 0);
+	if (!events)
+		die("Failed to list events");
+
+	for (str = list; str; str = str->next) {
+		char *match;
+
+		/* '^' + pattern + '$' + NUL */
+		match = malloc(strlen(str->event) + 3);
+		if (!match)
+			die("Failed to allocate for match string '%s'", str->event);
+		sprintf(match, "^%s$", str->event);
+
+		/* regcomp() reports failure with a non-zero (positive) code */
+		ret = regcomp(&regex, match, REG_ICASE|REG_NOSUB);
+		free(match);
+		if (ret)
+			die("Can't parse '%s'", str->event);
+
+		for (i = 0; events[i]; i++) {
+			event = events[i];
+			/* other code in this file treats ->system as optional */
+			if (!regexec(&regex, event->name, 0, NULL, 0) ||
+			    (event->system &&
+			     !regexec(&regex, event->system, 0, NULL, 0)))
+				event->flags |= flag;
+		}
+
+		/* release what regcomp() allocated before the next pattern */
+		regfree(&regex);
+	}
+}
+
+/*
+ * Create an event hook from the -H argument @arg and push it on the
+ * front of the hooks list.  last_hook is set the first time a hook is
+ * added and left alone afterwards.
+ *
+ * Dies if the hook cannot be created.
+ */
+static void add_hook(const char *arg)
+{
+	struct hook_list *hook;
+
+	hook = tracecmd_create_event_hook(arg);
+	/* Do not dereference a failed creation below */
+	if (!hook)
+		die("Failed to create event hook '%s'", arg);
+
+	hook->next = hooks;
+	hooks = hook;
+	if (!last_hook)
+		last_hook = hook;
+}
+
+/*
+ * Identifiers for the long options of trace_report().  Each value is
+ * used as the 'val' member of an entry in that function's long_options[]
+ * table, so it is what getopt_long() returns when the option is seen.
+ */
+enum {
+ OPT_verbose = 234,
+ OPT_align_ts = 235,
+ OPT_raw_ts = 236,
+ OPT_version = 237,
+ OPT_tscheck = 238,
+ OPT_tsdiff = 239,
+ OPT_ts2secs = 240,
+ OPT_tsoffset = 241,
+ OPT_bycomm = 242,
+ OPT_debug = 243,
+ OPT_uname = 244,
+ OPT_profile = 245,
+ OPT_event = 246,
+ OPT_comm = 247,
+ OPT_boundary = 248,
+ OPT_stat = 249,
+ OPT_pid = 250,
+ OPT_nodate = 251,
+ OPT_check_event_parsing = 252,
+ OPT_kallsyms = 253,
+ OPT_events = 254,
+ OPT_cpu = 255,
+ OPT_cpus = 256,
+};
+
+/*
+ * Entry point of the "report" sub-command.
+ *
+ * Parses the command line, opens every requested input file, applies the
+ * per-file settings and then either prints one piece of information and
+ * returns, or hands all handles to read_data_info() to produce the
+ * report.
+ *
+ * @argc/@argv: the full command line; argv[1] must be "report".
+ */
+void trace_report (int argc, char **argv)
+{
+ struct tracecmd_input *handle;
+ struct tep_handle *pevent;
+ struct event_str *raw_events = NULL;
+ struct event_str *nohandler_events = NULL;
+ struct event_str **raw_ptr = &raw_events;
+ struct event_str **nohandler_ptr = &nohandler_events;
+ const char *functions = NULL;
+ const char *print_event = NULL;
+ struct input_files *inputs;
+ struct handle_list *handles;
+ enum output_type otype;
+ long long tsoffset = 0;
+ unsigned long long ts2secs = 0;
+ unsigned long long ts2sc;
+ int open_flags = 0;
+ int show_stat = 0;
+ int show_funcs = 0;
+ int show_endian = 0;
+ int show_page_size = 0;
+ int show_printk = 0;
+ int show_uname = 0;
+ int show_version = 0;
+ int show_events = 0;
+ int show_cpus = 0;
+ int print_events = 0;
+ int nanosec = 0;
+ int no_date = 0;
+ int raw_ts = 0;
+ int align_ts = 0;
+ int global = 0;
+ int neg = 0;
+ int ret = 0;
+ int check_event_parsing = 0;
+ int c;
+
+ list_head_init(&handle_list);
+ list_head_init(&input_files);
+
+ if (argc < 2)
+ usage(argv);
+
+ if (strcmp(argv[1], "report") != 0)
+ usage(argv);
+
+ signal(SIGINT, sig_end);
+
+ /*
+ * Option parsing.  getopt_long() is given argc-1/argv+1 so that
+ * "report" takes the place of the program name.
+ */
+ for (;;) {
+ int option_index = 0;
+ static struct option long_options[] = {
+ {"cpu", required_argument, NULL, OPT_cpu},
+ {"cpus", no_argument, NULL, OPT_cpus},
+ {"events", no_argument, NULL, OPT_events},
+ {"event", required_argument, NULL, OPT_event},
+ {"filter-test", no_argument, NULL, 'T'},
+ {"kallsyms", required_argument, NULL, OPT_kallsyms},
+ {"pid", required_argument, NULL, OPT_pid},
+ {"comm", required_argument, NULL, OPT_comm},
+ {"check-events", no_argument, NULL,
+ OPT_check_event_parsing},
+ {"nodate", no_argument, NULL, OPT_nodate},
+ {"stat", no_argument, NULL, OPT_stat},
+ {"boundary", no_argument, NULL, OPT_boundary},
+ {"debug", no_argument, NULL, OPT_debug},
+ {"profile", no_argument, NULL, OPT_profile},
+ {"uname", no_argument, NULL, OPT_uname},
+ {"version", no_argument, NULL, OPT_version},
+ {"by-comm", no_argument, NULL, OPT_bycomm},
+ {"ts-offset", required_argument, NULL, OPT_tsoffset},
+ {"ts2secs", required_argument, NULL, OPT_ts2secs},
+ {"ts-diff", no_argument, NULL, OPT_tsdiff},
+ {"ts-check", no_argument, NULL, OPT_tscheck},
+ {"raw-ts", no_argument, NULL, OPT_raw_ts},
+ {"align-ts", no_argument, NULL, OPT_align_ts},
+ {"verbose", optional_argument, NULL, OPT_verbose},
+ {"help", no_argument, NULL, '?'},
+ {NULL, 0, NULL, 0}
+ };
+
+ c = getopt_long (argc-1, argv+1, "+hSIi:H:feGpRr:tPNn:LlEwF:V::vTqO:",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+ switch (c) {
+ case 'h':
+ usage(argv);
+ break;
+ case 'i':
+ /*
+ * The first -i is only remembered in input_file.  From the
+ * second -i on, files go on the input list; the first file
+ * is added retroactively (with any ts offset seen so far).
+ */
+ if (input_file) {
+ if (!multi_inputs) {
+ add_input(input_file);
+ if (tsoffset)
+ last_input_file->tsoffset = tsoffset;
+ }
+ multi_inputs++;
+ add_input(optarg);
+ } else
+ input_file = optarg;
+ break;
+ case 'F':
+ add_filter(optarg, neg);
+ break;
+ case 'H':
+ add_hook(optarg);
+ break;
+ case 'T':
+ test_filters_mode = 1;
+ break;
+ case 'f':
+ show_funcs = 1;
+ break;
+ case 'I':
+ no_irqs = 1;
+ break;
+ case 'S':
+ no_softirqs = 1;
+ break;
+ case 'P':
+ show_printk = 1;
+ break;
+ case 'L':
+ open_flags |= TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS;
+ break;
+ case 'N':
+ open_flags |= TRACECMD_FL_LOAD_NO_PLUGINS;
+ break;
+ case 'n':
+ /* append to the tail of the nohandler_events list */
+ *nohandler_ptr = malloc(sizeof(struct event_str));
+ if (!*nohandler_ptr)
+ die("Failed to allocate for '-n %s'", optarg);
+ (*nohandler_ptr)->event = optarg;
+ (*nohandler_ptr)->next = NULL;
+ nohandler_ptr = &(*nohandler_ptr)->next;
+ break;
+ case 'e':
+ show_endian = 1;
+ break;
+ case 'p':
+ show_page_size = 1;
+ break;
+ case 'E':
+ show_events = 1;
+ break;
+ case 'G':
+ global = 1;
+ break;
+ case 'R':
+ raw_format = true;
+ break;
+ case 'r':
+ /* append to the tail of the raw_events list */
+ *raw_ptr = malloc(sizeof(struct event_str));
+ if (!*raw_ptr)
+ die("Failed to allocate '-r %s'", optarg);
+ (*raw_ptr)->event = optarg;
+ (*raw_ptr)->next = NULL;
+ raw_ptr = &(*raw_ptr)->next;
+ break;
+ case 't':
+ nanosec = 1;
+ break;
+ case 'w':
+ show_wakeup = 1;
+ break;
+ case 'l':
+ latency_format = 1;
+ break;
+ case 'O':
+ process_plugin_option(optarg);
+ break;
+ case 'v':
+ if (neg)
+ die("Only 1 -v can be used");
+ neg = 1;
+ break;
+ case 'q':
+ silence_warnings = 1;
+ tracecmd_set_loglevel(TEP_LOG_NONE);
+ break;
+ case OPT_cpu:
+ parse_cpulist(optarg);
+ break;
+ case OPT_cpus:
+ show_cpus = 1;
+ break;
+ case OPT_events:
+ print_events = 1;
+ break;
+ case OPT_event:
+ print_event = optarg;
+ break;
+ case OPT_kallsyms:
+ functions = optarg;
+ break;
+ case OPT_pid:
+ add_pid_filter(optarg);
+ break;
+ case OPT_comm:
+ add_comm_filter(optarg);
+ break;
+ case OPT_check_event_parsing:
+ check_event_parsing = 1;
+ break;
+ case OPT_nodate:
+ no_date = 1;
+ break;
+ case OPT_stat:
+ show_stat = 1;
+ break;
+ case OPT_boundary:
+ /* Debug to look at buffer breaks */
+ buffer_breaks = 1;
+ break;
+ case OPT_debug:
+ buffer_breaks = 1;
+ tracecmd_set_debug(true);
+ break;
+ case OPT_profile:
+ profile = 1;
+ break;
+ case OPT_uname:
+ show_uname = 1;
+ break;
+ case OPT_version:
+ show_version = 1;
+ break;
+ case OPT_bycomm:
+ trace_profile_set_merge_like_comms();
+ break;
+ case OPT_ts2secs:
+ /* applies to the latest -i file once several inputs are in use */
+ ts2sc = atoll(optarg);
+ if (multi_inputs)
+ last_input_file->ts2secs = ts2sc;
+ else
+ ts2secs = ts2sc;
+ break;
+ case OPT_tsoffset:
+ tsoffset = atoll(optarg);
+ if (multi_inputs)
+ last_input_file->tsoffset = tsoffset;
+ if (!input_file)
+ die("--ts-offset must come after -i");
+ break;
+ case OPT_tsdiff:
+ tsdiff = 1;
+ break;
+ case OPT_tscheck:
+ tscheck = 1;
+ break;
+ case OPT_raw_ts:
+ raw_ts = 1;
+ break;
+ case OPT_align_ts:
+ align_ts = 1;
+ break;
+ case 'V':
+ case OPT_verbose:
+ show_status = 1;
+ if (trace_set_verbose(optarg) < 0)
+ die("invalid verbose level %s", optarg);
+ break;
+ default:
+ usage(argv);
+ }
+ }
+
+ /* A trailing non-option argument names the input file, unless -i was used */
+ if ((argc - optind) >= 2) {
+ if (input_file)
+ usage(argv);
+ input_file = argv[optind + 1];
+ }
+
+ if (!input_file)
+ input_file = default_input_file;
+
+ if (!multi_inputs) {
+ add_input(input_file);
+ if (tsoffset)
+ last_input_file->tsoffset = tsoffset;
+ } else if (show_wakeup)
+ die("Wakeup tracing can only be done on a single input file");
+
+ /*
+ * Open each input and apply the requested settings to it.  Several
+ * of the "show" options print their information for the file being
+ * processed and return right away.
+ */
+ list_for_each_entry(inputs, &input_files, list) {
+ handle = read_trace_header(inputs->file, open_flags);
+ if (!handle)
+ die("error reading header for %s", inputs->file);
+
+ /* If used with instances, top instance will have no tag */
+ add_handle(handle, multi_inputs ? inputs->file : NULL);
+
+ if (no_date)
+ tracecmd_set_flag(handle, TRACECMD_FL_IGNORE_DATE);
+ if (raw_ts)
+ tracecmd_set_flag(handle, TRACECMD_FL_RAW_TS);
+ page_size = tracecmd_page_size(handle);
+
+ if (show_page_size) {
+ printf("file page size is %d, and host page size is %d\n",
+ page_size,
+ getpagesize());
+ return;
+ }
+
+ if (inputs->tsoffset)
+ tracecmd_set_ts_offset(handle, inputs->tsoffset);
+
+ /* a per-file ts2secs value wins over the one given before any -i list */
+ if (inputs->ts2secs)
+ tracecmd_set_ts2secs(handle, inputs->ts2secs);
+ else if (ts2secs)
+ tracecmd_set_ts2secs(handle, ts2secs);
+
+ pevent = tracecmd_get_tep(handle);
+
+ if (nanosec)
+ tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+
+ if (raw_format)
+ format_type = TEP_PRINT_INFO_RAW;
+
+ if (test_filters_mode)
+ tep_set_test_filters(pevent, 1);
+
+ if (functions)
+ add_functions(pevent, functions);
+
+ if (show_endian) {
+ printf("file is %s endian and host is %s endian\n",
+ tep_is_file_bigendian(pevent) ? "big" : "little",
+ tep_is_local_bigendian(pevent) ? "big" : "little");
+ return;
+ }
+
+ if (print_events) {
+ tracecmd_print_events(handle, NULL);
+ return;
+ }
+
+ if (print_event) {
+ tracecmd_print_events(handle, print_event);
+ return;
+ }
+
+ /* --check-events turns the header parsing result into the exit status */
+ ret = tracecmd_read_headers(handle, 0);
+ if (check_event_parsing) {
+ if (ret || tracecmd_get_parsing_failures(handle))
+ exit(EINVAL);
+ else
+ exit(0);
+ } else {
+ if (ret)
+ return;
+ }
+
+ if (show_funcs) {
+ tep_print_funcs(pevent);
+ return;
+ }
+ if (show_printk) {
+ tep_print_printk(pevent);
+ return;
+ }
+
+ if (show_events) {
+ struct tep_event **events;
+ struct tep_event *event;
+ int i;
+
+ events = tep_list_events(pevent, TEP_EVENT_SORT_SYSTEM);
+ for (i = 0; events[i]; i++) {
+ event = events[i];
+ if (event->system)
+ printf("%s:", event->system);
+ printf("%s\n", event->name);
+ }
+ return;
+ }
+
+ /* --cpus is reported for every input file, hence 'continue' and not 'return' */
+ if (show_cpus) {
+ int cpus;
+ /* NOTE(review): this 'ret' shadows the function-level one */
+ int ret;
+ int i;
+
+ if (!tracecmd_is_buffer_instance(handle)) {
+ ret = tracecmd_init_data(handle);
+ if (ret < 0)
+ die("failed to init data");
+ }
+ cpus = tracecmd_cpus(handle);
+ printf("List of CPUs in %s with data:\n", inputs->file);
+ for (i = 0; i < cpus; i++) {
+ if (tracecmd_read_cpu_first(handle, i))
+ printf(" %d\n", i);
+ }
+ continue;
+ }
+
+ set_event_flags(pevent, nohandler_events, TEP_EVENT_FL_NOHANDLE);
+ set_event_flags(pevent, raw_events, TEP_EVENT_FL_PRINTRAW);
+ }
+
+ if (show_cpus)
+ return;
+
+ otype = OUTPUT_NORMAL;
+
+ /* 'handle' is the last input opened by the loop above */
+ if (tracecmd_get_flags(handle) & TRACECMD_FL_RAW_TS) {
+ tep_func_repeat_format = "%d";
+ } else if (tracecmd_get_flags(handle) & TRACECMD_FL_IN_USECS) {
+ if (tep_test_flag(tracecmd_get_tep(handle), TEP_NSEC_OUTPUT))
+ tep_func_repeat_format = "%9.1d";
+ else
+ tep_func_repeat_format = "%6.1000d";
+ } else {
+ tep_func_repeat_format = "%12d";
+ }
+
+
+ if (show_stat)
+ otype = OUTPUT_STAT_ONLY;
+ /* yeah yeah, uname overrides stat */
+ if (show_uname)
+ otype = OUTPUT_UNAME_ONLY;
+ /* and version overrides uname! */
+ if (show_version)
+ otype = OUTPUT_VERSION_ONLY;
+ read_data_info(&handle_list, otype, global, align_ts);
+
+ /* tear down: close every handle, then free the bookkeeping lists */
+ list_for_each_entry(handles, &handle_list, list) {
+ tracecmd_close(handles->handle);
+ }
+ free_handles();
+ free_inputs();
+
+ finish_wakeup();
+
+ return;
+}
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
new file mode 100644
index 00000000..27c4e7ba
--- /dev/null
+++ b/tracecmd/trace-record.c
@@ -0,0 +1,7322 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/utsname.h>
+#ifndef NO_PTRACE
+#include <sys/ptrace.h>
+#else
+#ifdef WARN_NO_PTRACE
+#warning ptrace not supported. -c feature will not work
+#endif
+#endif
+#include <netdb.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sched.h>
+#include <glob.h>
+#include <errno.h>
+#include <limits.h>
+#include <libgen.h>
+#include <poll.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include "tracefs.h"
+#include "version.h"
+#include "trace-local.h"
+#include "trace-msg.h"
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+#define TRACE_CTRL "tracing_on"
+#define TRACE "trace"
+#define AVAILABLE "available_tracers"
+#define CURRENT "current_tracer"
+#define ITER_CTRL "trace_options"
+#define MAX_LATENCY "tracing_max_latency"
+#define STAMP "stamp"
+#define FUNC_STACK_TRACE "func_stack_trace"
+#define TSC_CLOCK "x86-tsc"
+
+#define dprint(fmt, ...) tracecmd_debug(fmt, ##__VA_ARGS__)
+
+/*
+ * Kind of tracing operation being performed.  Every value is a distinct
+ * bit, so a type can be tested with '&' (as stop_threads() does for
+ * TRACE_TYPE_STREAM).
+ */
+enum trace_type {
+ TRACE_TYPE_RECORD = 1,
+ TRACE_TYPE_START = (1 << 1),
+ TRACE_TYPE_STREAM = (1 << 2),
+ TRACE_TYPE_EXTRACT = (1 << 3),
+ TRACE_TYPE_SET = (1 << 4),
+};
+
+static tracecmd_handle_init_func handle_init = NULL;
+
+static int rt_prio;
+
+static int keep;
+
+static int latency;
+static int sleep_time = 1000;
+static int recorder_threads;
+static struct pid_record_data *pids;
+static int buffers;
+
+/* Clear all function filters */
+static int clear_function_filters;
+
+static bool no_fifos;
+
+static char *host;
+
+static const char *gai_err;
+
+static bool quiet;
+
+static bool fork_process;
+
+/* Max size to let a per cpu file get */
+static int max_kb;
+
+static int do_ptrace;
+
+static int filter_task;
+static bool no_filter = false;
+
+static int local_cpu_count;
+
+static int finished;
+
+/* setting of /proc/sys/kernel/ftrace_enabled */
+static int fset;
+
+static unsigned recorder_flags;
+
+/* Try a few times to get an accurate date */
+static int date2ts_tries = 50;
+
+static struct func_list *graph_funcs;
+
+static int func_stack;
+
+static int save_stdout = -1;
+
+static struct hook_list *hooks;
+
+struct event_list {
+ struct event_list *next;
+ const char *event;
+ char *trigger;
+ char *filter;
+ char *pid_filter;
+ char *filter_file;
+ char *trigger_file;
+ char *enable_file;
+ int neg;
+};
+
+struct tracecmd_event_list *listed_events;
+
+struct events {
+ struct events *sibling;
+ struct events *children;
+ struct events *next;
+ char *name;
+};
+
+/* Files to be reset when done recording */
+struct reset_file {
+ struct reset_file *next; /* next entry in the singly linked list */
+ char *path; /* strdup()ed name of the file (see add_reset_file()) */
+ char *reset; /* strdup()ed value recorded for the file by add_reset_file() */
+ int prio; /* add_reset_file() keeps higher values nearer the list head */
+};
+
+static struct reset_file *reset_files;
+
+/* Triggers need to be cleared in a special way */
+static struct reset_file *reset_triggers;
+
+struct buffer_instance top_instance;
+struct buffer_instance *buffer_instances;
+struct buffer_instance *first_instance;
+
+static struct tracecmd_recorder *recorder;
+
+static int ignore_event_not_found = 0;
+
+/* Return non-zero when @instance is the top_instance object itself. */
+static inline int is_top_instance(struct buffer_instance *instance)
+{
+	return &top_instance == instance;
+}
+
+/* Return non-zero when first_instance is not the top_instance. */
+static inline int no_top_instance(void)
+{
+	return !(first_instance == &top_instance);
+}
+
+/*
+ * Point the instance's event_next tail pointer at its own events head,
+ * which is where add_event() will link the next event.
+ */
+static void init_instance(struct buffer_instance *instance)
+{
+	struct event_list **head = &instance->events;
+
+	instance->event_next = head;
+}
+
+/*
+ * Priority values for reset_file entries; add_reset_file() places
+ * entries with a higher value earlier in the reset_files list.
+ */
+enum {
+ RESET_DEFAULT_PRIO = 0,
+ RESET_HIGH_PRIO = 100000,
+};
+
+enum trace_cmd {
+ CMD_extract,
+ CMD_start,
+ CMD_stream,
+ CMD_profile,
+ CMD_record,
+ CMD_record_agent,
+ CMD_set,
+};
+
+struct common_record_context {
+ enum trace_cmd curr_cmd;
+ struct buffer_instance *instance;
+ const char *output;
+ char *date2ts;
+ char *user;
+ const char *clock;
+ const char *compression;
+ struct tsc_nsec tsc2nsec;
+ int data_flags;
+ int tsync_loop_interval;
+
+ int record_all;
+ int total_disable;
+ int disable;
+ int events;
+ int global;
+ int filtered;
+ int date;
+ int manual;
+ int topt;
+ int run_command;
+ int saved_cmdlines_size;
+ int file_version;
+};
+
+/*
+ * Remember that @file has to be written with @val later on.
+ *
+ * Both strings are copied.  The new entry is inserted into reset_files
+ * in front of the first entry whose priority is not greater than @prio,
+ * which keeps the list sorted from highest to lowest priority.
+ * Nothing is recorded when the global 'keep' is set.
+ */
+static void add_reset_file(const char *file, const char *val, int prio)
+{
+	struct reset_file **link;
+	struct reset_file *entry;
+
+	/* Only reset if we are not keeping the state */
+	if (keep)
+		return;
+
+	entry = malloc(sizeof(*entry));
+	if (!entry)
+		die("Failed to allocate reset");
+
+	entry->path = strdup(file);
+	entry->reset = strdup(val);
+	entry->prio = prio;
+	if (!entry->path || !entry->reset)
+		die("Failed to allocate reset path or val");
+
+	for (link = &reset_files; *link; link = &(*link)->next) {
+		if ((*link)->prio <= prio)
+			break;
+	}
+
+	entry->next = *link;
+	*link = entry;
+}
+
+/*
+ * Remember that the trigger file @file must be cleared later on.
+ *
+ * The path is copied and the entry is pushed on the front of
+ * reset_triggers.  Nothing is recorded when the global 'keep' is set.
+ * Dies on allocation failure.
+ */
+static void add_reset_trigger(const char *file)
+{
+	struct reset_file *reset;
+
+	/* Only reset if we are not keeping the state */
+	if (keep)
+		return;
+
+	reset = malloc(sizeof(*reset));
+	if (!reset)
+		die("Failed to allocate reset");
+
+	reset->path = strdup(file);
+	if (!reset->path)
+		die("Failed to allocate reset path or val");
+
+	/* No value or priority is supplied here; do not leave garbage behind */
+	reset->reset = NULL;
+	reset->prio = RESET_DEFAULT_PRIO;
+
+	reset->next = reset_triggers;
+	reset_triggers = reset;
+}
+
+/*
+ * Record the current content of @file, with priority @prio, through
+ * add_reset_file().  Does nothing if the content cannot be obtained.
+ */
+static void reset_save_file(const char *file, int prio)
+{
+	char *saved = get_file_content(file);
+
+	if (!saved)
+		return;
+
+	add_reset_file(file, saved, prio);
+	free(saved);
+}
+
+/*
+ * Like reset_save_file(), but substitute the saved value when the file
+ * currently holds a placeholder.
+ *
+ * @file: the file to check
+ * @prio: priority of the reset entry
+ * @nop: If the content of the file is this, use the reset value
+ * @reset: What to write if the file == @nop
+ *
+ * Does nothing when the global 'keep' is set or when the content of
+ * @file cannot be obtained.
+ */
+static void reset_save_file_cond(const char *file, int prio,
+				 const char *nop, const char *reset)
+{
+	char *content;
+	char *cond;
+
+	if (keep)
+		return;
+
+	content = get_file_content(file);
+	/* Same policy as reset_save_file(): nothing to save without content */
+	if (!content)
+		return;
+
+	cond = strstrip(content);
+
+	if (strcmp(cond, nop) == 0)
+		add_reset_file(file, reset, prio);
+	else
+		add_reset_file(file, content, prio);
+
+	free(content);
+}
+
+/**
+ * add_instance - add a buffer instance to the internal list
+ * @instance: The buffer instance to add
+ * @cpu_count: The number of CPUs to record for this instance
+ *
+ * The instance is initialized and pushed on the front of
+ * buffer_instances.  If first_instance was referring to the old head of
+ * that list it is moved to the new instance.  The global buffers
+ * counter is incremented.
+ */
+void add_instance(struct buffer_instance *instance, int cpu_count)
+{
+	struct buffer_instance *old_head = buffer_instances;
+
+	init_instance(instance);
+	instance->cpu_count = cpu_count;
+	instance->next = old_head;
+
+	if (first_instance == old_head)
+		first_instance = instance;
+
+	buffer_instances = instance;
+	buffers++;
+}
+
+/*
+ * Save the current content of @file, a file inside @instance's tracefs
+ * directory, so it can be restored later (see reset_save_file()).
+ */
+static void instance_reset_file_save(struct buffer_instance *instance, char *file, int prio)
+{
+	char *full = tracefs_instance_get_file(instance->tracefs, file);
+
+	if (full != NULL)
+		reset_save_file(full, prio);
+
+	tracefs_put_tracing_file(full);
+}
+
+/*
+ * Find out whether the files set_event_pid, options/event-fork and
+ * options/function-fork exist in the top instance (each positive answer
+ * is cached in a static), and for every one that does, flag it in
+ * @instance and save the instance's current content of that file, once
+ * per instance.
+ */
+static void test_set_event_pid(struct buffer_instance *instance)
+{
+	static int have_set_event_pid;
+	static int have_event_fork;
+	static int have_func_fork;
+
+	/* Probe the top instance until a file has been seen once */
+	if (!have_set_event_pid)
+		have_set_event_pid =
+			tracefs_file_exists(top_instance.tracefs, "set_event_pid") ? 1 : 0;
+	if (!have_event_fork)
+		have_event_fork =
+			tracefs_file_exists(top_instance.tracefs, "options/event-fork") ? 1 : 0;
+	if (!have_func_fork)
+		have_func_fork =
+			tracefs_file_exists(top_instance.tracefs, "options/function-fork") ? 1 : 0;
+
+	if (have_set_event_pid && !instance->have_set_event_pid) {
+		instance->have_set_event_pid = 1;
+		instance_reset_file_save(instance, "set_event_pid",
+					 RESET_DEFAULT_PRIO);
+	}
+	if (have_event_fork && !instance->have_event_fork) {
+		instance->have_event_fork = 1;
+		instance_reset_file_save(instance, "options/event-fork",
+					 RESET_DEFAULT_PRIO);
+	}
+	if (have_func_fork && !instance->have_func_fork) {
+		instance->have_func_fork = 1;
+		instance_reset_file_save(instance, "options/function-fork",
+					 RESET_DEFAULT_PRIO);
+	}
+}
+
+/**
+ * allocate_instance - allocate a new buffer instance
+ * @name: The name of the instance (the name is copied)
+ *
+ * If an instance called @name already exists in the ftrace system, a
+ * tracefs handle for it is attached to the new buffer instance;
+ * otherwise the tracefs member is left NULL.
+ *
+ * Returns a newly allocated instance, or NULL if memory could not be
+ * allocated or the handle for an existing instance could not be
+ * obtained.
+ */
+struct buffer_instance *allocate_instance(const char *name)
+{
+	struct buffer_instance *instance;
+
+	instance = calloc(1, sizeof(*instance));
+	if (!instance)
+		return NULL;
+
+	if (name) {
+		instance->name = strdup(name);
+		/* Do not hand out an instance that silently lost its name */
+		if (!instance->name)
+			goto error;
+	}
+
+	if (tracefs_instance_exists(name)) {
+		instance->tracefs = tracefs_instance_create(name);
+		if (!instance->tracefs)
+			goto error;
+	}
+
+	return instance;
+
+error:
+	/* ->tracefs is still NULL on every path that gets here */
+	free(instance->name);
+	free(instance);
+	return NULL;
+}
+
+/*
+ * Add a buffer instance for every sub-directory found in
+ * "<tracing_dir>/instances".
+ *
+ * Returns 0 on success and -1 if @tracing_dir is NULL, the path cannot
+ * be built, or the instances directory does not exist or cannot be
+ * opened.  Dies if an instance cannot be allocated.
+ */
+static int __add_all_instances(const char *tracing_dir)
+{
+	struct dirent *dent;
+	char *instances_dir;
+	struct stat st;
+	DIR *dir;
+	int ret;
+
+	if (!tracing_dir)
+		return -1;
+
+	instances_dir = append_file(tracing_dir, "instances");
+	if (!instances_dir)
+		return -1;
+
+	ret = stat(instances_dir, &st);
+	if (ret < 0 || !S_ISDIR(st.st_mode)) {
+		ret = -1;
+		goto out_free;
+	}
+
+	dir = opendir(instances_dir);
+	if (!dir) {
+		ret = -1;
+		goto out_free;
+	}
+
+	while ((dent = readdir(dir))) {
+		/*
+		 * allocate_instance() makes its own copy of the name, so the
+		 * directory entry can be used directly; duplicating it here
+		 * would only leak the copy.
+		 */
+		const char *name = dent->d_name;
+		struct buffer_instance *instance;
+		char *instance_path;
+
+		if (strcmp(name, ".") == 0 ||
+		    strcmp(name, "..") == 0)
+			continue;
+
+		/* Only directories are instances */
+		instance_path = append_file(instances_dir, name);
+		if (!instance_path)
+			continue;
+		ret = stat(instance_path, &st);
+		free(instance_path);
+		if (ret < 0 || !S_ISDIR(st.st_mode))
+			continue;
+
+		instance = allocate_instance(name);
+		if (!instance)
+			die("Failed to create instance");
+		add_instance(instance, local_cpu_count);
+	}
+
+	closedir(dir);
+	ret = 0;
+
+ out_free:
+	free(instances_dir);
+	return ret;
+}
+
+/**
+ * add_all_instances - Add all pre-existing instances to the internal list
+ *
+ * The top-level tracing directory is looked up with
+ * tracefs_tracing_dir(); the function dies if it cannot be found.
+ * A failure to scan the instances themselves is not reported.
+ */
+void add_all_instances(void)
+{
+	const char *top_dir;
+
+	top_dir = tracefs_tracing_dir();
+	if (top_dir == NULL)
+		die("can't get the tracing directory");
+
+	__add_all_instances(top_dir);
+}
+
+/**
+ * tracecmd_stat_cpu_instance - show the buffer stats of a particular CPU
+ * @instance: the buffer instance whose per_cpu stats file is read
+ * @s: the trace_seq to record the data in.
+ * @cpu: the CPU to stat
+ *
+ * Appends the content of "per_cpu/cpu<cpu>/stats" to @s.  Nothing is
+ * appended if the file cannot be located or opened.
+ */
+void tracecmd_stat_cpu_instance(struct buffer_instance *instance,
+				struct trace_seq *s, int cpu)
+{
+	char buf[BUFSIZ];
+	/* "per_cpu/cpu" + any int + "/stats" fits comfortably */
+	char file[40];
+	char *path;
+	int fd;
+	int r;
+
+	snprintf(file, sizeof(file), "per_cpu/cpu%d/stats", cpu);
+
+	path = tracefs_instance_get_file(instance->tracefs, file);
+	if (!path)
+		return;
+	fd = open(path, O_RDONLY);
+	tracefs_put_tracing_file(path);
+	if (fd < 0)
+		return;
+
+	while ((r = read(fd, buf, sizeof(buf))) > 0)
+		trace_seq_printf(s, "%.*s", r, buf);
+
+	close(fd);
+}
+
+/**
+ * tracecmd_stat_cpu - show the top instance's buffer stats of a CPU
+ * @s: the trace_seq to record the data in.
+ * @cpu: the CPU to stat
+ *
+ * Shorthand for tracecmd_stat_cpu_instance() on the top instance.
+ */
+void tracecmd_stat_cpu(struct trace_seq *s, int cpu)
+{
+	struct buffer_instance *top = &top_instance;
+
+	tracecmd_stat_cpu_instance(top, s, cpu);
+}
+
+/*
+ * Append @event to the end of @instance's event list and advance the
+ * instance's tail pointer to the new last element.
+ */
+static void add_event(struct buffer_instance *instance, struct event_list *event)
+{
+	struct event_list **tail = instance->event_next;
+
+	*tail = event;
+	instance->event_next = &event->next;
+	event->next = NULL;
+}
+
+/*
+ * Make @instance's event list empty again: clear the head and rewind
+ * the tail pointer to it.  The events themselves are not freed here.
+ */
+static void reset_event_list(struct buffer_instance *instance)
+{
+	instance->events = NULL;
+	instance->event_next = &instance->events;
+}
+
+/*
+ * Build the name of the temporary per-CPU file of @instance for @cpu:
+ * "<output_file>.<instance name>.cpu<cpu>", or "<output_file>.cpu<cpu>"
+ * when the instance has no name.
+ *
+ * Returns a heap allocated string that must be released with
+ * put_temp_file().  Dies on allocation failure.
+ */
+static char *get_temp_file(struct buffer_instance *instance, int cpu)
+{
+	const char *output_file = instance->output_file;
+	const char *name;
+	char *file = NULL;
+	int ret;
+
+	name = tracefs_instance_get_name(instance->tracefs);
+	if (name)
+		ret = asprintf(&file, "%s.%s.cpu%d", output_file, name, cpu);
+	else
+		ret = asprintf(&file, "%s.cpu%d", output_file, cpu);
+
+	/* @name is NULL in the unnamed case, so never hand it to %s */
+	if (ret < 0)
+		die("Failed to allocate temp file for %s",
+		    name ? name : output_file);
+
+	return file;
+}
+
+/*
+ * Derive a guest specific file name from @file by inserting "-<guest>"
+ * in front of the last '.' of @file, or at the end when @file has no
+ * '.' or only one in the very first position.
+ *
+ * Returns a newly allocated string the caller must free(), or NULL on
+ * allocation failure.
+ */
+char *trace_get_guest_file(const char *file, const char *guest)
+{
+	const char *dot = strrchr(file, '.');
+	char *result = NULL;
+	int stem_len;
+
+	if (!dot || dot == file)
+		stem_len = strlen(file);
+	else
+		stem_len = dot - file;
+
+	if (asprintf(&result, "%.*s-%s%s", stem_len, file,
+		     guest, file + stem_len) < 0)
+		return NULL;
+
+	return result;
+}
+
+/* Release a file name obtained from get_temp_file(). */
+static void put_temp_file(char *file)
+{
+	free(file);
+}
+
+/*
+ * Remove the temporary per-CPU file of @instance for @cpu.  The name is
+ * built the same way get_temp_file() builds it, but in a PATH_MAX sized
+ * stack buffer.  The result of unlink() is not checked.
+ */
+static void delete_temp_file(struct buffer_instance *instance, int cpu)
+{
+	char path[PATH_MAX];
+	const char *base = instance->output_file;
+	const char *name = tracefs_instance_get_name(instance->tracefs);
+
+	if (!name)
+		snprintf(path, PATH_MAX, "%s.cpu%d", base, cpu);
+	else
+		snprintf(path, PATH_MAX, "%s.%s.cpu%d", base, name, cpu);
+
+	unlink(path);
+}
+
+/*
+ * Kill the recorder of every CPU of @instance.
+ *
+ * @start is the index in pids[] of the instance's first CPU.  For every
+ * slot that holds a live pid the process gets SIGKILL, its temp file is
+ * removed, the pid is cleared and brass[0] is closed if it is valid.
+ *
+ * Returns the index following the instance's last slot.
+ */
+static int kill_thread_instance(int start, struct buffer_instance *instance)
+{
+	int slot = start;
+	int cpu;
+
+	for (cpu = 0; cpu < instance->cpu_count; cpu++, slot++) {
+		if (pids[slot].pid <= 0)
+			continue;
+
+		kill(pids[slot].pid, SIGKILL);
+		delete_temp_file(instance, cpu);
+		pids[slot].pid = 0;
+		if (pids[slot].brass[0] >= 0)
+			close(pids[slot].brass[0]);
+	}
+
+	return slot;
+}
+
+/*
+ * Kill the recorders of all instances.  Does nothing if no recorder
+ * threads were started or pids[] was never allocated.
+ */
+static void kill_threads(void)
+{
+	struct buffer_instance *instance;
+	int next = 0;
+
+	if (recorder_threads && pids) {
+		for_all_instances(instance)
+			next = kill_thread_instance(next, instance);
+	}
+}
+
+/*
+ * Print a fatal error and terminate the process.
+ *
+ * If errno is set on entry, perror() output comes first and errno is
+ * used as the exit status; otherwise the status is -1.  All recorder
+ * threads are killed before the printf-style message @fmt is written
+ * to stderr.
+ */
+void die(const char *fmt, ...)
+{
+	int status = errno;
+	va_list args;
+
+	if (status)
+		perror("trace-cmd");
+	else
+		status = -1;
+
+	kill_threads();
+
+	va_start(args, fmt);
+	fputs(" ", stderr);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+
+	fputc('\n', stderr);
+	exit(status);
+}
+
+/*
+ * Remove the temp files of @instance.
+ *
+ * With pids[] allocated, @start is the index of the instance's first
+ * CPU slot: the temp file of every slot with a non-zero pid is removed
+ * and negative pids are reset to 0.  Without pids[] the temp file of
+ * every CPU is removed.
+ *
+ * Returns the index following the instance's last slot (or @start when
+ * pids[] is not allocated).
+ */
+static int delete_thread_instance(int start, struct buffer_instance *instance)
+{
+	int slot = start;
+	int cpu;
+
+	if (!pids) {
+		/* Extract does not allocate pids */
+		for (cpu = 0; cpu < instance->cpu_count; cpu++)
+			delete_temp_file(instance, cpu);
+		return slot;
+	}
+
+	for (cpu = 0; cpu < instance->cpu_count; cpu++, slot++) {
+		if (!pids[slot].pid)
+			continue;
+
+		delete_temp_file(instance, cpu);
+		if (pids[slot].pid < 0)
+			pids[slot].pid = 0;
+	}
+
+	return slot;
+}
+
+/* Remove the temp files of every instance. */
+static void delete_thread_data(void)
+{
+	struct buffer_instance *instance;
+	int slot = 0;
+	int cpu;
+
+	for_all_instances(instance)
+		slot = delete_thread_instance(slot, instance);
+
+	if (!no_top_instance())
+		return;
+
+	/*
+	 * Top instance temp files are still created even if it
+	 * isn't used.
+	 */
+	for (cpu = 0; cpu < local_cpu_count; cpu++)
+		delete_temp_file(&top_instance, cpu);
+}
+
+/*
+ * Add a TRACECMD_OPTION_TSC2NSEC option to @handle, made of three
+ * pieces taken from @tsc2nsec: 4 bytes of mult, 4 bytes of shift and
+ * 8 bytes of offset, in that order.
+ */
+static void
+add_tsc2nsec(struct tracecmd_output *handle, struct tsc_nsec *tsc2nsec)
+{
+	/* multiplier, shift, offset */
+	struct iovec vector[3] = {
+		{ .iov_base = &tsc2nsec->mult,   .iov_len = 4 },
+		{ .iov_base = &tsc2nsec->shift,  .iov_len = 4 },
+		{ .iov_base = &tsc2nsec->offset, .iov_len = 8 },
+	};
+
+	tracecmd_add_option_v(handle, TRACECMD_OPTION_TSC2NSEC, vector, 3);
+}
+
+/*
+ * Finish the time synchronization with the guest of @instance.
+ *
+ * If stopping the synchronization succeeds, the instance's output file
+ * is reopened and the tsc2nsec option (when ctx->tsc2nsec.mult is set)
+ * and the guest time shift are written to it.  The tsync context is
+ * freed in every case.  Dies if the output file cannot be reopened.
+ */
+static void host_tsync_complete(struct common_record_context *ctx,
+				struct buffer_instance *instance)
+{
+	struct tracecmd_output *out;
+	int out_fd;
+
+	if (tracecmd_tsync_with_guest_stop(instance->tsync) == 0) {
+		out_fd = open(instance->output_file, O_RDWR);
+		if (out_fd < 0)
+			die("error opening %s", instance->output_file);
+
+		out = tracecmd_get_output_handle_fd(out_fd);
+		if (out == NULL)
+			die("cannot create output handle");
+
+		if (ctx->tsc2nsec.mult)
+			add_tsc2nsec(out, &ctx->tsc2nsec);
+
+		tracecmd_write_guest_time_shift(out, instance->tsync);
+		tracecmd_append_options(out);
+		tracecmd_output_close(out);
+	}
+
+	tracecmd_tsync_free(instance->tsync);
+	instance->tsync = NULL;
+}
+
+/*
+ * Shut down tracing on all guest instances in three passes: send every
+ * guest a close message, complete the time synchronization of every
+ * guest, then wait for each guest's close response and close its
+ * message handle.
+ */
+static void tell_guests_to_stop(struct common_record_context *ctx)
+{
+	struct buffer_instance *instance;
+
+	/* Send close message to guests */
+	for_all_instances(instance) {
+		if (!is_guest(instance))
+			continue;
+		tracecmd_msg_send_close_msg(instance->msg_handle);
+	}
+
+	for_all_instances(instance) {
+		if (!is_guest(instance))
+			continue;
+		host_tsync_complete(ctx, instance);
+	}
+
+	/* Wait for guests to acknowledge */
+	for_all_instances(instance) {
+		if (!is_guest(instance))
+			continue;
+		tracecmd_msg_wait_close_resp(instance->msg_handle);
+		tracecmd_msg_handle_close(instance->msg_handle);
+	}
+}
+
+/*
+ * Ask every running recorder to finish by sending it SIGUSR1.  When
+ * streaming (TRACE_TYPE_STREAM set in @type), keep reading the stream
+ * until trace_stream_read() has nothing more to deliver.
+ */
+static void stop_threads(enum trace_type type)
+{
+	int pending;
+	int idx;
+
+	if (!recorder_threads)
+		return;
+
+	/* Tell all threads to finish up */
+	for (idx = 0; idx < recorder_threads; idx++) {
+		if (pids[idx].pid > 0)
+			kill(pids[idx].pid, SIGUSR1);
+	}
+
+	if (!(type & TRACE_TYPE_STREAM))
+		return;
+
+	/* Flush out the pipes */
+	do {
+		pending = trace_stream_read(pids, recorder_threads, NULL);
+	} while (pending > 0);
+}
+
+/*
+ * Reap every recorder that still has a live pid and mark its slot
+ * with -1 afterwards.
+ */
+static void wait_threads(void)
+{
+	int idx;
+
+	for (idx = 0; idx < recorder_threads; idx++) {
+		if (pids[idx].pid <= 0)
+			continue;
+
+		waitpid(pids[idx].pid, NULL, 0);
+		pids[idx].pid = -1;
+	}
+}
+
+static int create_recorder(struct buffer_instance *instance, int cpu,
+ enum trace_type type, int *brass);
+
+/*
+ * Run an extract-type recorder for every CPU of every instance.
+ * Dies if any of them reports an error.
+ */
+static void flush_threads(void)
+{
+	struct buffer_instance *instance;
+	long err;
+	int cpu;
+
+	for_all_instances(instance) {
+		for (cpu = 0; cpu < instance->cpu_count; cpu++) {
+			/* Extract doesn't support sub buffers yet */
+			err = create_recorder(instance, cpu, TRACE_TYPE_EXTRACT, NULL);
+			if (err < 0)
+				die("error reading ring buffer");
+		}
+	}
+}
+
+/*
+ * Write "1" (when @set is non-zero) or "0" to the file @path, after
+ * saving its current content for a later reset.
+ *
+ * Returns -ENODEV if @path does not exist, -1 if it cannot be opened,
+ * the negative result of write() if writing fails, and 0 on success.
+ */
+static int set_ftrace_enable(const char *path, int set)
+{
+	const char *val = set ? "1" : "0";
+	struct stat st;
+	int ret;
+	int fd;
+
+	/* if ftrace_enable does not exist, simply ignore it */
+	if (stat(path, &st) < 0)
+		return -ENODEV;
+
+	reset_save_file(path, RESET_DEFAULT_PRIO);
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0)
+		return -1;
+
+	/* Now set or clear the function option */
+	ret = write(fd, val, 1);
+	close(fd);
+
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Enable (@set non-zero) or disable ftrace through
+ * /proc/sys/kernel/ftrace_enabled.
+ *
+ * Dies when set_ftrace_enable() returns exactly -1; any other result
+ * (including -ENODEV for a missing file) is passed back to the caller.
+ */
+static int set_ftrace_proc(int set)
+{
+	int ret = set_ftrace_enable("/proc/sys/kernel/ftrace_enabled", set);
+
+	if (ret == -1)
+		die ("Can't %s ftrace", set ? "enable" : "disable");
+
+	return ret;
+}
+
+/*
+ * Set or clear the "options/function-trace" option of @instance.
+ *
+ * The proc file is written as well when the option could not be set,
+ * when @set is non-zero, or when @use_proc is non-zero; in that case
+ * the result of set_ftrace_proc() is returned.
+ *
+ * Returns -1 if the option file path cannot be obtained.
+ */
+static int set_ftrace(struct buffer_instance *instance, int set, int use_proc)
+{
+	char *opt_path;
+	int ret;
+
+	opt_path = tracefs_instance_get_file(instance->tracefs, "options/function-trace");
+	if (opt_path == NULL)
+		return -1;
+
+	ret = set_ftrace_enable(opt_path, set);
+	tracefs_put_tracing_file(opt_path);
+
+	/* Always enable ftrace_enable proc file when set is true */
+	if (set || use_proc || ret < 0)
+		ret = set_ftrace_proc(set);
+
+	return ret;
+}
+
+/*
+ * Truncate @file and write the string @str to it.
+ *
+ * Dies if the file cannot be opened.  Returns the result of write().
+ */
+static int write_file(const char *file, const char *str)
+{
+	int written;
+	int fd = open(file, O_WRONLY | O_TRUNC);
+
+	if (fd < 0)
+		die("opening to '%s'", file);
+
+	written = write(fd, str, strlen(str));
+	close(fd);
+
+	return written;
+}
+
+/*
+ * Reset the trace buffer of @instance by writing "0" to its "trace"
+ * file.  Guest instances are skipped.  Dies if the file cannot be
+ * opened for writing.
+ */
+static void __clear_trace(struct buffer_instance *instance)
+{
+	char *trace_path;
+	FILE *out;
+
+	if (is_guest(instance))
+		return;
+
+	/* reset the trace */
+	trace_path = tracefs_instance_get_file(instance->tracefs, "trace");
+	out = fopen(trace_path, "w");
+	if (out == NULL)
+		die("writing to '%s'", trace_path);
+	tracefs_put_tracing_file(trace_path);
+
+	fputc('0', out);
+	fclose(out);
+}
+
+ /* Run __clear_trace() on every instance visited by for_all_instances(). */
+ static void clear_trace_instances(void)
+ {
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ __clear_trace(instance);
+ }
+
+ /* Write "0" to the instance's "tracing_max_latency" file; the result is ignored. */
+ static void reset_max_latency(struct buffer_instance *instance)
+ {
+ tracefs_instance_file_write(instance->tracefs,
+ "tracing_max_latency", "0");
+ }
+
+ /*
+  * Record @pid in @instance's pid filter list.
+  *
+  * If the pid is already listed only its exclude flag is updated and 0 is
+  * returned.  Otherwise a new entry is pushed on the front of the list, the
+  * pid count and accumulated decimal length are updated, and 1 is returned.
+  * Dies on allocation failure.
+  */
+ static int add_filter_pid(struct buffer_instance *instance, int pid, int exclude)
+ {
+ 	struct filter_pids *entry = instance->filter_pids;
+ 	char digits[100];
+
+ 	while (entry) {
+ 		if (entry->pid == pid) {
+ 			entry->exclude = exclude;
+ 			return 0;
+ 		}
+ 		entry = entry->next;
+ 	}
+
+ 	entry = malloc(sizeof(*entry));
+ 	if (!entry)
+ 		die("Failed to allocate pid filter");
+
+ 	entry->pid = pid;
+ 	entry->exclude = exclude;
+ 	entry->next = instance->filter_pids;
+ 	instance->filter_pids = entry;
+ 	instance->nr_filter_pids++;
+
+ 	/* sprintf() is only used to learn how many characters the pid needs */
+ 	instance->len_filter_pids += sprintf(digits, "%d", pid);
+
+ 	return 1;
+ }
+
+ /* Apply add_filter_pid(@pid, @exclude) to every instance; return values are ignored. */
+ static void add_filter_pid_all(int pid, int exclude)
+ {
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ add_filter_pid(instance, pid, exclude);
+ }
+
+ /*
+  * Register the instance's "set_ftrace_pid" file with
+  * reset_save_file_cond(), passing "no pid" and "" as the condition and
+  * replacement strings.  Does nothing when the file does not exist or its
+  * path cannot be obtained.
+  */
+ static void reset_save_ftrace_pid(struct buffer_instance *instance)
+ {
+ 	static char *path;
+
+ 	if (tracefs_file_exists(instance->tracefs, "set_ftrace_pid")) {
+ 		path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid");
+ 		if (path) {
+ 			reset_save_file_cond(path, RESET_DEFAULT_PRIO, "no pid", "");
+ 			tracefs_put_tracing_file(path);
+ 		}
+ 	}
+ }
+
+ /*
+  * Write the pid string @pid to the instance's "set_ftrace_pid" file.
+  *
+  * @reset: when non-zero the file is opened with O_TRUNC, otherwise the
+  *         pid is written without truncating.
+  *
+  * Silently returns when the file does not exist or cannot be opened.
+  * Dies when the write fails.  The path is only released after its last
+  * use, because the error message prints it.
+  */
+ static void update_ftrace_pid(struct buffer_instance *instance,
+ 			      const char *pid, int reset)
+ {
+ 	char *path;
+ 	int ret;
+ 	int fd;
+
+ 	if (!tracefs_file_exists(instance->tracefs, "set_ftrace_pid"))
+ 		return;
+
+ 	path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid");
+ 	if (!path)
+ 		return;
+
+ 	fd = open(path, O_WRONLY | O_CLOEXEC | (reset ? O_TRUNC : 0));
+ 	if (fd < 0)
+ 		goto out;
+
+ 	ret = write(fd, pid, strlen(pid));
+
+ 	/*
+ 	 * Older kernels required "-1" to disable pid
+ 	 */
+ 	if (ret < 0 && !strlen(pid))
+ 		ret = write(fd, "-1", 2);
+
+ 	if (ret < 0)
+ 		die("error writing to %s", path);
+ 	/* add whitespace in case another pid is written */
+ 	write(fd, " ", 1);
+ 	close(fd);
+ out:
+ 	tracefs_put_tracing_file(path);
+ }
+
+ /*
+  * Write every non-excluded filter pid of every instance to that instance's
+  * set_ftrace_pid file.  @reset is only passed on for the first pid written
+  * per instance; later pids are written with reset == 0.  On the very first
+  * call the file is also registered through reset_save_ftrace_pid().
+  */
+ static void update_ftrace_pids(int reset)
+ {
+ 	struct buffer_instance *instance;
+ 	struct filter_pids *fp;
+ 	static int first = 1;
+ 	char pidstr[100];
+ 	int do_reset;
+
+ 	for_all_instances(instance) {
+ 		if (first)
+ 			reset_save_ftrace_pid(instance);
+
+ 		do_reset = reset;
+ 		for (fp = instance->filter_pids; fp; fp = fp->next) {
+ 			if (!fp->exclude) {
+ 				snprintf(pidstr, sizeof(pidstr), "%d ", fp->pid);
+ 				update_ftrace_pid(instance, pidstr, do_reset);
+ 				/* Only reset the first entry */
+ 				do_reset = 0;
+ 			}
+ 		}
+ 	}
+
+ 	first = 0;
+ }
+
+static void update_event_filters(struct buffer_instance *instance);
+static void update_pid_event_filters(struct buffer_instance *instance);
+
+ /*
+  * Append one pid clause for [@start_pid, @end_pid] on @field to *@filter.
+  *
+  * *@filter is grown with realloc() and *@curr_len (the current string
+  * length) is advanced by the number of characters appended.  When the
+  * filter already has text, the clause is joined with "&&" (exclude) or
+  * "||" (include).  Dies when realloc() fails.
+  */
+ static void append_filter_pid_range(char **filter, int *curr_len,
+ 				    const char *field,
+ 				    int start_pid, int end_pid, bool exclude)
+ {
+ 	bool single = (start_pid == end_pid);
+ 	const char *join = "";
+ 	const char *cmp_eq = exclude ? "!=" : "==";
+ 	const char *cmp_lo = exclude ? "<" : ">=";
+ 	const char *glue = exclude ? "||" : "&&";
+ 	const char *cmp_hi = exclude ? ">" : "<=";
+ 	int need;
+
+ 	if (*filter && **filter)
+ 		join = exclude ? "&&" : "||";
+
+ 	/*
+ 	 * Handle the singleton case explicitly so that we get `pid==3`
+ 	 * instead of `pid>=3&&pid<=3`.  First pass only measures.
+ 	 */
+ 	if (single)
+ 		need = snprintf(NULL, 0, "%s(%s%s%d)", join,
+ 				field, cmp_eq, start_pid);
+ 	else
+ 		need = snprintf(NULL, 0, "%s(%s%s%d%s%s%s%d)", join,
+ 				field, cmp_lo, start_pid, glue,
+ 				field, cmp_hi, end_pid);
+
+ 	*filter = realloc(*filter, *curr_len + need + 1);
+ 	if (!*filter)
+ 		die("realloc");
+
+ 	if (single)
+ 		need = snprintf(*filter + *curr_len, need + 1, "%s(%s%s%d)", join,
+ 				field, cmp_eq, start_pid);
+ 	else
+ 		need = snprintf(*filter + *curr_len, need + 1,
+ 				"%s(%s%s%d%s%s%s%d)", join,
+ 				field, cmp_lo, start_pid, glue,
+ 				field, cmp_hi, end_pid);
+
+ 	*curr_len += need;
+ }
+
+/**
+ * make_pid_filter - create a filter string to all pids against @field
+ * @curr_filter: Append to a previous filter (may realloc). Can be NULL
+ * @field: The field to compare the pids against
+ *
+ * Creates a new string or appends to an existing one if @curr_filter
+ * is not NULL. The new string will contain a filter with all pids
+ * in pid_filter list with the format (@field == pid) || ..
+ * If @curr_filter is not NULL, it will add this string as:
+ * (@curr_filter) && ((@field == pid) || ...)
+ */
+static char *make_pid_filter(struct buffer_instance *instance,
+ char *curr_filter, const char *field)
+{
+ int start_pid = -1, last_pid = -1;
+ int last_exclude = -1;
+ struct filter_pids *p;
+ char *filter = NULL;
+ int curr_len = 0;
+
+ /* Use the new method if possible */
+ if (instance->have_set_event_pid)
+ return NULL;
+
+ if (!instance->filter_pids)
+ return curr_filter;
+
+ for (p = instance->filter_pids; p; p = p->next) {
+ /*
+ * PIDs are inserted in `filter_pids` from the front and that's
+ * why we expect them in descending order here.
+ */
+ if (p->pid == last_pid - 1 && p->exclude == last_exclude) {
+ last_pid = p->pid;
+ continue;
+ }
+
+ if (start_pid != -1)
+ append_filter_pid_range(&filter, &curr_len, field,
+ last_pid, start_pid,
+ last_exclude);
+
+ start_pid = last_pid = p->pid;
+ last_exclude = p->exclude;
+
+ }
+ append_filter_pid_range(&filter, &curr_len, field,
+ last_pid, start_pid, last_exclude);
+
+ if (curr_filter) {
+ char *save = filter;
+ asprintf(&filter, "(%s)&&(%s)", curr_filter, filter);
+ free(save);
+ }
+
+ return filter;
+}
+
+#define _STRINGIFY(x) #x
+#define STRINGIFY(x) _STRINGIFY(x)
+
+ /*
+  * Read /proc/@pid/exe and /proc/@pid/maps and store the executable name
+  * plus every named mapping (start, end, name) in @instance->pid_maps.
+  *
+  * An existing entry for @pid is emptied and refilled; otherwise a new
+  * entry is pushed on the front of the list.
+  *
+  * Returns 0 on success, -ENOENT when the proc files cannot be read and
+  * -ENOMEM on allocation failure, in which case the entry for @pid is
+  * unlinked and freed.
+  */
+ static int get_pid_addr_maps(struct buffer_instance *instance, int pid)
+ {
+ 	struct pid_addr_maps *maps = instance->pid_maps;
+ 	struct tracecmd_proc_addr_map *map;
+ 	unsigned long long begin, end;
+ 	struct pid_addr_maps *m;
+ 	char mapname[PATH_MAX+1];
+ 	char fname[PATH_MAX+1];
+ 	char buf[PATH_MAX+100];
+ 	FILE *f;
+ 	int ret;
+ 	int res;
+ 	int i;
+
+ 	sprintf(fname, "/proc/%d/exe", pid);
+ 	ret = readlink(fname, mapname, PATH_MAX);
+ 	if (ret >= PATH_MAX || ret < 0)
+ 		return -ENOENT;
+ 	mapname[ret] = 0;
+
+ 	sprintf(fname, "/proc/%d/maps", pid);
+ 	f = fopen(fname, "r");
+ 	if (!f)
+ 		return -ENOENT;
+
+ 	while (maps) {
+ 		if (pid == maps->pid)
+ 			break;
+ 		maps = maps->next;
+ 	}
+
+ 	ret = -ENOMEM;
+ 	if (!maps) {
+ 		maps = calloc(1, sizeof(*maps));
+ 		if (!maps)
+ 			goto out_fail;
+ 		maps->pid = pid;
+ 		maps->next = instance->pid_maps;
+ 		instance->pid_maps = maps;
+ 	} else {
+ 		for (i = 0; i < maps->nr_lib_maps; i++)
+ 			free(maps->lib_maps[i].lib_name);
+ 		free(maps->lib_maps);
+ 		maps->lib_maps = NULL;
+ 		maps->nr_lib_maps = 0;
+ 		free(maps->proc_name);
+ 	}
+
+ 	maps->proc_name = strdup(mapname);
+ 	if (!maps->proc_name)
+ 		goto out_fail;
+
+ 	while (fgets(buf, sizeof(buf), f)) {
+ 		mapname[0] = '\0';
+ 		res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s",
+ 			     &begin, &end, mapname);
+ 		if (res == 3 && mapname[0] != '\0') {
+ 			map = realloc(maps->lib_maps,
+ 				      (maps->nr_lib_maps + 1) * sizeof(*map));
+ 			if (!map)
+ 				goto out_fail;
+ 			/*
+ 			 * Publish the new array right away: realloc() may
+ 			 * have freed the old one, and the error path below
+ 			 * walks maps->lib_maps.
+ 			 */
+ 			maps->lib_maps = map;
+ 			map[maps->nr_lib_maps].end = end;
+ 			map[maps->nr_lib_maps].start = begin;
+ 			map[maps->nr_lib_maps].lib_name = strdup(mapname);
+ 			if (!map[maps->nr_lib_maps].lib_name)
+ 				goto out_fail;
+ 			maps->nr_lib_maps++;
+ 		}
+ 	}
+
+ 	fclose(f);
+ 	return 0;
+
+ out_fail:
+ 	fclose(f);
+ 	if (maps) {
+ 		for (i = 0; i < maps->nr_lib_maps; i++)
+ 			free(maps->lib_maps[i].lib_name);
+ 		/* unlink the entry from the instance list before freeing it */
+ 		if (instance->pid_maps != maps) {
+ 			m = instance->pid_maps;
+ 			while (m) {
+ 				if (m->next == maps) {
+ 					m->next = maps->next;
+ 					break;
+ 				}
+ 				m = m->next;
+ 			}
+ 		} else
+ 			instance->pid_maps = maps->next;
+ 		free(maps->lib_maps);
+ 		maps->lib_maps = NULL;
+ 		maps->nr_lib_maps = 0;
+ 		free(maps->proc_name);
+ 		maps->proc_name = NULL;
+ 		free(maps);
+ 	}
+ 	return ret;
+ }
+
+ /*
+  * For every instance with get_procmap set, collect the address maps of
+  * each non-excluded filter pid.  Failures of get_pid_addr_maps() are
+  * ignored.
+  */
+ static void get_filter_pid_maps(void)
+ {
+ 	struct buffer_instance *instance;
+ 	struct filter_pids *fp;
+
+ 	for_all_instances(instance) {
+ 		if (instance->get_procmap) {
+ 			for (fp = instance->filter_pids; fp; fp = fp->next) {
+ 				if (!fp->exclude)
+ 					get_pid_addr_maps(instance, fp->pid);
+ 			}
+ 		}
+ 	}
+ }
+
+ /*
+  * Rebuild all pid based filtering, unless no_filter is set.
+  *
+  * Collects the address maps, adds the calling process' pid to every
+  * instance when filter_task is set, regenerates each instance's
+  * common_pid filter string, rewrites set_ftrace_pid (with reset) and
+  * finally updates the per-instance pid event filters.
+  */
+ static void update_task_filter(void)
+ {
+ 	struct buffer_instance *instance;
+ 	int self = getpid();
+
+ 	if (no_filter)
+ 		return;
+
+ 	get_filter_pid_maps();
+
+ 	if (filter_task)
+ 		add_filter_pid_all(self, 0);
+
+ 	for_all_instances(instance) {
+ 		if (instance->filter_pids) {
+ 			/* free(NULL) is a no-op, so no guard is needed */
+ 			free(instance->common_pid_filter);
+ 			instance->common_pid_filter =
+ 				make_pid_filter(instance, NULL, "common_pid");
+ 		}
+ 	}
+
+ 	update_ftrace_pids(1);
+
+ 	for_all_instances(instance) {
+ 		update_pid_event_filters(instance);
+ 	}
+ }
+
+ /*
+  * waitpid() wrapper.  For TRACE_TYPE_STREAM, WNOHANG is added to @options
+  * and trace_stream_read() is called (with a one second timeval) each time
+  * waitpid() reports nothing.  Returns the first non-zero waitpid() result.
+  */
+ static pid_t trace_waitpid(enum trace_type type, pid_t pid, int *status, int options)
+ {
+ 	struct timeval tv = { 1, 0 };
+ 	int streaming = type & TRACE_TYPE_STREAM;
+ 	int ret;
+
+ 	if (streaming)
+ 		options |= WNOHANG;
+
+ 	for (;;) {
+ 		ret = waitpid(pid, status, options);
+ 		if (ret != 0)
+ 			return ret;
+
+ 		if (streaming)
+ 			trace_stream_read(pids, recorder_threads, &tv);
+ 	}
+ }
+
+ /* Fallback syscall number, used only when the headers do not define it. */
+ #ifndef __NR_pidfd_open
+ #define __NR_pidfd_open 434
+ #endif
+
+ /* Issue the pidfd_open system call directly through syscall(). */
+ static int pidfd_open(pid_t pid, unsigned int flags) {
+ return syscall(__NR_pidfd_open, pid, flags);
+ }
+
+ /*
+  * Block in poll() until @pidfd reports POLLIN or the global `finished`
+  * flag is seen set.  Returns 1 when poll() fails with anything other than
+  * EINTR, 0 otherwise.
+  */
+ static int trace_waitpidfd(id_t pidfd) {
+ 	struct pollfd pollfd;
+
+ 	pollfd.fd = pidfd;
+ 	pollfd.events = POLLIN;
+
+ 	while (!finished) {
+ 		if (poll(&pollfd, 1, -1) >= 0)
+ 			break;
+
+ 		/* If poll was interrupted, keep waiting */
+ 		if (errno != EINTR)
+ 			return 1;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Open a pidfd for each non-excluded entry of @instance->process_pids and
+  * wait on each of them in turn.
+  *
+  * Note that instance->nr_process_pids is counted down while the list is
+  * walked.  Pids that no longer exist (ESRCH) are skipped.
+  *
+  * Returns 0 on success and 1 on allocation, pidfd_open() or poll failure.
+  */
+ static int trace_wait_for_processes(struct buffer_instance *instance) {
+ 	struct filter_pids *pid;
+ 	int *pidfds;
+ 	int nr_fds = 0;
+ 	int ret = 0;
+ 	int fd;
+ 	int i;
+
+ 	pidfds = malloc(sizeof(int) * instance->nr_process_pids);
+ 	if (!pidfds)
+ 		return 1;
+
+ 	for (pid = instance->process_pids;
+ 	     pid && instance->nr_process_pids;
+ 	     pid = pid->next) {
+ 		if (!pid->exclude) {
+ 			fd = pidfd_open(pid->pid, 0);
+ 			if (fd >= 0) {
+ 				pidfds[nr_fds++] = fd;
+ 			} else if (errno != ESRCH) {
+ 				/* the count is left untouched on this path */
+ 				ret = 1;
+ 				goto out;
+ 			}
+ 			/* ESRCH: the process has probably exited */
+ 		}
+ 		instance->nr_process_pids--;
+ 	}
+
+ 	for (i = 0; i < nr_fds; i++) {
+ 		if (trace_waitpidfd(pidfds[i])) {
+ 			ret = 1;
+ 			break;
+ 		}
+ 	}
+
+ out:
+ 	for (i = 0; i < nr_fds; i++)
+ 		close(pidfds[i]);
+ 	free(pidfds);
+ 	return ret;
+ }
+
+ /* Write @buf to the instance's "set_event_pid" file; the result is ignored. */
+ static void add_event_pid(struct buffer_instance *instance, const char *buf)
+ {
+ tracefs_instance_file_write(instance->tracefs, "set_event_pid", buf);
+ }
+
+#ifndef NO_PTRACE
+/**
+ * append_pid_filter - add a new pid to an existing filter
+ * @curr_filter: the filter to append to. If NULL, then allocate one
+ * @field: The fild to compare the pid to
+ * @pid: The pid to add to.
+ */
+static char *append_pid_filter(char *curr_filter, const char *field, int pid)
+{
+ char *filter;
+ int len;
+
+ len = snprintf(NULL, 0, "(%s==%d)||", field, pid);
+
+ if (!curr_filter) {
+ /* No need for +1 as we don't use the "||" */
+ filter = malloc(len);
+ if (!filter)
+ die("Failed to allocate pid filter");
+ sprintf(filter, "(%s==%d)", field, pid);
+ } else {
+ int indx = strlen(curr_filter);
+
+ len += indx;
+ filter = realloc(curr_filter, len + indx + 1);
+ if (!filter)
+ die("realloc");
+ sprintf(filter + indx, "||(%s==%d)", field, pid);
+ }
+
+ return filter;
+}
+
+ /*
+  * Extend @event's pid filter with "@field==@pid".
+  * Nothing is done when @event is NULL or has no pid filter yet.
+  */
+ static void append_sched_event(struct event_list *event, const char *field, int pid)
+ {
+ 	if (event && event->pid_filter)
+ 		event->pid_filter = append_pid_filter(event->pid_filter, field, pid);
+ }
+
+ /*
+  * Add @pid to the pid filters of the instance's sched_switch (field
+  * "next_pid"), sched_wakeup and sched_wakeup_new (field "pid") events.
+  */
+ static void update_sched_events(struct buffer_instance *instance, int pid)
+ {
+ /*
+ * Also make sure that the sched_switch to this pid
+ * and wakeups of this pid are also traced.
+ * Only need to do this if the events are active.
+ */
+ append_sched_event(instance->sched_switch_event, "next_pid", pid);
+ append_sched_event(instance->sched_wakeup_event, "pid", pid);
+ append_sched_event(instance->sched_wakeup_new_event, "pid", pid);
+ }
+
+static int open_instance_fd(struct buffer_instance *instance,
+ const char *file, int flags);
+
+ /*
+  * Start filtering on @child, a new task created by the traced task @pid.
+  *
+  * Only instances that have ptrace_child set and already filter on @pid
+  * are touched.  For those, @child is added to the pid list, to
+  * set_ftrace_pid, to the common_pid filter and either to set_event_pid or
+  * to the sched event filters.
+  */
+ static void add_new_filter_child_pid(int pid, int child)
+ {
+ 	struct buffer_instance *instance;
+ 	struct filter_pids *fpid;
+ 	char buf[100];
+
+ 	for_all_instances(instance) {
+ 		if (!instance->ptrace_child || !instance->filter_pids)
+ 			continue;
+ 		for (fpid = instance->filter_pids; fpid; fpid = fpid->next) {
+ 			if (fpid->pid == pid)
+ 				break;
+ 		}
+ 		if (!fpid)
+ 			continue;
+
+ 		add_filter_pid(instance, child, 0);
+ 		sprintf(buf, "%d", child);
+ 		update_ftrace_pid(instance, buf, 0);
+
+ 		/*
+ 		 * The parent is already part of the filters (it was found in
+ 		 * filter_pids above); it is the child that has to be added.
+ 		 */
+ 		instance->common_pid_filter = append_pid_filter(instance->common_pid_filter,
+ 								"common_pid", child);
+ 		if (instance->have_set_event_pid) {
+ 			add_event_pid(instance, buf);
+ 		} else {
+ 			update_sched_events(instance, child);
+ 			update_event_filters(instance);
+ 		}
+ 	}
+
+ }
+
+ /*
+  * PTRACE_ATTACH to @pid and add it to the pid filter of @instance, or of
+  * every instance when @instance is NULL.  If attaching fails a warning is
+  * printed and do_ptrace is cleared.
+  */
+ static void ptrace_attach(struct buffer_instance *instance, int pid)
+ {
+ 	if (ptrace(PTRACE_ATTACH, pid, NULL, 0) < 0) {
+ 		warning("Unable to trace process %d children", pid);
+ 		do_ptrace = 0;
+ 		return;
+ 	}
+
+ 	if (!instance) {
+ 		add_filter_pid_all(pid, 0);
+ 		return;
+ 	}
+
+ 	add_filter_pid(instance, pid, 0);
+ }
+
+ /*
+  * Issue PTRACE_TRACEME for the calling process, but only when both
+  * do_ptrace and filter_task are set.  The ptrace() result is not checked.
+  */
+ static void enable_ptrace(void)
+ {
+ if (!do_ptrace || !filter_task)
+ return;
+
+ ptrace(PTRACE_TRACEME, 0, NULL, 0);
+ }
+
+ /*
+  * Return the first instance that has @pid as a non-excluded filter pid,
+  * or NULL when no instance filters on it.
+  */
+ static struct buffer_instance *get_intance_fpid(int pid)
+ {
+ 	struct buffer_instance *instance;
+ 	struct filter_pids *entry;
+
+ 	for_all_instances(instance) {
+ 		for (entry = instance->filter_pids; entry; entry = entry->next) {
+ 			if (!entry->exclude && entry->pid == pid)
+ 				return instance;
+ 		}
+ 	}
+
+ 	return NULL;
+ }
+
+ /*
+  * Main ptrace loop: wait for the traced tasks, follow their
+  * fork/vfork/clone children and stop once every initially known pid has
+  * exited.
+  *
+  * The set of "main" pids is the non-excluded filter pids of all instances
+  * that have ptrace_child or get_procmap set.  When the last of them exits
+  * the global `finished` flag is set.
+  */
+ static void ptrace_wait(enum trace_type type)
+ {
+ 	struct buffer_instance *instance;
+ 	struct filter_pids *fpid;
+ 	unsigned long send_sig;
+ 	/* PTRACE_GETEVENTMSG stores an unsigned long, so these must be one */
+ 	unsigned long cstatus;
+ 	unsigned long child;
+ 	int nr_pids = 0;
+ 	siginfo_t sig;
+ 	int main_pids;
+ 	int status;
+ 	int i = 0;
+ 	int *pids;
+ 	int event = 0;
+ 	int pid;
+ 	int ret;
+
+
+ 	for_all_instances(instance)
+ 		nr_pids += instance->nr_filter_pids;
+
+ 	pids = calloc(nr_pids, sizeof(int));
+ 	if (!pids) {
+ 		warning("Unable to allocate array for %d PIDs", nr_pids);
+ 		return;
+ 	}
+ 	for_all_instances(instance) {
+ 		if (!instance->ptrace_child && !instance->get_procmap)
+ 			continue;
+
+ 		for (fpid = instance->filter_pids; fpid && i < nr_pids; fpid = fpid->next) {
+ 			if (fpid->exclude)
+ 				continue;
+ 			pids[i++] = fpid->pid;
+ 		}
+ 	}
+ 	main_pids = i;
+
+ 	do {
+ 		ret = trace_waitpid(type, -1, &status, WSTOPPED | __WALL);
+ 		if (ret < 0)
+ 			continue;
+
+ 		pid = ret;
+
+ 		if (WIFSTOPPED(status)) {
+ 			/* the ptrace event code is read from bits 16-23 of status */
+ 			event = (status >> 16) & 0xff;
+ 			ptrace(PTRACE_GETSIGINFO, pid, NULL, &sig);
+ 			send_sig = sig.si_signo;
+ 			/* Don't send ptrace sigs to child */
+ 			if (send_sig == SIGTRAP || send_sig == SIGSTOP)
+ 				send_sig = 0;
+ 			switch (event) {
+ 			case PTRACE_EVENT_FORK:
+ 			case PTRACE_EVENT_VFORK:
+ 			case PTRACE_EVENT_CLONE:
+ 				/* forked a child */
+ 				ptrace(PTRACE_GETEVENTMSG, pid, NULL, &child);
+ 				ptrace(PTRACE_SETOPTIONS, child, NULL,
+ 				       PTRACE_O_TRACEFORK |
+ 				       PTRACE_O_TRACEVFORK |
+ 				       PTRACE_O_TRACECLONE |
+ 				       PTRACE_O_TRACEEXIT);
+ 				add_new_filter_child_pid(pid, child);
+ 				ptrace(PTRACE_CONT, child, NULL, 0);
+ 				break;
+
+ 			case PTRACE_EVENT_EXIT:
+ 				instance = get_intance_fpid(pid);
+ 				if (instance && instance->get_procmap)
+ 					get_pid_addr_maps(instance, pid);
+ 				ptrace(PTRACE_GETEVENTMSG, pid, NULL, &cstatus);
+ 				ptrace(PTRACE_DETACH, pid, NULL, NULL);
+ 				break;
+ 			}
+ 			ptrace(PTRACE_SETOPTIONS, pid, NULL,
+ 			       PTRACE_O_TRACEFORK |
+ 			       PTRACE_O_TRACEVFORK |
+ 			       PTRACE_O_TRACECLONE |
+ 			       PTRACE_O_TRACEEXIT);
+ 			ptrace(PTRACE_CONT, pid, NULL, send_sig);
+ 		}
+ 		if (WIFEXITED(status) ||
+ 		    (WIFSTOPPED(status) && event == PTRACE_EVENT_EXIT)) {
+ 			for (i = 0; i < nr_pids; i++) {
+ 				if (pid == pids[i]) {
+ 					pids[i] = 0;
+ 					main_pids--;
+ 					if (!main_pids)
+ 						finished = 1;
+ 				}
+ 			}
+ 		}
+ 	} while (!finished && ret > 0);
+
+ 	free(pids);
+ }
+#else
+ /* NO_PTRACE builds: the ptrace helpers are empty stubs. */
+ static inline void ptrace_wait(enum trace_type type) { }
+ static inline void enable_ptrace(void) { }
+ static inline void ptrace_attach(struct buffer_instance *instance, int pid) { }
+
+#endif /* NO_PTRACE */
+
+ /*
+  * One idle step: run the ptrace loop when @pwait is set, otherwise read
+  * the stream (one second timeval) for TRACE_TYPE_STREAM, otherwise sleep
+  * for ten seconds.
+  */
+ static void trace_or_sleep(enum trace_type type, bool pwait)
+ {
+ 	struct timeval tv = { 1 , 0 };
+
+ 	if (pwait) {
+ 		ptrace_wait(type);
+ 		return;
+ 	}
+
+ 	if (type & TRACE_TYPE_STREAM) {
+ 		trace_stream_read(pids, recorder_threads, &tv);
+ 		return;
+ 	}
+
+ 	sleep(10);
+ }
+
+ /*
+  * Switch the process to @user: supplementary groups, gid and uid (in that
+  * order), then HOME, USER and LOGNAME.  A NULL @user is a no-op.
+  * Returns 0 on success and -1 on the first step that fails.
+  */
+ static int change_user(const char *user)
+ {
+ 	struct passwd *entry;
+
+ 	if (!user)
+ 		return 0;
+
+ 	entry = getpwnam(user);
+ 	if (!entry)
+ 		return -1;
+
+ 	/* || short-circuits, so the calls still run strictly in order */
+ 	if (initgroups(user, entry->pw_gid) < 0 ||
+ 	    setgid(entry->pw_gid) < 0 ||
+ 	    setuid(entry->pw_uid) < 0)
+ 		return -1;
+
+ 	if (setenv("HOME", entry->pw_dir, 1) < 0 ||
+ 	    setenv("USER", entry->pw_name, 1) < 0 ||
+ 	    setenv("LOGNAME", entry->pw_name, 1) < 0)
+ 		return -1;
+
+ 	return 0;
+ }
+
+ /*
+  * Fork and exec @argv (optionally as @user) with tracing enabled.
+  *
+  * The child updates the task filter, enables tracing, optionally requests
+  * ptrace, restores the saved stdout and execs.  The parent exits at once
+  * when fork_process is set; otherwise it waits for the child (through
+  * ptrace when do_ptrace is set) and exits afterwards for the
+  * TRACE_TYPE_START and TRACE_TYPE_SET types.
+  */
+ static void run_cmd(enum trace_type type, const char *user, int argc, char **argv)
+ {
+ 	int status;
+ 	int pid;
+
+ 	pid = fork();
+ 	if (pid < 0)
+ 		die("failed to fork");
+
+ 	if (pid == 0) {
+ 		/* child */
+ 		update_task_filter();
+ 		tracecmd_enable_tracing();
+ 		if (!fork_process)
+ 			enable_ptrace();
+ 		/*
+ 		 * If we are using stderr for stdout, switch
+ 		 * it back to the saved stdout for the code we run.
+ 		 */
+ 		if (save_stdout >= 0) {
+ 			close(1);
+ 			dup2(save_stdout, 1);
+ 			close(save_stdout);
+ 		}
+
+ 		if (change_user(user) < 0)
+ 			die("Failed to change user to %s", user);
+
+ 		/* execvp() only returns when it failed */
+ 		if (execvp(argv[0], argv)) {
+ 			fprintf(stderr, "\n********************\n");
+ 			fprintf(stderr, " Unable to exec %s\n", argv[0]);
+ 			fprintf(stderr, "********************\n");
+ 			die("Failed to exec %s", argv[0]);
+ 		}
+ 	}
+
+ 	if (fork_process)
+ 		exit(0);
+
+ 	if (!do_ptrace) {
+ 		trace_waitpid(type, pid, &status, 0);
+ 	} else {
+ 		ptrace_attach(NULL, pid);
+ 		ptrace_wait(type);
+ 	}
+
+ 	if (type & (TRACE_TYPE_START | TRACE_TYPE_SET))
+ 		exit(0);
+ }
+
+ /*
+  * Write @name to the instance's "current_tracer" file.
+  *
+  * When the name starts with "function", func_stack_trace is also
+  * written as "0" (instance file first, top level file as fallback) and
+  * queued with add_reset_file() at RESET_HIGH_PRIO.
+  * Guest instances are skipped.  Dies when current_tracer cannot be
+  * opened (unless @name starts with "nop") or written.
+  */
+ static void
+ set_plugin_instance(struct buffer_instance *instance, const char *name)
+ {
+ 	char zero = '0';
+ 	char *path;
+ 	int ret;
+ 	int fd;
+
+ 	if (is_guest(instance))
+ 		return;
+
+ 	path = tracefs_instance_get_file(instance->tracefs, "current_tracer");
+ 	fd = open(path, O_WRONLY);
+ 	if (fd < 0) {
+ 		/*
+ 		 * Legacy kernels do not have current_tracer file, and they
+ 		 * always use nop. So, it doesn't need to try to change the
+ 		 * plugin for those if name is "nop".
+ 		 */
+ 		if (strncmp(name, "nop", 3) != 0)
+ 			die("Opening '%s'", path);
+ 		tracefs_put_tracing_file(path);
+ 		return;
+ 	}
+
+ 	ret = write(fd, name, strlen(name));
+ 	close(fd);
+ 	if (ret < 0)
+ 		die("writing to '%s'", path);
+ 	tracefs_put_tracing_file(path);
+
+ 	if (strncmp(name, "function", 8) != 0)
+ 		return;
+
+ 	/* Make sure func_stack_trace option is disabled */
+ 	/* First try instance file, then top level */
+ 	path = tracefs_instance_get_file(instance->tracefs, "options/func_stack_trace");
+ 	fd = open(path, O_WRONLY);
+ 	if (fd < 0) {
+ 		tracefs_put_tracing_file(path);
+ 		path = tracefs_get_tracing_file("options/func_stack_trace");
+ 		fd = open(path, O_WRONLY);
+ 	}
+ 	if (fd < 0) {
+ 		tracefs_put_tracing_file(path);
+ 		return;
+ 	}
+
+ 	/*
+ 	 * Always reset func_stack_trace to zero. Don't bother saving
+ 	 * the original content.
+ 	 */
+ 	add_reset_file(path, "0", RESET_HIGH_PRIO);
+ 	tracefs_put_tracing_file(path);
+ 	write(fd, &zero, 1);
+ 	close(fd);
+ }
+
+ /* Apply set_plugin_instance(@name) to every instance. */
+ static void set_plugin(const char *name)
+ {
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ set_plugin_instance(instance, name);
+ }
+
+ /*
+  * Push @option on the front of @instance's pending option list.
+  * The string pointer is stored as is (not copied).  Dies on allocation
+  * failure.
+  */
+ static void save_option(struct buffer_instance *instance, const char *option)
+ {
+ 	struct opt_list *node = malloc(sizeof(*node));
+
+ 	if (!node)
+ 		die("Failed to allocate option");
+
+ 	node->option = option;
+ 	node->next = instance->options;
+ 	instance->options = node;
+ }
+
+ /*
+  * Write @option to the instance's "trace_options" file.
+  * Returns 0 on success; prints a warning and returns -1 when the file
+  * cannot be opened.  The result of the write itself is not checked.
+  */
+ static int set_option(struct buffer_instance *instance, const char *option)
+ {
+ 	char *path;
+ 	FILE *fp;
+
+ 	path = tracefs_instance_get_file(instance->tracefs, "trace_options");
+ 	fp = fopen(path, "w");
+ 	if (!fp) {
+ 		warning("writing to '%s'", path);
+ 		tracefs_put_tracing_file(path);
+ 		return -1;
+ 	}
+ 	tracefs_put_tracing_file(path);
+
+ 	fwrite(option, 1, strlen(option), fp);
+ 	fclose(fp);
+
+ 	return 0;
+ }
+
+ /*
+  * If the instance's current tracer reads as "function", write the
+  * "nofunc_stack_trace" option for it.
+  *
+  * Guest instances, instances without a current_tracer file and instances
+  * whose current_tracer cannot be read are left alone.
+  */
+ static void disable_func_stack_trace_instance(struct buffer_instance *instance)
+ {
+ 	struct stat st;
+ 	char *content;
+ 	char *path;
+ 	char *cond;
+ 	int size;
+ 	int ret;
+
+ 	if (is_guest(instance))
+ 		return;
+
+ 	path = tracefs_instance_get_file(instance->tracefs, "current_tracer");
+ 	ret = stat(path, &st);
+ 	tracefs_put_tracing_file(path);
+ 	if (ret < 0)
+ 		return;
+
+ 	content = tracefs_instance_file_read(instance->tracefs,
+ 					     "current_tracer", &size);
+ 	/* The read can fail even though the stat() above succeeded */
+ 	if (!content)
+ 		return;
+
+ 	cond = strstrip(content);
+ 	if (memcmp(cond, "function", size - (cond - content)) !=0)
+ 		goto out;
+
+ 	set_option(instance, "nofunc_stack_trace");
+  out:
+ 	free(content);
+ }
+
+ /* Apply disable_func_stack_trace_instance() to every instance. */
+ static void disable_func_stack_trace(void)
+ {
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ disable_func_stack_trace_instance(instance);
+ }
+
+ /*
+  * For every pending option of @instance, look up its current state in
+  * the "trace_options" file and queue (add_reset_file) the string that
+  * restores that state.  Nothing is queued when `keep` is set or when the
+  * option is already in the requested state.
+  *
+  * The file content is searched for whole-line matches of the option
+  * name, with or without a "no" prefix.
+  */
+ static void add_reset_options(struct buffer_instance *instance)
+ {
+ 	struct opt_list *opt;
+ 	const char *option;
+ 	char *content;
+ 	char *path;
+ 	char *ptr;
+ 	int len;
+
+ 	if (keep)
+ 		return;
+
+ 	path = tracefs_instance_get_file(instance->tracefs, "trace_options");
+ 	content = get_file_content(path);
+ 	/* NOTE(review): assumes get_file_content() reports failure as NULL */
+ 	if (!content) {
+ 		tracefs_put_tracing_file(path);
+ 		return;
+ 	}
+
+ 	for (opt = instance->options; opt; opt = opt->next) {
+ 		option = opt->option;
+ 		len = strlen(option);
+ 		ptr = content;
+  again:
+ 		ptr = strstr(ptr, option);
+ 		if (ptr) {
+ 			/* First make sure its the option we want */
+ 			if (ptr[len] != '\n') {
+ 				ptr += len;
+ 				goto again;
+ 			}
+ 			if (ptr - content >= 2 && strncmp(ptr - 2, "no", 2) == 0) {
+ 				/* Make sure this isn't ohno-option */
+ 				if (ptr > content + 2 && *(ptr - 3) != '\n') {
+ 					ptr += len;
+ 					goto again;
+ 				}
+ 				/* we enabled it */
+ 				ptr[len] = 0;
+ 				add_reset_file(path, ptr-2, RESET_DEFAULT_PRIO);
+ 				ptr[len] = '\n';
+ 				continue;
+ 			}
+ 			/* make sure this is our option */
+ 			if (ptr > content && *(ptr - 1) != '\n') {
+ 				ptr += len;
+ 				goto again;
+ 			}
+ 			/* this option hasn't changed, ignore it */
+ 			continue;
+ 		}
+
+ 		/* ptr is NULL, not found, maybe option is a no */
+ 		if (strncmp(option, "no", 2) != 0)
+ 			/* option is really not found? */
+ 			continue;
+
+ 		option += 2;
+ 		len = strlen(option);
+ 		ptr = content;
+  loop:
+ 		/*
+ 		 * Resume from the current position; searching from the start
+ 		 * of content again would find the same partial match forever.
+ 		 */
+ 		ptr = strstr(ptr, option);
+ 		if (!ptr)
+ 			/* Really not found? */
+ 			continue;
+
+ 		/* make sure this is our option */
+ 		if (ptr[len] != '\n') {
+ 			ptr += len;
+ 			goto loop;
+ 		}
+
+ 		if (ptr > content && *(ptr - 1) != '\n') {
+ 			ptr += len;
+ 			goto loop;
+ 		}
+
+ 		add_reset_file(path, option, RESET_DEFAULT_PRIO);
+ 	}
+ 	tracefs_put_tracing_file(path);
+ 	free(content);
+ }
+
+ /*
+  * For every instance: queue the reset values of its pending options, then
+  * pop the options one by one, write each to trace_options and free the
+  * list node.  Dies on the first option that cannot be written.
+  */
+ static void set_options(void)
+ {
+ 	struct buffer_instance *instance;
+ 	struct opt_list *node;
+
+ 	for_all_instances(instance) {
+ 		add_reset_options(instance);
+
+ 		while ((node = instance->options) != NULL) {
+ 			instance->options = node->next;
+ 			if (set_option(instance, node->option) < 0)
+ 				die("Failed to set ftrace option %s",
+ 				    node->option);
+ 			free(node);
+ 		}
+ 	}
+ }
+
+ /*
+  * Write ctx->saved_cmdlines_size (when non-zero) as a decimal string to
+  * the top level "saved_cmdlines_size" file, after registering the file
+  * with reset_save_file().  Any failure other than the string allocation
+  * only produces a warning; the allocation failure dies.
+  */
+ static void set_saved_cmdlines_size(struct common_record_context *ctx)
+ {
+ 	int failed = 1;
+ 	char *path;
+ 	char *str;
+ 	int len;
+ 	int fd;
+
+ 	if (!ctx->saved_cmdlines_size)
+ 		return;
+
+ 	path = tracefs_get_tracing_file("saved_cmdlines_size");
+ 	if (path) {
+ 		reset_save_file(path, RESET_DEFAULT_PRIO);
+
+ 		fd = open(path, O_WRONLY);
+ 		tracefs_put_tracing_file(path);
+ 		if (fd >= 0) {
+ 			len = asprintf(&str, "%d", ctx->saved_cmdlines_size);
+ 			if (len < 0)
+ 				die("%s couldn't allocate memory", __func__);
+
+ 			if (write(fd, str, len) > 0)
+ 				failed = 0;
+
+ 			close(fd);
+ 			free(str);
+ 		}
+ 	}
+
+ 	if (failed)
+ 		warning("Couldn't set saved_cmdlines_size");
+ }
+
+ /* Return 1 when stat() succeeds on @file inside @instance, 0 otherwise. */
+ static int trace_check_file_exists(struct buffer_instance *instance, char *file)
+ {
+ 	struct stat st;
+ 	char *path = tracefs_instance_get_file(instance->tracefs, file);
+ 	int exists = stat(path, &st) >= 0;
+
+ 	tracefs_put_tracing_file(path);
+
+ 	return exists;
+ }
+
+ /*
+  * Return 1 when the top instance has no "events/enable" file, 0 otherwise.
+  * The file is only checked on the first call; the answer is cached.
+  */
+ static int use_old_event_method(void)
+ {
+ 	static int old_event_method;
+ 	static int processed;
+
+ 	if (!processed) {
+ 		/* Check if the kernel has the events/enable file */
+ 		if (!trace_check_file_exists(&top_instance, "events/enable"))
+ 			old_event_method = 1;
+
+ 		processed = 1;
+ 	}
+
+ 	return old_event_method;
+ }
+
+ /*
+  * Enable or disable event @name through the top level "set_event" file.
+  *
+  * @name:   event name; "all" is rewritten to the "*:*" pattern
+  * @update: '0' disables the event (a "!" is written first); any other
+  *          value writes the bare name
+  *
+  * Dies when the file cannot be opened or a write comes up short.
+  * fclose() is deliberately left unchecked, as before.
+  */
+ static void old_update_events(const char *name, char update)
+ {
+ 	size_t len;
+ 	char *path;
+ 	FILE *fp;
+
+ 	if (strcmp(name, "all") == 0)
+ 		name = "*:*";
+
+ 	/* need to use old way */
+ 	path = tracefs_get_tracing_file("set_event");
+ 	fp = fopen(path, "w");
+ 	if (!fp)
+ 		die("opening '%s'", path);
+ 	tracefs_put_tracing_file(path);
+
+ 	/* Disable the event with "!" */
+ 	if (update == '0')
+ 		fwrite("!", 1, 1, fp);
+
+ 	/* fwrite() returns the item count written; it is never negative */
+ 	len = strlen(name);
+ 	if (fwrite(name, 1, len, fp) != len)
+ 		die("bad event '%s'", name);
+
+ 	if (fwrite("\n", 1, 1, fp) != 1)
+ 		die("bad event '%s'", name);
+
+ 	fclose(fp);
+
+ 	return;
+ }
+
+ /*
+  * Disable all events of @instance and write "0" to every
+  * events/<system>/filter file matched by glob().
+  *
+  * Guest instances are skipped.  With the old event method only the top
+  * instance is handled, through old_update_events("all", '0').
+  * Dies when events/enable or a matched filter file cannot be opened.
+  */
+ static void
+ reset_events_instance(struct buffer_instance *instance)
+ {
+ glob_t globbuf;
+ char *path;
+ char c;
+ int fd;
+ int i;
+ int ret;
+
+ if (is_guest(instance))
+ return;
+
+ if (use_old_event_method()) {
+ /* old way only had top instance */
+ if (!is_top_instance(instance))
+ return;
+ old_update_events("all", '0');
+ return;
+ }
+
+ c = '0';
+ path = tracefs_instance_get_file(instance->tracefs, "events/enable");
+ fd = open(path, O_WRONLY);
+ if (fd < 0)
+ die("opening to '%s'", path);
+ /* the write result is stored but never examined */
+ ret = write(fd, &c, 1);
+ close(fd);
+ tracefs_put_tracing_file(path);
+
+ path = tracefs_instance_get_file(instance->tracefs, "events/*/filter");
+ globbuf.gl_offs = 0;
+ ret = glob(path, 0, NULL, &globbuf);
+ tracefs_put_tracing_file(path);
+ /* NOTE(review): glob() error codes are positive on glibc, so this may never trigger - confirm */
+ if (ret < 0)
+ return;
+
+ for (i = 0; i < globbuf.gl_pathc; i++) {
+ path = globbuf.gl_pathv[i];
+ fd = open(path, O_WRONLY);
+ if (fd < 0)
+ die("opening to '%s'", path);
+ ret = write(fd, &c, 1);
+ close(fd);
+ }
+ globfree(&globbuf);
+ }
+
+ /* Apply reset_events_instance() to every instance. */
+ static void reset_events(void)
+ {
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ reset_events_instance(instance);
+ }
+
+ /*
+  * Three state constants.
+  * NOTE(review): no user is visible in this part of the file; presumably
+  * they drive a line-oriented parser elsewhere - confirm.
+  */
+ enum {
+ STATE_NEWLINE,
+ STATE_SKIP,
+ STATE_COPY,
+ };
+
+ /*
+  * Read the whole of @file into a newly allocated, NUL-terminated buffer.
+  *
+  * Returns NULL when the file cannot be opened, is empty, a read fails or
+  * memory runs out.  The caller frees the result.
+  */
+ static char *read_file(const char *file)
+ {
+ 	char stbuf[BUFSIZ];
+ 	char *buf = NULL;
+ 	int size = 0;
+ 	char *nbuf;
+ 	int fd;
+ 	int r;
+
+ 	fd = open(file, O_RDONLY);
+ 	if (fd < 0)
+ 		return NULL;
+
+ 	while ((r = read(fd, stbuf, BUFSIZ)) > 0) {
+ 		nbuf = realloc(buf, size+r+1);
+ 		if (!nbuf) {
+ 			r = -1;
+ 			break;
+ 		}
+ 		buf = nbuf;
+ 		memcpy(buf+size, stbuf, r);
+ 		size += r;
+ 		/* keep the buffer terminated after every chunk */
+ 		buf[size] = '\0';
+ 	}
+
+ 	close(fd);
+
+ 	/* Never hand back partial data after a read or allocation error */
+ 	if (r < 0) {
+ 		free(buf);
+ 		return NULL;
+ 	}
+
+ 	return buf;
+ }
+
+ /*
+  * Print the tail of the error log file @log: everything from the last
+  * line that does not begin with a space (the final line itself is not
+  * considered as a starting point).  Prints nothing when the file cannot
+  * be read or no such line exists.
+  */
+ static void read_error_log(const char *log)
+ {
+ 	char *start = NULL;
+ 	char *text;
+ 	char *line;
+ 	char *nl;
+
+ 	text = read_file(log);
+ 	if (!text)
+ 		return;
+
+ 	/* Only the last lines have meaning */
+ 	for (line = text; (nl = strstr(line, "\n")) && nl[1]; line = nl + 1) {
+ 		if (line[0] != ' ')
+ 			start = line;
+ 	}
+
+ 	if (start)
+ 		printf("%s", start);
+
+ 	free(text);
+ }
+
+ /*
+  * Report a failed write of @type ("filter", "trigger", ...) to @file.
+  *
+  * If @file lies under a "tracing" directory and an "error_log" file
+  * exists next to it (inside the instance directory for paths under
+  * "tracing/instances/<name>/"), the tail of that log is printed.
+  * Otherwise the content of @file itself is printed.  A final
+  * "Failed <type> of <file>" line is always printed.
+  */
+ static void show_error(const char *file, const char *type)
+ {
+ 	struct stat st;
+ 	char *path = strdup(file);
+ 	char *log = NULL;
+ 	char *content;
+ 	char *p;
+ 	int ret;
+
+ 	if (!path)
+ 		die("Could not allocate memory");
+
+ 	p = strstr(path, "tracing");
+ 	if (p) {
+ 		/* sizeof("tracing") includes the NUL, so this also skips the '/' */
+ 		if (strncmp(p + sizeof("tracing"), "instances", sizeof("instances") - 1) == 0) {
+ 			p = strstr(p + sizeof("tracing") + sizeof("instances"), "/");
+ 			if (!p)
+ 				goto read_file;
+ 		} else {
+ 			p += sizeof("tracing") - 1;
+ 		}
+ 		ret = asprintf(&log, "%.*s/error_log", (int)(p - path), path);
+ 		if (ret < 0)
+ 			die("Could not allocate memory");
+ 		ret = stat(log, &st);
+ 		if (ret < 0) {
+ 			free(log);
+ 			goto read_file;
+ 		}
+ 		read_error_log(log);
+ 		free(log);
+ 		goto out;
+ 	}
+
+  read_file:
+ 	content = read_file(path);
+ 	if (content) {
+ 		printf("%s", content);
+ 		free(content);
+ 	}
+
+  out:
+ 	printf("Failed %s of %s\n", type, file);
+ 	free(path);
+ 	return;
+ }
+
+ /* Write @filter to @file; on a failed write, report it through show_error(). */
+ static void write_filter(const char *file, const char *filter)
+ {
+ if (write_file(file, filter) < 0)
+ show_error(file, "filter");
+ }
+
+ /* Write "0" to the filter file @file. */
+ static void clear_filter(const char *file)
+ {
+ write_filter(file, "0");
+ }
+
+ /* Write @trigger to @file; on a failed write, report it through show_error(). */
+ static void write_trigger(const char *file, const char *trigger)
+ {
+ if (write_file(file, trigger) < 0)
+ show_error(file, "trigger");
+ }
+
+ /*
+  * Remove the triggers listed in @file by writing "!<trigger>" back to it
+  * for every non-comment line (only the text up to the first space is
+  * used).
+  *
+  * Returns 1 when a non-comment line is still present afterwards, 0 when
+  * none is left or the file cannot be read.
+  */
+ static int clear_trigger(const char *file)
+ {
+ 	char trigger[BUFSIZ];
+ 	char *save = NULL;
+ 	char *line;
+ 	char *buf;
+ 	int len;
+ 	int ret;
+
+ 	buf = read_file(file);
+ 	if (!buf) {
+ 		perror(file);
+ 		return 0;
+ 	}
+
+ 	trigger[0] = '!';
+
+ 	line = strtok_r(buf, "\n", &save);
+ 	while (line) {
+ 		if (line[0] != '#') {
+ 			len = strlen(line);
+ 			if (len > BUFSIZ - 2)
+ 				len = BUFSIZ - 2;
+ 			strncpy(trigger + 1, line, len);
+ 			trigger[len + 1] = '\0';
+ 			/* We don't want any filters or extra on the line */
+ 			strtok(trigger, " ");
+ 			write_file(file, trigger);
+ 		}
+ 		line = strtok_r(NULL, "\n", &save);
+ 	}
+
+ 	free(buf);
+
+ 	/*
+ 	 * Some triggers have an order in removing them.
+ 	 * They will not be removed if done in the wrong order.
+ 	 */
+ 	buf = read_file(file);
+ 	if (!buf)
+ 		return 0;
+
+ 	ret = 0;
+ 	for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) {
+ 		if (line[0] != '#') {
+ 			ret = 1;
+ 			break;
+ 		}
+ 	}
+ 	free(buf);
+ 	return ret;
+ }
+
+ /*
+  * Empty the function filter file @file.
+  *
+  * The file is truncated first; every non-comment line that is still
+  * listed afterwards is then removed by writing "!<line>\n" back, with any
+  * ":unlimited" suffix cut off.  Does nothing when @file does not exist;
+  * dies when it cannot be opened for truncation.
+  */
+ static void clear_func_filter(const char *file)
+ {
+ 	char filter[BUFSIZ];
+ 	struct stat st;
+ 	char *line;
+ 	char *buf;
+ 	char *p;
+ 	int len;
+ 	int ret;
+ 	int fd;
+
+ 	/* Function filters may not exist */
+ 	ret = stat(file, &st);
+ 	if (ret < 0)
+ 		return;
+
+ 	/*  First zero out normal filters */
+ 	fd = open(file, O_WRONLY | O_TRUNC);
+ 	if (fd < 0)
+ 		die("opening to '%s'", file);
+ 	close(fd);
+
+ 	buf = read_file(file);
+ 	if (!buf) {
+ 		perror(file);
+ 		return;
+ 	}
+
+ 	/* Now remove filters */
+ 	filter[0] = '!';
+
+ 	/*
+ 	 * To delete a filter, we need to write a '!filter'
+ 	 * to the file for each filter.
+ 	 */
+ 	for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) {
+ 		if (line[0] == '#')
+ 			continue;
+ 		/* leave room for the leading '!', the '\n' and the NUL */
+ 		len = strlen(line);
+ 		if (len > BUFSIZ - 3)
+ 			len = BUFSIZ - 3;
+
+ 		memcpy(filter + 1, line, len);
+ 		/* from here on len is the length of "!<line>" */
+ 		len++;
+ 		filter[len] = '\0';
+ 		/*
+ 		 * To remove "unlimited" filters, we must remove
+ 		 * the ":unlimited" from what we write.
+ 		 */
+ 		if ((p = strstr(filter, ":unlimited"))) {
+ 			*p = '\0';
+ 			len = p - filter;
+ 		}
+ 		/*
+ 		 * The write to this file expects white space
+ 		 * at the end :-p
+ 		 */
+ 		filter[len] = '\n';
+ 		filter[len+1] = '\0';
+ 		write_file(file, filter);
+ 	}
+
+ 	free(buf);
+ }
+
+ /*
+  * Drain the reset_triggers list: clear the triggers of each recorded
+  * path, then free the entry.
+  */
+ static void update_reset_triggers(void)
+ {
+ 	struct reset_file *entry;
+
+ 	while ((entry = reset_triggers) != NULL) {
+ 		reset_triggers = entry->next;
+
+ 		clear_trigger(entry->path);
+ 		free(entry->path);
+ 		free(entry);
+ 	}
+ }
+
+ /*
+  * Drain the reset_files list.  Unless `keep` is set, each entry's saved
+  * reset string is written back to its path; the entry is freed either way.
+  */
+ static void update_reset_files(void)
+ {
+ 	struct reset_file *entry;
+
+ 	while ((entry = reset_files) != NULL) {
+ 		reset_files = entry->next;
+
+ 		if (!keep)
+ 			write_file(entry->path, entry->reset);
+
+ 		free(entry->path);
+ 		free(entry->reset);
+ 		free(entry);
+ 	}
+ }
+
+ /*
+  * Apply the filter, trigger and enable state of @event.
+  *
+  * @filter:      filter string to write, or NULL to leave the filter alone
+  * @filter_only: when set, the enable file is not touched
+  * @update:      the character written to the enable file ('1' or '0')
+  *
+  * With the old event method only old_update_events() is used (and nothing
+  * is done for @filter_only).  A pending trigger is written once and then
+  * dropped from @event so it is not written again.
+  */
+ static void
+ update_event(struct event_list *event, const char *filter,
+ 	     int filter_only, char update)
+ {
+ 	const char *name = event->event;
+ 	size_t written;
+ 	FILE *fp;
+ 	char *path;
+
+ 	if (use_old_event_method()) {
+ 		if (filter_only)
+ 			return;
+ 		old_update_events(name, update);
+ 		return;
+ 	}
+
+ 	if (filter && event->filter_file) {
+ 		add_reset_file(event->filter_file, "0", RESET_DEFAULT_PRIO);
+ 		write_filter(event->filter_file, filter);
+ 	}
+
+ 	if (event->trigger_file) {
+ 		add_reset_trigger(event->trigger_file);
+ 		clear_trigger(event->trigger_file);
+ 		write_trigger(event->trigger_file, event->trigger);
+ 		/* Make sure we don't write this again */
+ 		free(event->trigger_file);
+ 		free(event->trigger);
+ 		event->trigger_file = NULL;
+ 		event->trigger = NULL;
+ 	}
+
+ 	if (filter_only || !event->enable_file)
+ 		return;
+
+ 	path = event->enable_file;
+
+ 	fp = fopen(path, "w");
+ 	if (!fp)
+ 		die("writing to '%s'", path);
+ 	/* fwrite() returns the item count written; it is never negative */
+ 	written = fwrite(&update, 1, 1, fp);
+ 	fclose(fp);
+ 	if (written != 1)
+ 		die("writing to '%s'", path);
+ }
+
+/*
+ * The debugfs file tracing_enabled needs to be deprecated.
+ * But just in case anyone fiddled with it. If it exists,
+ * make sure it is one.
+ * No error checking needed here.
+ */
+static void check_tracing_enabled(void)
+{
+ static int fd = -1;
+ char *path;
+
+ if (fd < 0) {
+ path = tracefs_get_tracing_file("tracing_enabled");
+ fd = open(path, O_WRONLY | O_CLOEXEC);
+ tracefs_put_tracing_file(path);
+
+ if (fd < 0)
+ return;
+ }
+ write(fd, "1", 1);
+}
+
+static int open_instance_fd(struct buffer_instance *instance,
+ const char *file, int flags)
+{
+ int fd;
+ char *path;
+
+ path = tracefs_instance_get_file(instance->tracefs, file);
+ fd = open(path, flags);
+ if (fd < 0) {
+ /* instances may not be created yet */
+ if (is_top_instance(instance))
+ die("opening '%s'", path);
+ }
+ tracefs_put_tracing_file(path);
+
+ return fd;
+}
+
+static int open_tracing_on(struct buffer_instance *instance)
+{
+ int fd = instance->tracing_on_fd;
+
+ /* OK, we keep zero for stdin */
+ if (fd > 0)
+ return fd;
+
+ fd = open_instance_fd(instance, "tracing_on", O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ return fd;
+ }
+ instance->tracing_on_fd = fd;
+
+ return fd;
+}
+
/*
 * Write "1" (@on non-zero) or "0" to the instance's tracing_on file.
 * Guest instances, and instances whose file can not be opened, are
 * skipped. A failed write is fatal.
 */
static void write_tracing_on(struct buffer_instance *instance, int on)
{
	const char *value = on ? "1" : "0";
	int fd;

	if (is_guest(instance))
		return;

	fd = open_tracing_on(instance);
	if (fd < 0)
		return;

	if (write(fd, value, 1) < 0)
		die("writing 'tracing_on'");
}
+
/*
 * Read the instance's tracing_on file and return its numeric value.
 *
 * Returns -1 for guest instances, the negative open result when the
 * file can not be opened, and dies when nothing can be read.
 */
static int read_tracing_on(struct buffer_instance *instance)
{
	char buf[10];
	int fd;
	int n;

	if (is_guest(instance))
		return -1;

	fd = open_tracing_on(instance);
	if (fd < 0)
		return fd;

	/* Leave room for the terminator */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n <= 0)
		die("Reading 'tracing_on'");

	/*
	 * Terminate right after the bytes that were read, so atoi()
	 * never looks at uninitialized parts of the buffer.
	 */
	buf[n] = 0;

	return atoi(buf);
}
+
+static void reset_max_latency_instance(void)
+{
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ reset_max_latency(instance);
+}
+
+void tracecmd_enable_tracing(void)
+{
+ struct buffer_instance *instance;
+
+ check_tracing_enabled();
+
+ for_all_instances(instance)
+ write_tracing_on(instance, 1);
+
+ if (latency)
+ reset_max_latency_instance();
+}
+
+void tracecmd_disable_tracing(void)
+{
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ write_tracing_on(instance, 0);
+}
+
+void tracecmd_disable_all_tracing(int disable_tracer)
+{
+ struct buffer_instance *instance;
+
+ tracecmd_disable_tracing();
+
+ if (disable_tracer) {
+ disable_func_stack_trace();
+ set_plugin("nop");
+ }
+
+ reset_events();
+
+ /* Force close and reset of ftrace pid file */
+ for_all_instances(instance)
+ update_ftrace_pid(instance, "", 1);
+
+ clear_trace_instances();
+}
+
+static void
+update_sched_event(struct buffer_instance *instance,
+ struct event_list *event, const char *field)
+{
+ if (!event)
+ return;
+
+ event->pid_filter = make_pid_filter(instance, event->pid_filter, field);
+}
+
+static void update_event_filters(struct buffer_instance *instance)
+{
+ struct event_list *event;
+ char *event_filter;
+ int free_it;
+ int len;
+ int common_len = 0;
+
+ if (instance->common_pid_filter)
+ common_len = strlen(instance->common_pid_filter);
+
+ for (event = instance->events; event; event = event->next) {
+ if (!event->neg) {
+
+ free_it = 0;
+ if (event->filter) {
+ if (!instance->common_pid_filter)
+ /*
+ * event->pid_filter is only created if
+ * common_pid_filter is. No need to check that.
+ * Just use the current event->filter.
+ */
+ event_filter = event->filter;
+ else if (event->pid_filter) {
+ free_it = 1;
+ len = common_len + strlen(event->pid_filter) +
+ strlen(event->filter) + strlen("()&&(||)") + 1;
+ event_filter = malloc(len);
+ if (!event_filter)
+ die("Failed to allocate event_filter");
+ sprintf(event_filter, "(%s)&&(%s||%s)",
+ event->filter, instance->common_pid_filter,
+ event->pid_filter);
+ } else {
+ free_it = 1;
+ len = common_len + strlen(event->filter) +
+ strlen("()&&()") + 1;
+ event_filter = malloc(len);
+ if (!event_filter)
+ die("Failed to allocate event_filter");
+ sprintf(event_filter, "(%s)&&(%s)",
+ event->filter, instance->common_pid_filter);
+ }
+ } else {
+ /* event->pid_filter only exists when common_pid_filter does */
+ if (!instance->common_pid_filter)
+ continue;
+
+ if (event->pid_filter) {
+ free_it = 1;
+ len = common_len + strlen(event->pid_filter) +
+ strlen("||") + 1;
+ event_filter = malloc(len);
+ if (!event_filter)
+ die("Failed to allocate event_filter");
+ sprintf(event_filter, "%s||%s",
+ instance->common_pid_filter, event->pid_filter);
+ } else
+ event_filter = instance->common_pid_filter;
+ }
+
+ update_event(event, event_filter, 1, '1');
+ if (free_it)
+ free(event_filter);
+ }
+ }
+}
+
+static void update_pid_filters(struct buffer_instance *instance)
+{
+ struct filter_pids *p;
+ char *filter;
+ char *str;
+ int len;
+ int ret;
+ int fd;
+
+ if (is_guest(instance))
+ return;
+
+ fd = open_instance_fd(instance, "set_event_pid",
+ O_WRONLY | O_CLOEXEC | O_TRUNC);
+ if (fd < 0)
+ die("Failed to access set_event_pid");
+
+ len = instance->len_filter_pids + instance->nr_filter_pids;
+ filter = malloc(len);
+ if (!filter)
+ die("Failed to allocate pid filter");
+
+ str = filter;
+
+ for (p = instance->filter_pids; p; p = p->next) {
+ if (p->exclude)
+ continue;
+ len = sprintf(str, "%d ", p->pid);
+ str += len;
+ }
+
+ if (filter == str)
+ goto out;
+
+ len = str - filter;
+ str = filter;
+ do {
+ ret = write(fd, str, len);
+ if (ret < 0)
+ die("Failed to write to set_event_pid");
+ str += ret;
+ len -= ret;
+ } while (ret >= 0 && len);
+
+ out:
+ close(fd);
+}
+
+static void update_pid_event_filters(struct buffer_instance *instance)
+{
+ if (instance->have_set_event_pid)
+ return update_pid_filters(instance);
+ /*
+ * Also make sure that the sched_switch to this pid
+ * and wakeups of this pid are also traced.
+ * Only need to do this if the events are active.
+ */
+ update_sched_event(instance, instance->sched_switch_event, "next_pid");
+ update_sched_event(instance, instance->sched_wakeup_event, "pid");
+ update_sched_event(instance, instance->sched_wakeup_new_event, "pid");
+
+ update_event_filters(instance);
+}
+
+#define MASK_STR_MAX 4096 /* Don't expect more than 32768 CPUS */
+
+static char *alloc_mask_from_hex(struct buffer_instance *instance, const char *str)
+{
+ char *cpumask;
+
+ if (strcmp(str, "-1") == 0) {
+ /* set all CPUs */
+ int bytes = (instance->cpu_count + 7) / 8;
+ int last = instance->cpu_count % 8;
+ int i;
+
+ cpumask = malloc(MASK_STR_MAX);
+ if (!cpumask)
+ die("can't allocate cpumask");
+
+ if (bytes > (MASK_STR_MAX-1)) {
+ warning("cpumask can't handle more than 32768 CPUS!");
+ bytes = MASK_STR_MAX-1;
+ }
+
+ sprintf(cpumask, "%x", (1 << last) - 1);
+
+ for (i = 1; i < bytes; i++)
+ cpumask[i] = 'f';
+
+ cpumask[i+1] = 0;
+ } else {
+ cpumask = strdup(str);
+ if (!cpumask)
+ die("can't allocate cpumask");
+ }
+
+ return cpumask;
+}
+
+static void set_mask(struct buffer_instance *instance)
+{
+ struct stat st;
+ char *path;
+ int fd;
+ int ret;
+
+ if (is_guest(instance))
+ return;
+
+ if (!instance->cpumask)
+ return;
+
+ path = tracefs_instance_get_file(instance->tracefs, "tracing_cpumask");
+ if (!path)
+ die("could not allocate path");
+ reset_save_file(path, RESET_DEFAULT_PRIO);
+
+ ret = stat(path, &st);
+ if (ret < 0) {
+ warning("%s not found", path);
+ goto out;
+ }
+
+ fd = open(path, O_WRONLY | O_TRUNC);
+ if (fd < 0)
+ die("could not open %s\n", path);
+
+ write(fd, instance->cpumask, strlen(instance->cpumask));
+
+ close(fd);
+ out:
+ tracefs_put_tracing_file(path);
+ free(instance->cpumask);
+ instance->cpumask = NULL;
+}
+
+static void enable_events(struct buffer_instance *instance)
+{
+ struct event_list *event;
+
+ if (is_guest(instance))
+ return;
+
+ for (event = instance->events; event; event = event->next) {
+ if (!event->neg)
+ update_event(event, event->filter, 0, '1');
+ }
+
+ /* Now disable any events */
+ for (event = instance->events; event; event = event->next) {
+ if (event->neg)
+ update_event(event, NULL, 0, '0');
+ }
+}
+
+void tracecmd_enable_events(void)
+{
+ enable_events(first_instance);
+}
+
+static void set_clock(struct common_record_context *ctx, struct buffer_instance *instance)
+{
+ const char *clock;
+ char *path;
+ char *content;
+ char *str;
+
+ if (is_guest(instance))
+ return;
+
+ if (instance->clock)
+ clock = instance->clock;
+ else
+ clock = ctx->clock;
+
+ if (!clock)
+ return;
+
+ /* The current clock is in brackets, reset it when we are done */
+ content = tracefs_instance_file_read(instance->tracefs,
+ "trace_clock", NULL);
+
+ /* check if first clock is set */
+ if (*content == '[')
+ str = strtok(content+1, "]");
+ else {
+ str = strtok(content, "[");
+ if (!str)
+ die("Can not find clock in trace_clock");
+ str = strtok(NULL, "]");
+ }
+ path = tracefs_instance_get_file(instance->tracefs, "trace_clock");
+ add_reset_file(path, str, RESET_DEFAULT_PRIO);
+
+ free(content);
+ tracefs_put_tracing_file(path);
+
+ tracefs_instance_file_write(instance->tracefs,
+ "trace_clock", clock);
+}
+
+static void set_max_graph_depth(struct buffer_instance *instance, char *max_graph_depth)
+{
+ char *path;
+ int ret;
+
+ if (is_guest(instance))
+ return;
+
+ path = tracefs_instance_get_file(instance->tracefs, "max_graph_depth");
+ reset_save_file(path, RESET_DEFAULT_PRIO);
+ tracefs_put_tracing_file(path);
+ ret = tracefs_instance_file_write(instance->tracefs, "max_graph_depth",
+ max_graph_depth);
+ if (ret < 0)
+ die("could not write to max_graph_depth");
+}
+
/*
 * Return true if "@dir/@file" exists and is not a directory.
 * Dies if the path can not be allocated.
 */
static bool check_file_in_dir(char *dir, char *file)
{
	struct stat st;
	char *full;
	int ret;

	if (asprintf(&full, "%s/%s", dir, file) < 0)
		die("Failed to allocate id file path for %s/%s", dir, file);

	ret = stat(full, &st);
	free(full);

	return ret >= 0 && !S_ISDIR(st.st_mode);
}
+
+/**
+ * create_event - create and event descriptor
+ * @instance: instance to use
+ * @path: path to event attribute
+ * @old_event: event descriptor to use as base
+ *
+ * NOTE: the function purpose is to create a data structure to describe
+ * an ftrace event. During the process it becomes handy to change the
+ * string `path`. So, do not rely on the content of `path` after you
+ * invoke this function.
+ */
+static struct event_list *
+create_event(struct buffer_instance *instance, char *path, struct event_list *old_event)
+{
+ struct event_list *event;
+ struct stat st;
+ char *path_dirname;
+ char *p;
+ int ret;
+
+ event = malloc(sizeof(*event));
+ if (!event)
+ die("Failed to allocate event");
+ *event = *old_event;
+ add_event(instance, event);
+
+ if (event->filter || filter_task || instance->filter_pids) {
+ event->filter_file = strdup(path);
+ if (!event->filter_file)
+ die("malloc filter file");
+ }
+
+ path_dirname = dirname(path);
+
+ ret = asprintf(&p, "%s/enable", path_dirname);
+ if (ret < 0)
+ die("Failed to allocate enable path for %s", path);
+ ret = stat(p, &st);
+ if (ret >= 0)
+ event->enable_file = p;
+ else
+ free(p);
+
+ if (old_event->trigger) {
+ if (check_file_in_dir(path_dirname, "trigger")) {
+ event->trigger = strdup(old_event->trigger);
+ ret = asprintf(&p, "%s/trigger", path_dirname);
+ if (ret < 0)
+ die("Failed to allocate trigger path for %s", path);
+ event->trigger_file = p;
+ } else {
+ /* Check if this is event or system.
+ * Systems do not have trigger files by design
+ */
+ if (check_file_in_dir(path_dirname, "id"))
+ die("trigger specified but not supported by this kernel");
+ }
+ }
+
+ return event;
+}
+
+static void make_sched_event(struct buffer_instance *instance,
+ struct event_list **event, struct event_list *sched,
+ const char *sched_path)
+{
+ char *path_dirname;
+ char *tmp_file;
+ char *path;
+ int ret;
+
+ /* Do nothing if the event already exists */
+ if (*event)
+ return;
+
+ /* we do not want to corrupt sched->filter_file when using dirname() */
+ tmp_file = strdup(sched->filter_file);
+ if (!tmp_file)
+ die("Failed to allocate path for %s", sched_path);
+ path_dirname = dirname(tmp_file);
+
+ ret = asprintf(&path, "%s/%s/filter", path_dirname, sched_path);
+ free(tmp_file);
+ if (ret < 0)
+ die("Failed to allocate path for %s", sched_path);
+
+ *event = create_event(instance, path, sched);
+ free(path);
+}
+
/*
 * Store @event in *@save if @path ends with @name.
 * @len is the length of @path. *@save is left untouched when @path
 * does not end with @name, including when it is shorter than @name.
 */
static void test_event(struct event_list *event, const char *path,
		       const char *name, struct event_list **save, int len)
{
	size_t name_len = strlen(name);

	/* A path shorter than the name can not end with it */
	if (len < 0 || (size_t)len < name_len)
		return;

	if (strcmp(path + len - name_len, name) != 0)
		return;

	*save = event;
}
+
+static void print_event(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!show_status)
+ return;
+
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+
+ printf("\n");
+}
+
+
+static int expand_event_files(struct buffer_instance *instance,
+ const char *file, struct event_list *old_event)
+{
+ struct event_list **save_event_tail = instance->event_next;
+ struct event_list *sched_event = NULL;
+ struct event_list *event;
+ glob_t globbuf;
+ char *path;
+ char *p;
+ int ret;
+ int i;
+
+ ret = asprintf(&p, "events/%s/filter", file);
+ if (ret < 0)
+ die("Failed to allocate event filter path for %s", file);
+
+ path = tracefs_instance_get_file(instance->tracefs, p);
+
+ globbuf.gl_offs = 0;
+ ret = glob(path, 0, NULL, &globbuf);
+ tracefs_put_tracing_file(path);
+ free(p);
+
+ if (ret < 0)
+ die("No filters found");
+
+ for (i = 0; i < globbuf.gl_pathc; i++) {
+ int len;
+
+ path = globbuf.gl_pathv[i];
+
+ event = create_event(instance, path, old_event);
+ print_event("%s\n", path);
+
+ len = strlen(path);
+
+ test_event(event, path, "sched", &sched_event, len);
+ test_event(event, path, "sched/sched_switch", &instance->sched_switch_event, len);
+ test_event(event, path, "sched/sched_wakeup_new", &instance->sched_wakeup_new_event, len);
+ test_event(event, path, "sched/sched_wakeup", &instance->sched_wakeup_event, len);
+ }
+
+ if (sched_event && sched_event->filter_file) {
+ /* make sure all sched events exist */
+ make_sched_event(instance, &instance->sched_switch_event,
+ sched_event, "sched_switch");
+ make_sched_event(instance, &instance->sched_wakeup_event,
+ sched_event, "sched_wakeup");
+ make_sched_event(instance, &instance->sched_wakeup_new_event,
+ sched_event, "sched_wakeup_new");
+
+ }
+
+
+ globfree(&globbuf);
+
+ /* If the event list tail changed, that means events were added */
+ return save_event_tail == instance->event_next;
+}
+
/*
 * Expand the events matching "@system_name/@event_name".
 * Returns what expand_event_files() returns for that pattern.
 */
static int expand_events_all(struct buffer_instance *instance,
			     char *system_name, char *event_name,
			     struct event_list *event)
{
	char *pattern;
	int none_added;

	if (asprintf(&pattern, "%s/%s", system_name, event_name) < 0)
		die("Failed to allocate system/event for %s/%s",
		    system_name, event_name);

	none_added = expand_event_files(instance, pattern, event);
	free(pattern);

	return none_added;
}
+
+static void expand_event(struct buffer_instance *instance, struct event_list *event)
+{
+ const char *name = event->event;
+ char *str;
+ char *ptr;
+ int ret;
+
+ /*
+ * We allow the user to use "all" to enable all events.
+ * Expand event_selection to all systems.
+ */
+ if (strcmp(name, "all") == 0) {
+ expand_event_files(instance, "*", event);
+ return;
+ }
+
+ str = strdup(name);
+ if (!str)
+ die("Failed to allocate %s string", name);
+
+ ptr = strchr(str, ':');
+ if (ptr) {
+ *ptr = '\0';
+ ptr++;
+
+ if (strlen(ptr))
+ ret = expand_events_all(instance, str, ptr, event);
+ else
+ ret = expand_events_all(instance, str, "*", event);
+
+ if (!ignore_event_not_found && ret)
+ die("No events enabled with %s", name);
+
+ goto out;
+ }
+
+ /* No ':' so enable all matching systems and events */
+ ret = expand_event_files(instance, str, event);
+ ret &= expand_events_all(instance, "*", str, event);
+ if (event->trigger)
+ ret &= expand_events_all(instance, str, "*", event);
+
+ if (!ignore_event_not_found && ret)
+ die("No events enabled with %s", name);
+
+out:
+ free(str);
+}
+
+static void expand_event_instance(struct buffer_instance *instance)
+{
+ struct event_list *compressed_list = instance->events;
+ struct event_list *event;
+
+ if (is_guest(instance))
+ return;
+
+ reset_event_list(instance);
+
+ while (compressed_list) {
+ event = compressed_list;
+ compressed_list = event->next;
+ expand_event(instance, event);
+ free(event->trigger);
+ free(event);
+ }
+}
+
+static void expand_event_list(void)
+{
+ struct buffer_instance *instance;
+
+ if (use_old_event_method())
+ return;
+
+ for_all_instances(instance)
+ expand_event_instance(instance);
+}
+
+static void finish(int sig)
+{
+ /* all done */
+ if (recorder)
+ tracecmd_stop_recording(recorder);
+ finished = 1;
+}
+
/*
 * Resolve @host and @port for a stream (@type is USE_TCP) or
 * datagram (any other @type) socket.
 *
 * Returns the getaddrinfo() result list, or NULL on failure with the
 * error text stored in gai_err.
 */
static struct addrinfo *do_getaddrinfo(const char *host, unsigned int port,
				       enum port_type type)
{
	const char *proto = type == USE_TCP ? "TCP" : "UDP";
	struct addrinfo *results;
	struct addrinfo hints;
	char service[BUFSIZ];
	int err;

	snprintf(service, BUFSIZ, "%u", port);

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	if (type == USE_TCP)
		hints.ai_socktype = SOCK_STREAM;
	else
		hints.ai_socktype = SOCK_DGRAM;

	err = getaddrinfo(host, service, &hints, &results);
	if (err != 0) {
		gai_err = gai_strerror(err);
		return NULL;
	}

	dprint("Attached port %s: %d to results: %p\n",
	       proto, port, results);

	return results;
}
+
+static int connect_addr(struct addrinfo *results)
+{
+ struct addrinfo *rp;
+ int sfd = -1;
+
+ for (rp = results; rp != NULL; rp = rp->ai_next) {
+ sfd = socket(rp->ai_family, rp->ai_socktype,
+ rp->ai_protocol);
+ if (sfd == -1)
+ continue;
+ if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1)
+ break;
+ close(sfd);
+ }
+
+ if (rp == NULL)
+ return -1;
+
+ dprint("connect results: %p with fd: %d\n", results, sfd);
+
+ return sfd;
+}
+
/*
 * Connect to @port of @host. For USE_VSOCK, @host is the numeric CID.
 * Returns the connected descriptor. Name resolution and connection
 * failures of the TCP/UDP case are fatal.
 */
static int connect_port(const char *host, unsigned int port, enum port_type type)
{
	const char *proto = type == USE_TCP ? "TCP" : "UDP";
	struct addrinfo *results;
	int sfd;

	if (type == USE_VSOCK)
		return trace_vsock_open(atoi(host), port);

	results = do_getaddrinfo(host, port, type);
	if (!results)
		die("connecting to %s server %s:%u", proto, host, port);

	sfd = connect_addr(results);
	freeaddrinfo(results);

	if (sfd < 0)
		die("Can not connect to %s server %s:%u", proto, host, port);

	return sfd;
}
+
+static int do_accept(int sd)
+{
+ int cd;
+
+ for (;;) {
+ dprint("Wait on accept: %d\n", sd);
+ cd = accept(sd, NULL, NULL);
+ dprint("accepted: %d\n", cd);
+ if (cd < 0) {
+ if (errno == EINTR)
+ continue;
+ die("accept");
+ }
+
+ return cd;
+ }
+
+ return -1;
+}
+
+/* Find all the tasks associated with the guest pid */
+static void find_tasks(struct trace_guest *guest)
+{
+ struct dirent *dent;
+ char *path;
+ DIR *dir;
+ int ret;
+ int tasks = 0;
+
+ ret = asprintf(&path, "/proc/%d/task", guest->pid);
+ if (ret < 0)
+ return;
+
+ dir = opendir(path);
+ free(path);
+ if (!dir)
+ return;
+
+ while ((dent = readdir(dir))) {
+ int *pids;
+ if (!(dent->d_type == DT_DIR && is_digits(dent->d_name)))
+ continue;
+ pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2));
+ if (!pids)
+ break;
+ pids[tasks++] = strtol(dent->d_name, NULL, 0);
+ pids[tasks] = -1;
+ guest->task_pids = pids;
+ }
+ closedir(dir);
+}
+
/*
 * parse_guest_name - split a guest specification
 * @gname: "name[@cid][:port]", a plain cid, or an address; modified
 *	   in place
 * @cid: set to the guest CID, or -1 if none was found
 * @port: set to the port after the last ':', or -1 if there is none
 * @res: set to the getaddrinfo() result when @gname resolved as an
 *	 internet address, NULL otherwise
 *
 * Returns the name of the matching known guest, @gname itself when it
 * resolved as an internet address, or NULL if neither worked.
 */
static char *parse_guest_name(char *gname, int *cid, int *port,
			      struct addrinfo **res)
{
	struct trace_guest *guest = NULL;
	struct addrinfo *result;
	char *ip = NULL;
	char *sep;

	*res = NULL;

	/* The port follows the last ':', unless that is the first char */
	*port = -1;
	sep = strrchr(gname, ':');
	if (sep && sep > gname) {
		*sep = '\0';
		*port = atoi(sep + 1);
	}

	*cid = -1;
	sep = strrchr(gname, '@');
	if (sep) {
		*sep = '\0';
		*cid = atoi(sep + 1);
	} else if (is_digits(gname)) {
		*cid = atoi(gname);
	} else if (strchr(gname, ':') || strchr(gname, '.')) {
		/* Check if this is an IP address */
		ip = gname;
	}

	if (!ip) {
		if (*cid < 0)
			read_qemu_guests();
		guest = trace_get_guest(*cid, gname);
	}

	if (guest) {
		*cid = guest->cid;
		/* Mapping not found, search for them */
		if (!guest->cpu_pid)
			find_tasks(guest);
		return guest->name;
	}

	/* Test to see if this is an internet address */
	result = do_getaddrinfo(gname, *port, USE_TCP);
	if (!result)
		return NULL;

	*res = result;

	return gname;
}
+
/*
 * Switch the calling process to SCHED_FIFO with priority @prio.
 * A failure only produces a warning.
 */
static void set_prio(int prio)
{
	struct sched_param param;
	int ret;

	memset(&param, 0, sizeof(param));
	param.sched_priority = prio;

	ret = sched_setscheduler(0, SCHED_FIFO, &param);
	if (ret < 0)
		warning("failed to set priority");
}
+
+static struct tracecmd_recorder *
+create_recorder_instance_pipe(struct buffer_instance *instance,
+ int cpu, int *brass)
+{
+ struct tracecmd_recorder *recorder;
+ unsigned flags = recorder_flags | TRACECMD_RECORD_BLOCK_SPLICE;
+ char *path;
+
+ path = tracefs_instance_get_dir(instance->tracefs);
+
+ if (!path)
+ die("malloc");
+
+ /* This is already the child */
+ close(brass[0]);
+
+ recorder = tracecmd_create_buffer_recorder_fd(brass[1], cpu, flags, path);
+
+ tracefs_put_tracing_file(path);
+
+ return recorder;
+}
+
/*
 * Create the recorder for @cpu of @instance.
 *
 * Guest instances read from a FIFO, a network connection or a vsock
 * and record into @file. Host instances record into the pipe @brass
 * when one is given, otherwise into @file.
 */
static struct tracecmd_recorder *
create_recorder_instance(struct buffer_instance *instance, const char *file, int cpu,
			 int *brass)
{
	struct tracecmd_recorder *rec;
	struct addrinfo *result;
	unsigned int flags;
	char *dir;
	int fd;

	if (!is_guest(instance)) {
		if (brass)
			return create_recorder_instance_pipe(instance, cpu, brass);

		/* The top instance has no name */
		if (!tracefs_instance_get_name(instance->tracefs))
			return tracecmd_create_recorder_maxkb(file, cpu,
							      recorder_flags,
							      max_kb);

		dir = tracefs_instance_get_dir(instance->tracefs);
		rec = tracecmd_create_buffer_recorder_maxkb(file, cpu,
							    recorder_flags,
							    dir, max_kb);
		tracefs_put_tracing_file(dir);

		return rec;
	}

	if (instance->use_fifos) {
		fd = instance->fds[cpu];
	} else if (is_network(instance)) {
		result = do_getaddrinfo(instance->name,
					instance->client_ports[cpu],
					instance->port_type);
		if (!result)
			die("Failed to connect to %s port %d\n",
			    instance->name,
			    instance->client_ports[cpu]);
		fd = connect_addr(result);
		freeaddrinfo(result);
	} else {
		fd = trace_vsock_open(instance->cid, instance->client_ports[cpu]);
	}
	if (fd < 0)
		die("Failed to connect to agent");

	flags = recorder_flags;
	if (instance->use_fifos)
		flags |= TRACECMD_RECORD_NOBRASS;
	else if (!trace_vsock_can_splice_read())
		flags |= TRACECMD_RECORD_NOSPLICE;

	return tracecmd_create_recorder_virt(file, cpu, flags, fd);
}
+
+/*
+ * If extract is set, then this is going to set up the recorder,
+ * connections and exit as the tracing is serialized by a single thread.
+ */
+static int create_recorder(struct buffer_instance *instance, int cpu,
+ enum trace_type type, int *brass)
+{
+ long ret;
+ char *file;
+ pid_t pid;
+
+ if (type != TRACE_TYPE_EXTRACT) {
+
+ pid = fork();
+ if (pid < 0)
+ die("fork");
+
+ if (pid)
+ return pid;
+
+ signal(SIGINT, SIG_IGN);
+ signal(SIGUSR1, finish);
+
+ if (rt_prio)
+ set_prio(rt_prio);
+
+ /* do not kill tasks on error */
+ instance->cpu_count = 0;
+ }
+
+ if ((instance->client_ports && !is_guest(instance)) || is_agent(instance)) {
+ unsigned int flags = recorder_flags;
+ char *path = NULL;
+ int fd;
+
+ if (is_agent(instance)) {
+ if (instance->use_fifos)
+ fd = instance->fds[cpu];
+ else {
+ again:
+ fd = do_accept(instance->fds[cpu]);
+ if (instance->host &&
+ !trace_net_cmp_connection_fd(fd, instance->host)) {
+ dprint("Client does not match '%s' for cpu:%d\n",
+ instance->host, cpu);
+ goto again;
+ }
+ }
+ } else {
+ fd = connect_port(host, instance->client_ports[cpu],
+ instance->port_type);
+ }
+ if (fd < 0)
+ die("Failed connecting to client");
+ if (tracefs_instance_get_name(instance->tracefs) && !is_agent(instance)) {
+ path = tracefs_instance_get_dir(instance->tracefs);
+ } else {
+ const char *dir = tracefs_tracing_dir();
+
+ if (dir)
+ path = strdup(dir);
+ }
+ if (!path)
+ die("can't get the tracing directory");
+
+ recorder = tracecmd_create_buffer_recorder_fd(fd, cpu, flags, path);
+ tracefs_put_tracing_file(path);
+ } else {
+ file = get_temp_file(instance, cpu);
+ recorder = create_recorder_instance(instance, file, cpu, brass);
+ put_temp_file(file);
+ }
+
+ if (!recorder)
+ die ("can't create recorder");
+
+ if (type == TRACE_TYPE_EXTRACT) {
+ ret = tracecmd_flush_recording(recorder);
+ tracecmd_free_recorder(recorder);
+ recorder = NULL;
+ return ret;
+ }
+
+ while (!finished) {
+ if (tracecmd_start_recording(recorder, sleep_time) < 0)
+ break;
+ }
+ tracecmd_free_recorder(recorder);
+ recorder = NULL;
+
+ exit(0);
+}
+
+static void check_first_msg_from_server(struct tracecmd_msg_handle *msg_handle)
+{
+ char buf[BUFSIZ];
+
+ read(msg_handle->fd, buf, 8);
+
+ /* Make sure the server is the tracecmd server */
+ if (memcmp(buf, "tracecmd", 8) != 0)
+ die("server not tracecmd server");
+}
+
+static void communicate_with_listener_v1(struct tracecmd_msg_handle *msg_handle,
+ struct buffer_instance *instance)
+{
+ unsigned int *client_ports;
+ char buf[BUFSIZ];
+ ssize_t n;
+ int cpu, i;
+
+ check_first_msg_from_server(msg_handle);
+
+ /* write the number of CPUs we have (in ASCII) */
+ sprintf(buf, "%d", local_cpu_count);
+
+ /* include \0 */
+ write(msg_handle->fd, buf, strlen(buf)+1);
+
+ /* write the pagesize (in ASCII) */
+ sprintf(buf, "%d", page_size);
+
+ /* include \0 */
+ write(msg_handle->fd, buf, strlen(buf)+1);
+
+ /*
+ * If we are using IPV4 and our page size is greater than
+ * or equal to 64K, we need to punt and use TCP. :-(
+ */
+
+ /* TODO, test for ipv4 */
+ if (page_size >= UDP_MAX_PACKET) {
+ warning("page size too big for UDP using TCP in live read");
+ instance->port_type = USE_TCP;
+ msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP;
+ }
+
+ if (instance->port_type == USE_TCP) {
+ /* Send one option */
+ write(msg_handle->fd, "1", 2);
+ /* Size 4 */
+ write(msg_handle->fd, "4", 2);
+ /* use TCP */
+ write(msg_handle->fd, "TCP", 4);
+ } else
+ /* No options */
+ write(msg_handle->fd, "0", 2);
+
+ client_ports = malloc(local_cpu_count * sizeof(*client_ports));
+ if (!client_ports)
+ die("Failed to allocate client ports for %d cpus", local_cpu_count);
+
+ /*
+ * Now we will receive back a comma deliminated list
+ * of client ports to connect to.
+ */
+ for (cpu = 0; cpu < local_cpu_count; cpu++) {
+ for (i = 0; i < BUFSIZ; i++) {
+ n = read(msg_handle->fd, buf+i, 1);
+ if (n != 1)
+ die("Error, reading server ports");
+ if (!buf[i] || buf[i] == ',')
+ break;
+ }
+ if (i == BUFSIZ)
+ die("read bad port number");
+ buf[i] = 0;
+ client_ports[cpu] = atoi(buf);
+ }
+
+ instance->client_ports = client_ports;
+}
+
/*
 * Send the v3 init data to the listener; the ports it hands back are
 * delivered through @client_ports. A failure is fatal.
 */
static void communicate_with_listener_v3(struct tracecmd_msg_handle *msg_handle,
					 unsigned int **client_ports)
{
	int ret = tracecmd_msg_send_init_data(msg_handle, client_ports);

	if (ret < 0)
		die("Cannot communicate with server");
}
+
+static void check_protocol_version(struct tracecmd_msg_handle *msg_handle)
+{
+ char buf[BUFSIZ];
+ int fd = msg_handle->fd;
+ int n;
+
+ check_first_msg_from_server(msg_handle);
+
+ /*
+ * Write the protocol version, the magic number, and the dummy
+ * option(0) (in ASCII). The client understands whether the client
+ * uses the v3 protocol or not by checking a reply message from the
+ * server. If the message is "V3", the server uses v3 protocol. On the
+ * other hands, if the message is just number strings, the server
+ * returned port numbers. So, in that time, the client understands the
+ * server uses the v1 protocol. However, the old server tells the
+ * client port numbers after reading cpu_count, page_size, and option.
+ * So, we add the dummy number (the magic number and 0 option) to the
+ * first client message.
+ */
+ write(fd, V3_CPU, sizeof(V3_CPU));
+
+ buf[0] = 0;
+
+ /* read a reply message */
+ n = read(fd, buf, BUFSIZ);
+
+ if (n < 0 || !buf[0]) {
+ /* the server uses the v1 protocol, so we'll use it */
+ msg_handle->version = V1_PROTOCOL;
+ tracecmd_plog("Use the v1 protocol\n");
+ } else {
+ if (memcmp(buf, "V3", n) != 0)
+ die("Cannot handle the protocol %s", buf);
+ /* OK, let's use v3 protocol */
+ write(fd, V3_MAGIC, sizeof(V3_MAGIC));
+
+ n = read(fd, buf, BUFSIZ - 1);
+ if (n != 2 || memcmp(buf, "OK", 2) != 0) {
+ if (n < 0)
+ n = 0;
+ buf[n] = 0;
+ die("Cannot handle the protocol %s", buf);
+ }
+ }
+}
+
+static int connect_vsock(char *vhost)
+{
+ char *cid;
+ char *port;
+ char *p;
+ int sd;
+
+ host = strdup(vhost);
+ if (!host)
+ die("alloctating server");
+
+ cid = strtok_r(host, ":", &p);
+ port = strtok_r(NULL, "", &p);
+
+ if (!port)
+ die("vsocket must have format of 'CID:PORT'");
+
+ sd = trace_vsock_open(atoi(cid), atoi(port));
+
+ return sd;
+}
+
/*
 * Open a TCP connection to @thost, given as "HOST:PORT" or as a bare
 * "PORT", in which case localhost is used.
 *
 * The global host is replaced by an allocated string holding the
 * server name. Returns the connected socket. Resolution and
 * connection failures are fatal.
 */
static int connect_ip(char *thost)
{
	struct addrinfo *result;
	int sfd;
	char *server;
	char *port;
	char *p;

	/* Decide on the string we were given, not on the global */
	if (!strchr(thost, ':')) {
		server = strdup("localhost");
		if (!server)
			die("alloctating server");
		port = thost;
		host = server;
	} else {
		host = strdup(thost);
		if (!host)
			die("alloctating server");
		server = strtok_r(host, ":", &p);
		port = strtok_r(NULL, ":", &p);
		/* ":PORT" and "HOST:" both leave no second token */
		if (!server || !port)
			die("IP address must have format of 'HOST:PORT'");
	}

	result = do_getaddrinfo(server, atoi(port), USE_TCP);
	if (!result)
		die("getaddrinfo: %s", gai_err);

	sfd = connect_addr(result);

	freeaddrinfo(result);

	if (sfd < 0)
		die("Can not connect to %s:%s", server, port);

	return sfd;
}
+
+static struct tracecmd_msg_handle *setup_network(struct buffer_instance *instance)
+{
+ struct tracecmd_msg_handle *msg_handle = NULL;
+ enum port_type type = instance->port_type;
+ int sfd;
+
+again:
+ switch (type) {
+ case USE_VSOCK:
+ sfd = connect_vsock(host);
+ break;
+ default:
+ sfd = connect_ip(host);
+ }
+
+ if (sfd < 0)
+ return NULL;
+
+ if (msg_handle) {
+ msg_handle->fd = sfd;
+ } else {
+ msg_handle = tracecmd_msg_handle_alloc(sfd, 0);
+ if (!msg_handle)
+ die("Failed to allocate message handle");
+
+ msg_handle->cpu_count = local_cpu_count;
+ msg_handle->version = V3_PROTOCOL;
+ }
+
+ switch (type) {
+ case USE_TCP:
+ msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP;
+ break;
+ case USE_VSOCK:
+ msg_handle->flags |= TRACECMD_MSG_FL_USE_VSOCK;
+ break;
+ default:
+ break;
+ }
+
+ if (msg_handle->version == V3_PROTOCOL) {
+ check_protocol_version(msg_handle);
+ if (msg_handle->version == V1_PROTOCOL) {
+ /* reconnect to the server for using the v1 protocol */
+ close(sfd);
+ free(host);
+ goto again;
+ }
+ communicate_with_listener_v3(msg_handle, &instance->client_ports);
+ }
+
+ if (msg_handle->version == V1_PROTOCOL)
+ communicate_with_listener_v1(msg_handle, instance);
+
+ return msg_handle;
+}
+
+static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx);
+
+static struct tracecmd_output *create_net_output(struct common_record_context *ctx,
+ struct tracecmd_msg_handle *msg_handle)
+{
+ struct tracecmd_output *out;
+
+ out = tracecmd_output_create(NULL);
+ if (!out)
+ return NULL;
+ if (ctx->file_version && tracecmd_output_set_version(out, ctx->file_version))
+ goto error;
+ if (tracecmd_output_set_msg(out, msg_handle))
+ goto error;
+
+ if (ctx->compression) {
+ if (tracecmd_output_set_compression(out, ctx->compression))
+ goto error;
+ } else if (ctx->file_version >= FILE_VERSION_COMPRESSION) {
+ tracecmd_output_set_compression(out, "any");
+ }
+
+ if (tracecmd_output_write_headers(out, listed_events))
+ goto error;
+
+ return out;
+error:
+ tracecmd_output_close(out);
+ return NULL;
+}
+
+/*
+ * Connect @instance through setup_network() and prepare the output handle
+ * that goes with that connection.
+ *
+ * With the V3 protocol the handle comes from create_net_output(); the
+ * options, cmdlines, CPU count, buffer info and option sections are then
+ * written and the data transfer is finished. With any other protocol
+ * version the handle is created on the message handle's fd and only the
+ * headers are written.
+ *
+ * On success the output handle is stored in instance->network_handle and
+ * the message handle is returned. On failure both handles are closed and
+ * NULL is returned. Dies if setup_network() returns NULL.
+ */
+static struct tracecmd_msg_handle *
+setup_connection(struct buffer_instance *instance, struct common_record_context *ctx)
+{
+	struct tracecmd_output *out = NULL;
+	struct tracecmd_msg_handle *msg;
+
+	msg = setup_network(instance);
+	if (!msg)
+		die("Failed to make connection");
+
+	/* Now create the handle through this socket */
+	if (msg->version == V3_PROTOCOL) {
+		out = create_net_output(ctx, msg);
+		if (!out)
+			goto fail;
+		tracecmd_set_quiet(out, quiet);
+		add_options(out, ctx);
+
+		/* Each step runs only if every earlier one returned zero */
+		if (tracecmd_write_cmdlines(out) ||
+		    tracecmd_write_cpus(out, instance->cpu_count) ||
+		    tracecmd_write_buffer_info(out) ||
+		    tracecmd_write_options(out) ||
+		    tracecmd_msg_finish_sending_data(msg))
+			goto fail;
+	} else {
+		out = tracecmd_output_create_fd(msg->fd);
+		if (!out)
+			goto fail;
+		if (tracecmd_output_set_version(out, ctx->file_version))
+			goto fail;
+
+		if (ctx->compression) {
+			if (tracecmd_output_set_compression(out, ctx->compression))
+				goto fail;
+		} else if (ctx->file_version >= FILE_VERSION_COMPRESSION) {
+			/* Best effort: the return value is deliberately ignored */
+			tracecmd_output_set_compression(out, "any");
+		}
+
+		if (tracecmd_output_write_headers(out, listed_events))
+			goto fail;
+		tracecmd_set_quiet(out, quiet);
+	}
+
+	instance->network_handle = out;
+
+	/* OK, we are all set, let'r rip! */
+	return msg;
+
+fail:
+	/* msg is never NULL here: the NULL case dies above */
+	tracecmd_msg_handle_close(msg);
+	if (out)
+		tracecmd_output_close(out);
+	return NULL;
+}
+
+/*
+ * Shut down the connection behind @msg_handle: send the close message
+ * when the V3 protocol is in use, close the handle, then free the
+ * file-scope "host" string.
+ *
+ * NOTE(review): "host" is not reset after being freed; confirm this is
+ * never reached more than once per run.
+ */
+static void finish_network(struct tracecmd_msg_handle *msg_handle)
+{
+	bool is_v3 = msg_handle->version == V3_PROTOCOL;
+
+	if (is_v3)
+		tracecmd_msg_send_close_msg(msg_handle);
+
+	tracecmd_msg_handle_close(msg_handle);
+	free(host);
+}
+
+/*
+ * Open the ".out" FIFOs of @guest, numbered from 0 upward, until one
+ * fails to open.
+ *
+ * @guest: name substituted into GUEST_FIFO_FMT
+ * @fds:   in/out; *fds is grown with realloc() to hold one descriptor per
+ *         FIFO opened (pass a pointer to NULL for a fresh array)
+ *
+ * Returns the number of descriptors stored in *fds. If the array cannot
+ * be grown, the descriptor just opened is closed and the count collected
+ * so far is returned; *fds stays valid for that count.
+ */
+static int open_guest_fifos(const char *guest, int **fds)
+{
+	char path[PATH_MAX];
+	int *grown;
+	int i, fd, flags;
+
+	for (i = 0; ; i++) {
+		snprintf(path, sizeof(path), GUEST_FIFO_FMT ".out", guest, i);
+
+		/* O_NONBLOCK so we don't wait for writers */
+		fd = open(path, O_RDONLY | O_NONBLOCK);
+		if (fd < 0)
+			break;
+
+		/* Success, now clear O_NONBLOCK */
+		flags = fcntl(fd, F_GETFL);
+		if (flags >= 0)
+			fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
+
+		/* Size in elements, not bytes, and keep *fds on failure */
+		grown = realloc(*fds, (i + 1) * sizeof(**fds));
+		if (!grown) {
+			close(fd);
+			break;
+		}
+		*fds = grown;
+		(*fds)[i] = fd;
+	}
+
+	return i;
+}
+
+/*
+ * Working state filled in by stop_mapping_vcpus() and handed to the
+ * map_vcpus() callback as its context.
+ */
+struct trace_mapping {
+ struct tep_event *kvm_entry; /* the "kvm"/"kvm_entry" event; records with another id are skipped */
+ struct tep_format_field *vcpu_id; /* "vcpu_id" field of kvm_entry */
+ struct tep_format_field *common_pid; /* "common_pid" field of kvm_entry */
+ int *pids; /* PIDs of interest; the list ends at the first negative entry */
+ int *map; /* indexed by vCPU id: the PID seen for it, or -1 if none yet */
+ int max_cpus; /* number of entries in map */
+};
+
+/*
+ * Create the "map_guest_pids" tracefs instance for @guest, restrict its
+ * events to the guest's task PIDs and enable kvm_entry in it.
+ *
+ * Does nothing when the guest has no task PID list or the instance cannot
+ * be created. If building the PID string runs out of memory, the
+ * set_event_pid write is skipped but the event is still enabled.
+ */
+static void start_mapping_vcpus(struct trace_guest *guest)
+{
+	char *list = NULL;
+	char *grown;
+	int used = 0;
+	int need;
+	int i;
+
+	if (!guest->task_pids)
+		return;
+
+	guest->instance = tracefs_instance_create("map_guest_pids");
+	if (!guest->instance)
+		return;
+
+	/* Build "pid pid ... "; the PID list ends at the first negative entry */
+	for (i = 0; guest->task_pids[i] >= 0; i++) {
+		need = snprintf(NULL, 0, "%d ", guest->task_pids[i]);
+		grown = realloc(list, used + need + 1);
+		if (!grown) {
+			free(list);
+			list = NULL;
+			break;
+		}
+		list = grown;
+		sprintf(list + used, "%d ", guest->task_pids[i]);
+		used += need;
+	}
+
+	if (list) {
+		tracefs_instance_file_write(guest->instance, "set_event_pid", list);
+		free(list);
+	}
+
+	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "1");
+}
+
+/*
+ * Event callback handed to tracefs_iterate_raw_events() by
+ * stop_mapping_vcpus(): record which PID entered which vCPU.
+ *
+ * @event:   event descriptor of the record
+ * @record:  raw record to inspect
+ * @cpu:     not read on entry; reused for the vCPU id
+ * @context: the struct trace_mapping being filled in
+ *
+ * Records that are not kvm_entry, whose fields cannot be read, whose PID
+ * is not in tmap->pids, or whose vCPU id lies outside [0, max_cpus) are
+ * ignored. The first PID seen for a vCPU wins.
+ *
+ * Returns non-zero once every slot of tmap->map is filled, 0 otherwise.
+ */
+static int map_vcpus(struct tep_event *event, struct tep_record *record,
+		     int cpu, void *context)
+{
+	struct trace_mapping *tmap = context;
+	unsigned long long val;
+	int type;
+	int pid;
+	int ret;
+	int i;
+
+	/* Do we have junk in the buffer? */
+	type = tep_data_type(event->tep, record);
+	if (type != tmap->kvm_entry->id)
+		return 0;
+
+	ret = tep_read_number_field(tmap->common_pid, record->data, &val);
+	if (ret < 0)
+		return 0;
+	pid = (int)val;
+
+	for (i = 0; tmap->pids[i] >= 0; i++) {
+		if (pid == tmap->pids[i])
+			break;
+	}
+	/* Is this thread one we care about ? */
+	if (tmap->pids[i] < 0)
+		return 0;
+
+	ret = tep_read_number_field(tmap->vcpu_id, record->data, &val);
+	if (ret < 0)
+		return 0;
+
+	cpu = (int)val;
+
+	/*
+	 * Sanity check, warn? A value that turns negative in the cast must
+	 * be rejected too, or it would index before the start of the map.
+	 */
+	if (cpu < 0 || cpu >= tmap->max_cpus)
+		return 0;
+
+	/* Already have this one? Should we check if it is the same? */
+	if (tmap->map[cpu] >= 0)
+		return 0;
+
+	tmap->map[cpu] = pid;
+
+	/* Did we get them all */
+	for (i = 0; i < tmap->max_cpus; i++) {
+		if (tmap->map[i] < 0)
+			break;
+	}
+
+	return i == tmap->max_cpus;
+}
+
+/*
+ * Finish the vCPU to PID mapping begun by start_mapping_vcpus().
+ *
+ * Disables kvm_entry in the guest's mapping instance, walks the recorded
+ * events with map_vcpus() and, only if every one of instance->cpu_count
+ * vCPUs got a PID, hands the table to @guest (cpu_pid / cpu_max). The
+ * mapping instance is destroyed and freed; guest->instance is then
+ * cleared so that a later call cannot touch the freed instance.
+ *
+ * Does nothing if no mapping instance exists or the table cannot be
+ * allocated.
+ */
+static void stop_mapping_vcpus(struct buffer_instance *instance,
+			       struct trace_guest *guest)
+{
+	struct trace_mapping tmap = { };
+	struct tep_handle *tep;
+	const char *systems[] = { "kvm", NULL };
+	int i;
+
+	if (!guest->instance)
+		return;
+
+	tmap.pids = guest->task_pids;
+	tmap.max_cpus = instance->cpu_count;
+
+	tmap.map = malloc(sizeof(*tmap.map) * tmap.max_cpus);
+	if (!tmap.map)
+		return;
+
+	/* -1 marks a vCPU that has not been seen yet */
+	for (i = 0; i < tmap.max_cpus; i++)
+		tmap.map[i] = -1;
+
+	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "0");
+
+	tep = tracefs_local_events_system(NULL, systems);
+	if (!tep)
+		goto out;
+
+	tmap.kvm_entry = tep_find_event_by_name(tep, "kvm", "kvm_entry");
+	if (!tmap.kvm_entry)
+		goto out_free;
+
+	tmap.vcpu_id = tep_find_field(tmap.kvm_entry, "vcpu_id");
+	if (!tmap.vcpu_id)
+		goto out_free;
+
+	tmap.common_pid = tep_find_any_field(tmap.kvm_entry, "common_pid");
+	if (!tmap.common_pid)
+		goto out_free;
+
+	tracefs_iterate_raw_events(tep, guest->instance, NULL, 0, map_vcpus, &tmap);
+
+	for (i = 0; i < tmap.max_cpus; i++) {
+		if (tmap.map[i] < 0)
+			break;
+	}
+	/* We found all the mapped CPUs */
+	if (i == tmap.max_cpus) {
+		guest->cpu_pid = tmap.map;
+		guest->cpu_max = tmap.max_cpus;
+		/* Ownership moved to the guest; keep free() below from releasing it */
+		tmap.map = NULL;
+	}
+
+ out_free:
+	tep_free(tep);
+ out:
+	free(tmap.map);
+	tracefs_instance_destroy(guest->instance);
+	tracefs_instance_free(guest->instance);
+	/* Do not leave a dangling pointer behind */
+	guest->instance = NULL;
+}
+
+/*
+ * Start time synchronization with the peer of @instance.
+ *
+ * @ctx:        supplies the clock passed to the sync code
+ * @instance:   the peer; instance->tsync receives the result
+ * @tsync_port: port to connect to
+ * @proto:      protocol name to use
+ *
+ * A network instance is reached with connect_port(). Otherwise the guest
+ * is looked up by CID, the vCPU mapping is started, a vsock is opened and
+ * the mapping is stopped again once the sync call has returned.
+ *
+ * Returns 0 on success, -1 if @proto is NULL, the guest is unknown or
+ * the sync call returns NULL.
+ */
+static int host_tsync(struct common_record_context *ctx,
+		      struct buffer_instance *instance,
+		      unsigned int tsync_port, char *proto)
+{
+	struct trace_guest *guest = NULL;
+	int guest_pid = -1;
+	int fd;
+
+	if (!proto)
+		return -1;
+
+	if (!is_network(instance)) {
+		guest = trace_get_guest(instance->cid, NULL);
+		if (guest == NULL)
+			return -1;
+
+		guest_pid = guest->pid;
+		start_mapping_vcpus(guest);
+		fd = trace_vsock_open(instance->cid, tsync_port);
+	} else {
+		fd = connect_port(instance->name, tsync_port,
+				  instance->port_type);
+	}
+
+	instance->tsync = tracecmd_tsync_with_guest(top_instance.trace_id,
+						    instance->tsync_loop_interval,
+						    fd, guest_pid,
+						    instance->cpu_count,
+						    proto, ctx->clock);
+	if (!is_network(instance))
+		stop_mapping_vcpus(instance, guest);
+
+	return instance->tsync ? 0 : -1;
+}
+
+/*
+ * Connect to the agent serving @instance and negotiate the trace session.
+ *
+ * Steps, in order:
+ *  - unless no_fifos is set, open the guest FIFOs;
+ *  - connect by address when ctx->instance->result is set (time sync role
+ *    CLIENT), otherwise over vsock (role HOST);
+ *  - send the trace request and read the response;
+ *  - if the agent answered with a supported time sync protocol, start it
+ *    through host_tsync();
+ *  - keep either the FIFO descriptors (instance->fds) or the port list
+ *    (instance->client_ports), whichever the agent selected, and release
+ *    the other.
+ *
+ * On return instance->use_fifos, instance->cpu_count and
+ * instance->msg_handle are set. Dies on connection, allocation or
+ * protocol failure.
+ */
+static void connect_to_agent(struct common_record_context *ctx,
+			     struct buffer_instance *instance)
+{
+	struct tracecmd_tsync_protos *protos = NULL;
+	int sd, ret, nr_cpus, page_size;
+	/*
+	 * Must start at zero: with no_fifos set nothing is opened, yet the
+	 * count is still read by the close loop and the warning below.
+	 */
+	int nr_fifos = 0;
+	struct tracecmd_msg_handle *msg_handle;
+	enum tracecmd_time_sync_role role;
+	char *tsync_protos_reply = NULL;
+	unsigned int tsync_port = 0;
+	unsigned int *ports;
+	int i, *fds = NULL;
+	bool use_fifos = false;
+
+	if (!no_fifos) {
+		nr_fifos = open_guest_fifos(instance->name, &fds);
+		use_fifos = nr_fifos > 0;
+	}
+
+	if (ctx->instance->result) {
+		role = TRACECMD_TIME_SYNC_ROLE_CLIENT;
+		sd = connect_addr(ctx->instance->result);
+		if (sd < 0)
+			die("Failed to connect to host %s:%u",
+			    instance->name, instance->port);
+	} else {
+		role = TRACECMD_TIME_SYNC_ROLE_HOST;
+		sd = trace_vsock_open(instance->cid, instance->port);
+		if (sd < 0)
+			die("Failed to connect to vsocket @%u:%u",
+			    instance->cid, instance->port);
+	}
+
+	msg_handle = tracecmd_msg_handle_alloc(sd, 0);
+	if (!msg_handle)
+		die("Failed to allocate message handle");
+
+	if (!instance->clock)
+		instance->clock = tracefs_get_clock(NULL);
+
+	/* A negative loop interval means no time sync protocols are offered */
+	if (instance->tsync_loop_interval >= 0)
+		tracecmd_tsync_proto_getall(&protos, instance->clock, role);
+
+	ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc,
+					  instance->argv, use_fifos,
+					  top_instance.trace_id, protos);
+	if (ret < 0)
+		die("Failed to send trace request");
+
+	if (protos) {
+		free(protos->names);
+		free(protos);
+	}
+	ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size,
+					   &ports, &use_fifos,
+					   &instance->trace_id,
+					   &tsync_protos_reply, &tsync_port);
+	if (ret < 0)
+		die("Failed to receive trace response %d", ret);
+	if (tsync_protos_reply && tsync_protos_reply[0]) {
+		if (tsync_proto_is_supported(tsync_protos_reply)) {
+			printf("Negotiated %s time sync protocol with guest %s\n",
+				tsync_protos_reply,
+				instance->name);
+			/* host_tsync() reads the CPU count from the instance */
+			instance->cpu_count = nr_cpus;
+			host_tsync(ctx, instance, tsync_port, tsync_protos_reply);
+		} else
+			warning("Failed to negotiate timestamps synchronization with the guest");
+	}
+	free(tsync_protos_reply);
+
+	if (use_fifos) {
+		if (nr_cpus != nr_fifos) {
+			warning("number of FIFOs (%d) for guest %s differs "
+				"from number of virtual CPUs (%d)",
+				nr_fifos, instance->name, nr_cpus);
+			nr_cpus = nr_cpus < nr_fifos ? nr_cpus : nr_fifos;
+		}
+		free(ports);
+		instance->fds = fds;
+	} else {
+		/* FIFOs were opened but the agent chose ports: drop them */
+		for (i = 0; i < nr_fifos; i++)
+			close(fds[i]);
+		free(fds);
+		instance->client_ports = ports;
+	}
+
+	instance->use_fifos = use_fifos;
+	instance->cpu_count = nr_cpus;
+
+	/* the msg_handle now points to the guest fd */
+	instance->msg_handle = msg_handle;
+}
+
+/*
+ * Replace instance->output_file with the guest file name derived from it
+ * and from the instance name, create that file, and read the metadata
+ * from the instance's message handle into it.
+ *
+ * Dies if the name cannot be built, the file cannot be opened or the
+ * metadata cannot be received.
+ */
+static void setup_guest(struct buffer_instance *instance)
+{
+	struct tracecmd_msg_handle *msg = instance->msg_handle;
+	char *guest_file;
+	int out_fd;
+
+	/* Create a place to store the guest meta data */
+	guest_file = trace_get_guest_file(instance->output_file, instance->name);
+	if (!guest_file)
+		die("Failed to allocate memory");
+
+	/* The old name is only released after the new one was derived from it */
+	free(instance->output_file);
+	instance->output_file = guest_file;
+
+	out_fd = open(guest_file, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+	if (out_fd < 0)
+		die("Failed to open %s", guest_file);
+
+	/* Start reading tracing metadata */
+	if (tracecmd_msg_read_data(msg, out_fd))
+		die("Failed receiving metadata");
+
+	close(out_fd);
+}
+
+/*
+ * Agent side: create the output handle on the instance's message handle,
+ * write the options, cmdlines, CPU count, buffer info, option section
+ * and meta strings through it, finish the data transfer and store the
+ * handle in instance->network_handle.
+ *
+ * Dies if the output handle cannot be created, instead of passing a NULL
+ * handle to every call that follows.
+ */
+static void setup_agent(struct buffer_instance *instance,
+			struct common_record_context *ctx)
+{
+	struct tracecmd_output *network_handle;
+
+	network_handle = create_net_output(ctx, instance->msg_handle);
+	if (!network_handle)
+		die("Failed to create network output");
+
+	add_options(network_handle, ctx);
+	tracecmd_write_cmdlines(network_handle);
+	tracecmd_write_cpus(network_handle, instance->cpu_count);
+	tracecmd_write_buffer_info(network_handle);
+	tracecmd_write_options(network_handle);
+	tracecmd_write_meta_strings(network_handle);
+	tracecmd_msg_finish_sending_data(instance->msg_handle);
+	instance->network_handle = network_handle;
+}
+
+/*
+ * Create one recorder per CPU of every instance.
+ *
+ * @type: trace type flags; with TRACE_TYPE_STREAM set a pipe and a stream
+ *        are created for each recorder
+ * @ctx:  record context handed on to the per-instance setup
+ *
+ * Guest instances are connected first, because their CPU count is only
+ * known after that. The file-scope "pids" table is then allocated and
+ * filled in, and recorder_threads is set to the number of recorders.
+ * Dies on allocation, connection, pipe or stream failure.
+ */
+void start_threads(enum trace_type type, struct common_record_context *ctx)
+{
+	struct buffer_instance *instance;
+	int total_cpu_count = 0;
+	int i = 0;
+
+	for_all_instances(instance) {
+		/* Start the connection now to find out how many CPUs we need */
+		if (is_guest(instance))
+			connect_to_agent(ctx, instance);
+		total_cpu_count += instance->cpu_count;
+	}
+
+	/* make a thread for every CPU we have */
+	pids = calloc(total_cpu_count * (buffers + 1), sizeof(*pids));
+	if (!pids)
+		die("Failed to allocate pids for %d cpus", total_cpu_count);
+
+	for_all_instances(instance) {
+		int *brass = NULL;
+		int x, pid;
+
+		if (is_agent(instance)) {
+			setup_agent(instance, ctx);
+		} else if (is_guest(instance)) {
+			setup_guest(instance);
+		} else if (host) {
+			instance->msg_handle = setup_connection(instance, ctx);
+			if (!instance->msg_handle)
+				die("Failed to make connection");
+		}
+
+		for (x = 0; x < instance->cpu_count; x++) {
+			if (!(type & TRACE_TYPE_STREAM)) {
+				/* No pipe for this recorder */
+				pids[i].brass[0] = -1;
+			} else {
+				brass = pids[i].brass;
+				if (pipe(brass) < 0)
+					die("pipe");
+				pids[i].stream = trace_stream_init(instance, x,
+								   brass[0],
+								   instance->cpu_count,
+								   hooks, handle_init,
+								   ctx->global);
+				if (!pids[i].stream)
+					die("Creating stream for %d", i);
+			}
+			pids[i].cpu = x;
+			pids[i].instance = instance;
+
+			/* Make sure all output is flushed before forking */
+			fflush(stdout);
+
+			pids[i].pid = create_recorder(instance, x, type, brass);
+			pid = pids[i].pid;
+			i++;
+
+			/* The write end of the pipe is closed on this side */
+			if (brass)
+				close(brass[1]);
+			if (pid > 0)
+				add_filter_pid(instance, pid, 1);
+		}
+	}
+	recorder_threads = i;
+}
+
+/*
+ * Create @file, or truncate it to zero length if it already exists.
+ * Dies if the file cannot be opened.
+ */
+static void touch_file(const char *file)
+{
+	int fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+
+	if (fd < 0)
+		die("could not create file %s\n", file);
+
+	close(fd);
+}
+
+/*
+ * Append the per-CPU data of @instance to @handle as one buffer.
+ *
+ * @handle:     output file being written
+ * @instance:   buffer instance whose temp files are appended
+ * @temp_files: caller-provided scratch array; it must have room for
+ *              max(instance->cpu_count, local_cpu_count) entries
+ *
+ * When the instance has fewer CPUs than the host, empty files are created
+ * for the missing ones (see the comment below). Those padding files are
+ * deleted again once the data has been appended, and every temp file name
+ * obtained here is released.
+ */
+static void append_buffer(struct tracecmd_output *handle,
+			  struct buffer_instance *instance,
+			  char **temp_files)
+{
+	int cpu_count = instance->cpu_count;
+	int i;
+
+	/*
+	 * Since we can record remote and virtual machines in the same file
+	 * as the host, the buffers may no longer have matching number of
+	 * CPU data as the host. For backward compatibility for older
+	 * trace-cmd versions, which will blindly read the number of CPUs
+	 * for each buffer instance as there are for the host, if there are
+	 * fewer CPUs on the remote machine than on the host, an "empty"
+	 * CPU is needed for each CPU that the host has that the remote does
+	 * not. If there are more CPUs on the remote, older executables will
+	 * simply ignore them (which is OK, we only need to guarantee that
+	 * old executables don't crash).
+	 */
+	if (instance->cpu_count < local_cpu_count)
+		cpu_count = local_cpu_count;
+
+	for (i = 0; i < cpu_count; i++) {
+		temp_files[i] = get_temp_file(instance, i);
+		if (i >= instance->cpu_count)
+			touch_file(temp_files[i]);
+	}
+
+	tracecmd_append_buffer_cpu_data(handle, tracefs_instance_get_name(instance->tracefs),
+					cpu_count, temp_files);
+
+	/*
+	 * Walk every file obtained above, padding included: the loop has to
+	 * run to cpu_count, otherwise the padding files are never deleted
+	 * and their names are never released.
+	 */
+	for (i = 0; i < cpu_count; i++) {
+		if (i >= instance->cpu_count)
+			delete_temp_file(instance, i);
+		put_temp_file(temp_files[i]);
+	}
+}
+
+/*
+ * Add a TRACECMD_OPTION_GUEST option describing @instance to @handle.
+ *
+ * Payload layout, in order:
+ *   name (NUL terminated) | trace_id (long long) | cpu count (int) |
+ *   cpu count pairs of (cpu index, pid), both int
+ *
+ * The name is the instance name for a network instance, otherwise the
+ * name of the guest found by CID. The pid of a pair is -1 for a network
+ * instance and for CPUs at or beyond guest->cpu_max.
+ *
+ * Returns without adding anything if the guest is unknown or the buffer
+ * cannot be allocated.
+ */
+static void
+add_guest_info(struct tracecmd_output *handle, struct buffer_instance *instance)
+{
+	struct trace_guest *guest = NULL;
+	const char *name;
+	char *buf, *cur;
+	int name_size;
+	int size;
+	int pid;
+	int cpu;
+
+	if (!is_network(instance)) {
+		guest = trace_get_guest(instance->cid, NULL);
+		if (!guest)
+			return;
+		name = guest->name;
+	} else {
+		name = instance->name;
+	}
+
+	name_size = strlen(name) + 1;
+
+	size = name_size;
+	size += sizeof(long long);	/* trace_id */
+	size += sizeof(int);		/* cpu count */
+	size += instance->cpu_count * 2 * sizeof(int);	/* cpu,pid pair */
+
+	buf = calloc(1, size);
+	if (!buf)
+		return;
+
+	cur = buf;
+	strcpy(cur, name);
+	cur += name_size;
+
+	memcpy(cur, &instance->trace_id, sizeof(long long));
+	cur += sizeof(long long);
+
+	memcpy(cur, &instance->cpu_count, sizeof(int));
+	cur += sizeof(int);
+
+	for (cpu = 0; cpu < instance->cpu_count; cpu++) {
+		if (!is_network(instance) && cpu < guest->cpu_max)
+			pid = guest->cpu_pid[cpu];
+		else
+			pid = -1;
+
+		memcpy(cur, &cpu, sizeof(int));
+		cur += sizeof(int);
+		memcpy(cur, &pid, sizeof(int));
+		cur += sizeof(int);
+	}
+
+	tracecmd_add_option(handle, TRACECMD_OPTION_GUEST, size, buf);
+	free(buf);
+}
+
+/*
+ * Add one TRACECMD_OPTION_PROCMAPS option to @handle for every entry of
+ * instance->pid_maps that has at least one library mapping.
+ *
+ * Each option is text: a "pid nr_maps proc_name" line (numbers in hex)
+ * followed by one "start end lib_name" line per mapping.
+ */
+static void
+add_pid_maps(struct tracecmd_output *handle, struct buffer_instance *instance)
+{
+	struct pid_addr_maps *maps;
+	struct trace_seq s;
+	int i;
+
+	trace_seq_init(&s);
+
+	for (maps = instance->pid_maps; maps; maps = maps->next) {
+		/* Nothing to report for a task without mappings */
+		if (!maps->nr_lib_maps)
+			continue;
+
+		trace_seq_reset(&s);
+		trace_seq_printf(&s, "%x %x %s\n",
+				 maps->pid, maps->nr_lib_maps, maps->proc_name);
+
+		for (i = 0; i < maps->nr_lib_maps; i++) {
+			trace_seq_printf(&s, "%llx %llx %s\n",
+					 maps->lib_maps[i].start,
+					 maps->lib_maps[i].end,
+					 maps->lib_maps[i].lib_name);
+		}
+
+		trace_seq_terminate(&s);
+		tracecmd_add_option(handle, TRACECMD_OPTION_PROCMAPS,
+				    s.len + 1, s.buffer);
+	}
+
+	trace_seq_destroy(&s);
+}
+
+/* Store the trace id of @instance in @handle as a TRACECMD_OPTION_TRACEID option. */
+static void
+add_trace_id(struct tracecmd_output *handle, struct buffer_instance *instance)
+{
+	const void *id = &instance->trace_id;
+
+	tracecmd_add_option(handle, TRACECMD_OPTION_TRACEID, sizeof(long long), id);
+}
+
+/*
+ * Add the statistics of @instance to @handle as TRACECMD_OPTION_CPUSTAT
+ * options: first a "Buffer: <name>" banner, then one option per CPU taken
+ * from instance->s_save[].
+ */
+static void
+add_buffer_stat(struct tracecmd_output *handle, struct buffer_instance *instance)
+{
+	struct trace_seq banner;
+	int cpu;
+
+	trace_seq_init(&banner);
+	trace_seq_printf(&banner, "\nBuffer: %s\n\n",
+			 tracefs_instance_get_name(instance->tracefs));
+	tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT,
+			    banner.len+1, banner.buffer);
+	trace_seq_destroy(&banner);
+
+	for (cpu = 0; cpu < instance->cpu_count; cpu++) {
+		struct trace_seq *saved = &instance->s_save[cpu];
+
+		tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT,
+				    saved->len+1, saved->buffer);
+	}
+}
+
+/*
+ * Add every entry of the file-scope "hooks" list to @handle as a
+ * TRACECMD_OPTION_HOOK option holding the hook string with its NUL.
+ */
+static void add_option_hooks(struct tracecmd_output *handle)
+{
+	struct hook_list *hook = hooks;
+
+	while (hook) {
+		int size = strlen(hook->hook) + 1;
+
+		tracecmd_add_option(handle, TRACECMD_OPTION_HOOK,
+				    size, hook->hook);
+		hook = hook->next;
+	}
+}
+
+/*
+ * Add a TRACECMD_OPTION_UNAME option to @handle holding
+ * "sysname nodename release machine" as reported by uname().
+ * Silently does nothing if uname() or the allocation fails.
+ */
+static void add_uname(struct tracecmd_output *handle)
+{
+	struct utsname uts;
+	char *text;
+	int size;
+
+	/* if this fails for some reason, just ignore it */
+	if (uname(&uts) < 0)
+		return;
+
+	/* Four strings, three separating spaces and the trailing NUL */
+	size = strlen(uts.sysname) + strlen(uts.nodename) +
+	       strlen(uts.release) + strlen(uts.machine) + 4;
+
+	text = malloc(size);
+	if (!text)
+		return;
+
+	snprintf(text, size, "%s %s %s %s",
+		 uts.sysname, uts.nodename, uts.release, uts.machine);
+	tracecmd_add_option(handle, TRACECMD_OPTION_UNAME, size, text);
+	free(text);
+}
+
+/*
+ * Add a TRACECMD_OPTION_VERSION option to @handle holding
+ * "VERSION_STRING VERSION_GIT". Silently does nothing if the string
+ * cannot be built.
+ */
+static void add_version(struct tracecmd_output *handle)
+{
+	char *text;
+	int len = asprintf(&text, "%s %s", VERSION_STRING, VERSION_GIT);
+
+	if (len < 0)
+		return;
+
+	/* +1 so the terminating NUL is stored too */
+	tracecmd_add_option(handle, TRACECMD_OPTION_VERSION, len+1, text);
+	free(text);
+}
+
+/*
+ * Print the per-CPU statistics held in instance->s_print[], preceded by
+ * a "Buffer: <name>" banner for any instance other than the top one.
+ * Prints nothing in quiet mode.
+ */
+static void print_stat(struct buffer_instance *instance)
+{
+	int cpu = 0;
+
+	if (quiet)
+		return;
+
+	if (!is_top_instance(instance)) {
+		printf("\nBuffer: %s\n\n",
+		       tracefs_instance_get_name(instance->tracefs));
+	}
+
+	while (cpu < instance->cpu_count) {
+		trace_seq_do_printf(&instance->s_print[cpu]);
+		cpu++;
+	}
+}
+
+/*
+ * Read clock information from the first instance that is not a guest
+ * (the NULL instance is used if there is none).
+ *
+ * @selected: true returns what tracefs_get_clock() reports; false returns
+ *            the content of the "trace_clock" file
+ */
+static char *get_trace_clock(bool selected)
+{
+	struct buffer_instance *instance;
+	struct tracefs_instance *tracefs;
+
+	/* Stop at the first instance that is not a guest */
+	for_all_instances(instance) {
+		if (!is_guest(instance))
+			break;
+	}
+
+	tracefs = instance ? instance->tracefs : NULL;
+
+	if (!selected)
+		return tracefs_instance_file_read(tracefs, "trace_clock", NULL);
+
+	return tracefs_get_clock(tracefs);
+}
+
+/* Bit flags; add_options() tests them against ctx->data_flags. */
+enum {
+ DATA_FL_NONE = 0,
+ DATA_FL_DATE = 1, /* add_options() stores date2ts as TRACECMD_OPTION_DATE */
+ DATA_FL_OFFSET = 2, /* add_options() stores date2ts as TRACECMD_OPTION_OFFSET */
+ DATA_FL_GUEST = 4,
+};
+
+/*
+ * Add the options common to every output to @handle:
+ *  - the date2ts string as a DATE or OFFSET option, when @ctx has one and
+ *    the matching data flag is set (DATE wins if both are set);
+ *  - the content of the trace_clock file (size 0 if it could not be read);
+ *  - hooks, uname and version;
+ *  - the trace id of the top instance, unless there is no top instance.
+ */
+static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx)
+{
+	char *clocks;
+	int date_opt = 0;
+	int clocks_size;
+
+	if (ctx->date2ts) {
+		if (ctx->data_flags & DATA_FL_DATE)
+			date_opt = TRACECMD_OPTION_DATE;
+		else if (ctx->data_flags & DATA_FL_OFFSET)
+			date_opt = TRACECMD_OPTION_OFFSET;
+	}
+
+	if (date_opt) {
+		tracecmd_add_option(handle, date_opt,
+				    strlen(ctx->date2ts)+1, ctx->date2ts);
+	}
+
+	clocks = get_trace_clock(false);
+	clocks_size = clocks ? strlen(clocks)+1 : 0;
+	tracecmd_add_option(handle, TRACECMD_OPTION_TRACECLOCK,
+			    clocks_size, clocks);
+
+	add_option_hooks(handle);
+	add_uname(handle);
+	add_version(handle);
+
+	if (!no_top_instance())
+		add_trace_id(handle, &top_instance);
+
+	free(clocks);
+}
+
+/*
+ * Write the per-CPU temp files of a guest @instance into its existing
+ * output file (instance->output_file).
+ *
+ * The output clock is set to TSCNSEC_CLOCK when the instance has
+ * BUFFER_FL_TSC2NSEC set. Dies if the file cannot be opened, no output
+ * handle can be obtained, memory runs out or the CPU data cannot be
+ * written.
+ */
+static void write_guest_file(struct buffer_instance *instance)
+{
+	const int nr_cpus = instance->cpu_count;
+	struct tracecmd_output *handle;
+	char **temp_files;
+	char *file = instance->output_file;
+	int cpu;
+	int fd;
+
+	fd = open(file, O_RDWR);
+	if (fd < 0)
+		die("error opening %s", file);
+
+	handle = tracecmd_get_output_handle_fd(fd);
+	if (!handle)
+		die("error writing to %s", file);
+
+	if (instance->flags & BUFFER_FL_TSC2NSEC)
+		tracecmd_set_out_clock(handle, TSCNSEC_CLOCK);
+
+	temp_files = malloc(sizeof(*temp_files) * nr_cpus);
+	if (!temp_files)
+		die("failed to allocate temp_files for %d cpus",
+		    nr_cpus);
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		temp_files[cpu] = get_temp_file(instance, cpu);
+		if (!temp_files[cpu])
+			die("failed to allocate memory");
+	}
+
+	if (tracecmd_write_cpu_data(handle, nr_cpus, temp_files, NULL) < 0)
+		die("failed to write CPU data");
+	tracecmd_output_close(handle);
+
+	for (cpu = 0; cpu < nr_cpus; cpu++)
+		put_temp_file(temp_files[cpu]);
+	free(temp_files);
+}
+
+/*
+ * Create the output file named by ctx->output and write its headers.
+ *
+ * The file version from @ctx is applied only when it is non-zero. An
+ * explicit compression setting must be accepted; without one, "any"
+ * compression is requested (result not checked) when the file version is
+ * at least FILE_VERSION_COMPRESSION.
+ *
+ * Returns the handle, or NULL when no output name is set or on failure.
+ * On failure the handle (if any) is closed and the file is unlinked.
+ */
+static struct tracecmd_output *create_output(struct common_record_context *ctx)
+{
+	struct tracecmd_output *handle;
+
+	if (!ctx->output)
+		return NULL;
+
+	handle = tracecmd_output_create(ctx->output);
+	if (!handle) {
+		unlink(ctx->output);
+		return NULL;
+	}
+
+	if (ctx->file_version &&
+	    tracecmd_output_set_version(handle, ctx->file_version))
+		goto fail;
+
+	if (ctx->compression) {
+		if (tracecmd_output_set_compression(handle, ctx->compression))
+			goto fail;
+	} else if (ctx->file_version >= FILE_VERSION_COMPRESSION) {
+		/* Best effort: the return value is deliberately ignored */
+		tracecmd_output_set_compression(handle, "any");
+	}
+
+	if (tracecmd_output_write_headers(handle, listed_events))
+		goto fail;
+
+	return handle;
+
+fail:
+	tracecmd_output_close(handle);
+	unlink(ctx->output);
+	return NULL;
+}
+
+/*
+ * Write the recorded data out once tracing has stopped.
+ *
+ * Guest instances are written to their own files and network connections
+ * are finished first. If any instance is left that is neither, the local
+ * output file is produced:
+ *  - in latency mode, through tracecmd_create_file_latency();
+ *  - otherwise by creating the file, adding options, per-buffer info and
+ *    statistics, pid maps, guest info and the optional tsc2nsec data,
+ *    writing the cmdlines, appending the top instance CPU data and then
+ *    every other local buffer.
+ *
+ * Returns without writing a local file when no instance is local, or
+ * when (outside latency mode) local_cpu_count is zero. Dies when the
+ * output cannot be created or written.
+ */
+static void record_data(struct common_record_context *ctx)
+{
+	struct tracecmd_output *handle;
+	struct buffer_instance *instance;
+	bool local = false;
+	int max_cpu_count = local_cpu_count;
+	char **temp_files;
+	int i;
+
+	for_all_instances(instance) {
+		if (is_guest(instance))
+			write_guest_file(instance);
+		else if (host && instance->msg_handle)
+			finish_network(instance->msg_handle);
+		else
+			local = true;
+	}
+
+	if (!local)
+		return;
+
+	if (latency) {
+		handle = tracecmd_create_file_latency(ctx->output, local_cpu_count,
+						      ctx->file_version, ctx->compression);
+		/* Check before the handle is used, not after */
+		if (!handle)
+			die("could not write to file");
+		tracecmd_set_quiet(handle, quiet);
+		tracecmd_output_close(handle);
+		return;
+	}
+
+	if (!local_cpu_count)
+		return;
+
+	/* Allocate enough temp files to handle each instance */
+	for_all_instances(instance) {
+		if (instance->msg_handle)
+			continue;
+		if (instance->cpu_count > max_cpu_count)
+			max_cpu_count = instance->cpu_count;
+	}
+
+	temp_files = malloc(sizeof(*temp_files) * max_cpu_count);
+	if (!temp_files)
+		die("Failed to allocate temp_files for %d cpus",
+		    max_cpu_count);
+
+	for (i = 0; i < max_cpu_count; i++)
+		temp_files[i] = get_temp_file(&top_instance, i);
+
+	/*
+	 * If top_instance was not used, we still need to create
+	 * empty trace.dat files for it.
+	 */
+	if (no_top_instance() || top_instance.msg_handle) {
+		for (i = 0; i < local_cpu_count; i++)
+			touch_file(temp_files[i]);
+	}
+
+	handle = create_output(ctx);
+	if (!handle)
+		die("Error creating output file");
+	tracecmd_set_quiet(handle, quiet);
+
+	add_options(handle, ctx);
+
+	/* Only record the top instance under TRACECMD_OPTION_CPUSTAT*/
+	if (!no_top_instance() && !top_instance.msg_handle) {
+		struct trace_seq *s = top_instance.s_save;
+
+		for (i = 0; i < local_cpu_count; i++)
+			tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT,
+					    s[i].len+1, s[i].buffer);
+	}
+
+	if (buffers) {
+		for_each_instance(instance) {
+			/* 0 is passed when the count matches the host's */
+			int cpus = instance->cpu_count != local_cpu_count ?
+				instance->cpu_count : 0;
+
+			if (instance->msg_handle)
+				continue;
+			tracecmd_add_buffer_info(handle,
+						 tracefs_instance_get_name(instance->tracefs),
+						 cpus);
+			add_buffer_stat(handle, instance);
+		}
+	}
+
+	if (!no_top_instance() && !top_instance.msg_handle)
+		print_stat(&top_instance);
+
+	for_all_instances(instance) {
+		add_pid_maps(handle, instance);
+	}
+
+	for_all_instances(instance) {
+		if (is_guest(instance))
+			add_guest_info(handle, instance);
+	}
+
+	if (ctx->tsc2nsec.mult) {
+		add_tsc2nsec(handle, &ctx->tsc2nsec);
+		tracecmd_set_out_clock(handle, TSCNSEC_CLOCK);
+	}
+	if (tracecmd_write_cmdlines(handle))
+		die("Writing cmdlines");
+
+	tracecmd_append_cpu_data(handle, local_cpu_count, temp_files);
+
+	for (i = 0; i < max_cpu_count; i++)
+		put_temp_file(temp_files[i]);
+
+	if (buffers) {
+		for_each_instance(instance) {
+			if (instance->msg_handle)
+				continue;
+			print_stat(instance);
+			/* temp_files is reused as scratch space for each buffer */
+			append_buffer(handle, instance, temp_files);
+		}
+	}
+
+	free(temp_files);
+
+	tracecmd_output_close(handle);
+}
+
+/* Selects which function filter write_func_filter() updates. */
+enum filter_type {
+ FUNC_FILTER, /* tracefs_function_filter(), reported as "set_ftrace_filter" */
+ FUNC_NOTRACE, /* tracefs_function_notrace(), reported as "set_ftrace_notrace" */
+};
+
+/* Append @cmd to the set_ftrace_filter file of @instance and return the result of the append. */
+static int filter_command(struct tracefs_instance *instance, const char *cmd)
+{
+	int ret;
+
+	ret = tracefs_instance_file_append(instance, "set_ftrace_filter", cmd);
+	return ret;
+}
+
+/*
+ * Apply the functions queued in *list to the filter selected by @type.
+ *
+ * @type:     FUNC_FILTER or FUNC_NOTRACE
+ * @instance: buffer instance whose tracefs instance is updated
+ * @list:     queued functions; every entry is unlinked from the list and
+ *            freed as it is applied
+ *
+ * The filter is reset first, then each function is added with
+ * TRACEFS_FL_CONTINUE and a final call without flags ends the update.
+ * For FUNC_FILTER, entries containing ':' are treated as commands: they
+ * are set aside and appended to set_ftrace_filter afterwards.
+ *
+ * Returns 0 when the list is empty, a negative value if the reset fails,
+ * otherwise the result of the last call made. Dies if a function or
+ * command cannot be written.
+ */
+static int write_func_filter(enum filter_type type, struct buffer_instance *instance,
+			     struct func_list **list)
+{
+	int (*apply)(struct tracefs_instance *instance, const char *filter,
+		     const char *module, unsigned int flags);
+	struct func_list *commands = NULL;
+	struct func_list *item;
+	const char *file;
+	int ret = -1;
+
+	if (!*list)
+		return 0;
+
+	switch (type) {
+	case FUNC_FILTER:
+		apply = tracefs_function_filter;
+		file = "set_ftrace_filter";
+		break;
+	case FUNC_NOTRACE:
+		apply = tracefs_function_notrace;
+		file = "set_ftrace_notrace";
+		break;
+	}
+
+	ret = apply(instance->tracefs, NULL, NULL,
+		    TRACEFS_FL_RESET | TRACEFS_FL_CONTINUE);
+	if (ret < 0)
+		return ret;
+
+	while ((item = *list) != NULL) {
+		*list = item->next;
+
+		/* Do commands separately at the end */
+		if (type == FUNC_FILTER && strstr(item->func, ":")) {
+			item->next = commands;
+			commands = item;
+			continue;
+		}
+
+		ret = apply(instance->tracefs, item->func, item->mod,
+			    TRACEFS_FL_CONTINUE);
+		if (ret < 0)
+			goto failed;
+		free(item);
+	}
+	ret = apply(instance->tracefs, NULL, NULL, 0);
+
+	/* Now add any commands */
+	while ((item = commands) != NULL) {
+		commands = item->next;
+		ret = filter_command(instance->tracefs, item->func);
+		if (ret < 0)
+			goto failed;
+		free(item);
+	}
+	return ret;
+
+ failed:
+	die("Failed to write %s to %s.\n"
+	    "Perhaps this function is not available for tracing.\n"
+	    "run 'trace-cmd list -f %s' to see if it is.",
+	    item->func, file, item->func);
+	return ret;
+}
+
+/*
+ * Write the functions queued in *list to tracing file @file of
+ * @instance, truncating the file first.
+ *
+ * Each entry is written as "func", followed by ":mod:<module>" when it
+ * names a module, followed by a space. Entries are unlinked from the
+ * list and freed as they are written.
+ *
+ * Returns 0 on success or when the list is empty, -1 if the file cannot
+ * be opened. Dies if a write fails.
+ */
+static int write_func_file(struct buffer_instance *instance,
+			    const char *file, struct func_list **list)
+{
+	static const char mod_prefix[] = ":mod:";
+	struct func_list *item;
+	char *path;
+	int ret = -1;
+	int fd;
+
+	if (!*list)
+		return 0;
+
+	path = tracefs_instance_get_file(instance->tracefs, file);
+
+	fd = open(path, O_WRONLY | O_TRUNC);
+	if (fd < 0)
+		goto put_path;
+
+	while ((item = *list) != NULL) {
+		*list = item->next;
+
+		ret = write(fd, item->func, strlen(item->func));
+		if (ret < 0)
+			goto failed;
+
+		if (item->mod) {
+			ret = write(fd, mod_prefix, strlen(mod_prefix));
+			if (ret < 0)
+				goto failed;
+			ret = write(fd, item->mod, strlen(item->mod));
+			if (ret < 0)
+				goto failed;
+		}
+
+		ret = write(fd, " ", 1);
+		if (ret < 0)
+			goto failed;
+
+		free(item);
+	}
+	close(fd);
+	ret = 0;
+
+ put_path:
+	tracefs_put_tracing_file(path);
+	return ret;
+
+ failed:
+	die("Failed to write %s to %s.\n"
+	    "Perhaps this function is not available for tracing.\n"
+	    "run 'trace-cmd list -f %s' to see if it is.",
+	    item->func, file, item->func);
+	return ret;
+}
+
+/*
+ * Report whether @instance has a function filter in place, judged by the
+ * first character of its set_ftrace_filter file.
+ *
+ * Returns 1 if that character is neither '#' nor white space, 0
+ * otherwise. Also returns 0, after a warning, when the file cannot be
+ * opened; a failed read leaves the '#' default in place and so yields 0.
+ */
+static int functions_filtered(struct buffer_instance *instance)
+{
+	char first[1] = { '#' };
+	char *path;
+	int fd;
+
+	path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_filter");
+	fd = open(path, O_RDONLY);
+	tracefs_put_tracing_file(path);
+
+	if (fd < 0) {
+		if (!is_top_instance(instance))
+			warning("Can not set set_ftrace_filter for %s",
+				tracefs_instance_get_name(instance->tracefs));
+		else
+			warning("Can not set set_ftrace_filter");
+		return 0;
+	}
+
+	/*
+	 * If functions are not filtered, then the first character
+	 * will be '#'. Make sure it is not an '#' and also not space.
+	 */
+	read(fd, first, 1);
+	close(fd);
+
+	return (first[0] != '#' && !isspace(first[0])) ? 1 : 0;
+}
+
+/*
+ * Apply the queued function filters of @instance. Guests are skipped.
+ *
+ * Every instance gets its filter_funcs written. The top instance also
+ * gets the graph functions; when its plugin is "function_graph" the
+ * notrace list is first offered to set_graph_notrace, and goes to
+ * set_ftrace_notrace only if that write did not succeed. Other instances
+ * get their notrace list applied without the result being checked.
+ *
+ * With func_stack set, the top instance must end up with a function
+ * filter in place, otherwise this dies. Always sets
+ * clear_function_filters before returning.
+ */
+static void set_funcs(struct buffer_instance *instance)
+{
+	bool graph_notrace_done = false;
+	int ret;
+
+	if (is_guest(instance))
+		return;
+
+	ret = write_func_filter(FUNC_FILTER, instance, &instance->filter_funcs);
+	if (ret < 0)
+		die("set_ftrace_filter does not exist. Can not filter functions");
+
+	/* graph tracing currently only works for top instance */
+	if (!is_top_instance(instance)) {
+		write_func_filter(FUNC_NOTRACE, instance, &instance->notrace_funcs);
+	} else {
+		ret = write_func_file(instance, "set_graph_function", &graph_funcs);
+		if (ret < 0)
+			die("set_graph_function does not exist.");
+
+		if (instance->plugin && strcmp(instance->plugin, "function_graph") == 0) {
+			ret = write_func_file(instance, "set_graph_notrace",
+					      &instance->notrace_funcs);
+			if (!ret)
+				graph_notrace_done = true;
+		}
+
+		if (!graph_notrace_done) {
+			ret = write_func_filter(FUNC_NOTRACE, instance,
+						&instance->notrace_funcs);
+			if (ret < 0)
+				die("set_ftrace_notrace does not exist. Can not filter functions");
+		}
+	}
+
+	/* make sure we are filtering functions */
+	if (func_stack && is_top_instance(instance)) {
+		if (!functions_filtered(instance))
+			die("Function stack trace set, but functions not filtered");
+		save_option(instance, FUNC_STACK_TRACE);
+	}
+	clear_function_filters = 1;
+}
+
+/*
+ * Push a new entry for @func (optionally in module @mod) onto the front
+ * of *list. The strings are stored by pointer, not copied. Dies if the
+ * entry cannot be allocated.
+ */
+static void add_func(struct func_list **list, const char *mod, const char *func)
+{
+	struct func_list *entry = malloc(sizeof(*entry));
+
+	if (!entry)
+		die("Failed to allocate function descriptor");
+
+	entry->next = *list;
+	entry->mod = mod;
+	entry->func = func;
+
+	*list = entry;
+}
+
+/*
+ * Event callback used by find_time_stamp(): look for the record whose
+ * "buf" field holds STAMP followed by a newline.
+ *
+ * @context: points to the unsigned long long receiving the timestamp
+ *
+ * Returns 1 after storing the record's timestamp, 0 if this record is
+ * not the marker, -1 if @context is NULL.
+ */
+static int find_ts(struct tep_event *event, struct tep_record *record,
+		   int cpu, void *context)
+{
+	unsigned long long *stamp_ts = (unsigned long long *)context;
+	struct tep_format_field *buf_field;
+
+	if (!stamp_ts)
+		return -1;
+
+	buf_field = tep_find_field(event, "buf");
+	if (!buf_field)
+		return 0;
+
+	if (strcmp(STAMP"\n", record->data + buf_field->offset) != 0)
+		return 0;
+
+	*stamp_ts = record->ts;
+	return 1;
+}
+
+/*
+ * Run find_ts() over the raw events of @instance.
+ *
+ * Returns the timestamp find_ts() stored (0 if it stored none) when the
+ * iteration returns 0, and 0 when the iteration returns anything else.
+ */
+static unsigned long long find_time_stamp(struct tep_handle *tep,
+					  struct tracefs_instance *instance)
+{
+	unsigned long long ts = 0;
+	int ret;
+
+	ret = tracefs_iterate_raw_events(tep, instance, NULL, 0, find_ts, &ts);
+
+	return ret ? 0 : ts;
+}
+
+
+/* Read tracing file @file of the top instance; @psize is passed through to the read call. */
+static char *read_top_file(char *file, int *psize)
+{
+	struct tracefs_instance *top = top_instance.tracefs;
+
+	return tracefs_instance_file_read(top, file, psize);
+}
+
+/*
+ * Build a tep handle loaded with the "ftrace" event system, with the
+ * file endianness set from tracecmd_host_bigendian() and the header page
+ * parsed from the top instance's events/header_page file.
+ *
+ * Returns the handle, or NULL on any failure (the handle is freed).
+ */
+static struct tep_handle *get_ftrace_tep(void)
+{
+	const char *systems[] = {"ftrace", NULL};
+	struct tep_handle *tep;
+	char *header;
+	int header_size;
+	int ret;
+
+	tep = tracefs_local_events_system(NULL, systems);
+	if (!tep)
+		return NULL;
+
+	tep_set_file_bigendian(tep, tracecmd_host_bigendian());
+
+	header = read_top_file("events/header_page", &header_size);
+	if (!header)
+		goto fail;
+
+	ret = tep_parse_header_page(tep, header, header_size, sizeof(unsigned long));
+	free(header);
+	if (ret >= 0)
+		return tep;
+
+fail:
+	tep_free(tep);
+	return NULL;
+}
+
+/*
+ * Write a marker into the ftrace buffer and read it back, so that the
+ * trace timestamp of the marker can be matched to the wall clock time at
+ * which it was written.
+ *
+ * Up to date2ts_tries attempts are made; the attempt with the shortest
+ * wall clock window around the write is kept and the midpoint of that
+ * window is taken as the marker's wall clock time.
+ *
+ * Returns a malloc'ed hex string ("0x...") holding the difference between
+ * that wall clock time and the trace timestamp, divided by 1000, or NULL
+ * if it could not be determined.
+ */
+static char *get_date_to_ts(void)
+{
+	struct tep_handle *tep;
+	unsigned long long best = -1ULL;
+	unsigned long long start_ns;
+	unsigned long long end_ns;
+	unsigned long long window;
+	unsigned long long min_stamp;
+	unsigned long long min_ts;
+	unsigned long long ts;
+	struct timespec start;
+	struct timespec end;
+	char *date2ts = NULL;
+	int tfd;
+	int i;
+
+	/* Set up a tep to read the raw format */
+	tep = get_ftrace_tep();
+	if (!tep) {
+		warning("failed to alloc tep, --date ignored");
+		return NULL;
+	}
+
+	tfd = tracefs_instance_file_open(NULL, "trace_marker", O_WRONLY);
+	if (tfd < 0) {
+		warning("Can not open 'trace_marker', --date ignored");
+		goto out_pevent;
+	}
+
+	for (i = 0; i < date2ts_tries; i++) {
+		tracecmd_disable_tracing();
+		clear_trace_instances();
+		tracecmd_enable_tracing();
+
+		clock_gettime(CLOCK_REALTIME, &start);
+		write(tfd, STAMP, 5);
+		clock_gettime(CLOCK_REALTIME, &end);
+
+		tracecmd_disable_tracing();
+		ts = find_time_stamp(tep, NULL);
+		if (!ts)
+			continue;
+
+		end_ns = (unsigned long long)end.tv_sec * 1000000000LL;
+		end_ns += (unsigned long long)end.tv_nsec;
+		start_ns = (unsigned long long)start.tv_sec * 1000000000LL;
+		start_ns += (unsigned long long)start.tv_nsec;
+		window = end_ns - start_ns;
+
+		/* Keep the attempt with the tightest window */
+		if (window < best) {
+			min_ts = ts;
+			min_stamp = end_ns - window / 2;
+			best = window;
+		}
+	}
+
+	close(tfd);
+
+	if (best == -1ULL) {
+		warning("Failed to make date offset, --date ignored");
+		goto out_pevent;
+	}
+
+	/* 16 hex chars + 0x + \0 */
+	date2ts = malloc(19);
+	if (!date2ts)
+		goto out_pevent;
+
+	/*
+	 * The difference between the timestamp and the gtod is
+	 * stored as an ASCII string in hex.
+	 */
+	snprintf(date2ts, 19, "0x%llx", (min_stamp - min_ts)/1000);
+
+ out_pevent:
+	tep_free(tep);
+
+	return date2ts;
+}
+
+/*
+ * Write instance->buffer_size to the buffer_size_kb file of @instance.
+ *
+ * Guests and instances with a zero size are left alone. Dies on a
+ * negative size; only warns if the file cannot be opened or written.
+ */
+static void set_buffer_size_instance(struct buffer_instance *instance)
+{
+	char value[BUFSIZ];
+	char *path;
+	int size_kb = instance->buffer_size;
+	int fd;
+
+	if (is_guest(instance) || !size_kb)
+		return;
+
+	if (size_kb < 0)
+		die("buffer size must be positive");
+
+	snprintf(value, BUFSIZ, "%d", size_kb);
+
+	path = tracefs_instance_get_file(instance->tracefs, "buffer_size_kb");
+	fd = open(path, O_WRONLY);
+	if (fd >= 0) {
+		if (write(fd, value, strlen(value)) < 0)
+			warning("Can't write to %s", path);
+		close(fd);
+	} else {
+		warning("can't open %s", path);
+	}
+
+	tracefs_put_tracing_file(path);
+}
+
+/* Apply the configured buffer size to every instance. */
+void set_buffer_size(void)
+{
+	struct buffer_instance *instance;
+
+	for_all_instances(instance) {
+		set_buffer_size_instance(instance);
+	}
+}
+
+/*
+ * Clear the trigger of the event @iter currently points at.
+ *
+ * @path: events directory; the caller's string is left untouched
+ * @iter: iterator whose system and event entries name the event
+ *
+ * Returns the result of clear_trigger() when the event directory has a
+ * "trigger" file. Otherwise returns the result of the last stat() made:
+ * negative if it failed, 0 if the event path is not a directory.
+ */
+static int
+process_event_trigger(char *path, struct event_iter *iter)
+{
+	const char *system = iter->system_dent->d_name;
+	const char *event = iter->event_dent->d_name;
+	struct stat st;
+	char *system_dir;
+	char *event_dir;
+	char *trigger = NULL;
+	int ret;
+
+	system_dir = append_file(path, system);
+	event_dir = append_file(system_dir, event);
+	free(system_dir);
+
+	ret = stat(event_dir, &st);
+	if (ret < 0 || !S_ISDIR(st.st_mode))
+		goto out;
+
+	trigger = append_file(event_dir, "trigger");
+
+	ret = stat(trigger, &st);
+	if (ret >= 0)
+		ret = clear_trigger(trigger);
+
+ out:
+	free(trigger);
+	free(event_dir);
+	return ret;
+}
+
+/*
+ * Walk every event below @path once and try to clear its trigger.
+ * Returns the number of events for which process_event_trigger()
+ * reported a positive result.
+ */
+static int clear_triggers_pass(char *path)
+{
+	struct event_iter *iter = trace_event_iter_alloc(path);
+	enum event_iter_type type;
+	char *system = NULL;
+	int pending = 0;
+
+	while ((type = trace_event_iter_next(iter, path, system))) {
+		if (type == EVENT_ITER_SYSTEM)
+			system = iter->system_dent->d_name;
+		else if (process_event_trigger(path, iter) > 0)
+			pending++;
+	}
+	trace_event_iter_free(iter);
+
+	return pending;
+}
+
+/*
+ * Clear the triggers of all events of @instance. When the first pass
+ * leaves events pending, further passes are made (at most one per
+ * pending event) until a pass reports nothing pending.
+ */
+static void clear_instance_triggers(struct buffer_instance *instance)
+{
+	char *path;
+	int retry;
+	int i;
+
+	path = tracefs_instance_get_file(instance->tracefs, "events");
+	if (!path)
+		die("malloc");
+
+	retry = clear_triggers_pass(path);
+
+	/* Order matters for some triggers */
+	for (i = 0; i < retry; i++) {
+		if (!clear_triggers_pass(path))
+			break;
+	}
+
+	tracefs_put_tracing_file(path);
+}
+
+/*
+ * Clear the "filter" file of the event that @iter currently points at,
+ * provided the event directory and its filter file exist.
+ * @processed is accepted but not used here.
+ */
+static void
+process_event_filter(char *path, struct event_iter *iter, enum event_process *processed)
+{
+	char *sys_dir, *event_dir, *filter_file = NULL;
+	struct stat st;
+
+	sys_dir = append_file(path, iter->system_dent->d_name);
+	event_dir = append_file(sys_dir, iter->event_dent->d_name);
+	free(sys_dir);
+
+	if (stat(event_dir, &st) >= 0 && S_ISDIR(st.st_mode)) {
+		filter_file = append_file(event_dir, "filter");
+		if (stat(filter_file, &st) >= 0)
+			clear_filter(filter_file);
+	}
+
+	free(filter_file);
+	free(event_dir);
+}
+
+/* Clear the filter of every event found under the "events" directory of @instance. */
+static void clear_instance_filters(struct buffer_instance *instance)
+{
+	enum event_process processed = PROCESSED_NONE;
+	enum event_iter_type type;
+	struct event_iter *iter;
+	char *system = NULL;
+	char *path;
+
+	path = tracefs_instance_get_file(instance->tracefs, "events");
+	if (!path)
+		die("malloc");
+
+	iter = trace_event_iter_alloc(path);
+
+	for (;;) {
+		type = trace_event_iter_next(iter, path, system);
+		if (!type)
+			break;
+
+		/* A system entry only updates the current system name */
+		if (type == EVENT_ITER_SYSTEM) {
+			system = iter->system_dent->d_name;
+			continue;
+		}
+
+		process_event_filter(path, iter, &processed);
+	}
+
+	trace_event_iter_free(iter);
+	tracefs_put_tracing_file(path);
+}
+
+/* Clear the event filters of every instance. */
+static void clear_filters(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		clear_instance_filters(inst);
+	}
+}
+
+/* Write "local" to the "trace_clock" file of every instance. */
+static void reset_clock(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		tracefs_instance_file_write(inst->tracefs, "trace_clock", "local");
+	}
+}
+
+/*
+ * Write a mask with one bit set per counted CPU to the
+ * "tracing_cpumask" file of every instance.
+ *
+ * The mask is a list of comma separated 32 bit hex words; the first
+ * word holds the remaining high bits, every following word is full.
+ */
+static void reset_cpu_mask(void)
+{
+	struct buffer_instance *inst;
+	int last_cpu = tracecmd_count_cpus() - 1;
+	int full_words = last_cpu / 32;
+	int top_bits = last_cpu % 32 + 1;
+	/* Up to 9 characters per word (8 hex digits and a comma) plus the NUL */
+	char mask[(full_words + 1) * 9 + 1];
+	int w;
+
+	sprintf(mask, "%x", (unsigned int)((1ULL << top_bits) - 1));
+	for (w = 0; w < full_words; w++)
+		strcat(mask, ",ffffffff");
+
+	for_all_instances(inst) {
+		tracefs_instance_file_write(inst->tracefs, "tracing_cpumask", mask);
+	}
+}
+
+/* Hand an empty pid string to add_event_pid() for every instance. */
+static void reset_event_pid(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		add_event_pid(inst, "");
+	}
+}
+
+/* Clear the event triggers of every instance. */
+static void clear_triggers(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		clear_instance_triggers(inst);
+	}
+}
+
+/*
+ * Write a single space to the "error_log" file of @instance.
+ * Nothing is done when the file does not exist or its path can not
+ * be obtained.
+ */
+static void clear_instance_error_log(struct buffer_instance *instance)
+{
+	char *log_path = NULL;
+
+	if (tracefs_file_exists(instance->tracefs, "error_log"))
+		log_path = tracefs_instance_get_file(instance->tracefs, "error_log");
+	if (!log_path)
+		return;
+
+	write_file(log_path, " ");
+	tracefs_put_tracing_file(log_path);
+}
+
+static void clear_error_log(void)
+{
+ struct buffer_instance *instance;
+
+ for_all_instances(instance)
+ clear_instance_error_log(instance);
+}
+
+/* Destroy all dynamic events, forcing the removal. */
+static void clear_all_dynamic_events(void)
+{
+	/*
+	 * Event probes go first: they may be attached to other dynamic
+	 * events. Every remaining type is destroyed afterwards.
+	 */
+	tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_EPROBE, true);
+
+	tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_ALL, true);
+}
+
+/* Clear the function and function graph filter files of every instance. */
+static void clear_func_filters(void)
+{
+	const char * const files[] = {
+		"set_ftrace_filter",
+		"set_ftrace_notrace",
+		"set_graph_function",
+		"set_graph_notrace",
+		NULL
+	};
+	struct buffer_instance *inst;
+	const char * const *name;
+	char *path;
+
+	for_all_instances(inst) {
+		for (name = files; *name; name++) {
+			path = tracefs_instance_get_file(inst->tracefs, *name);
+			clear_func_filter(path);
+			tracefs_put_tracing_file(path);
+		}
+	}
+}
+
+/*
+ * Create the tracefs instance of every named, non guest buffer
+ * instance that does not have one yet.
+ */
+static void make_instances(void)
+{
+	struct buffer_instance *inst;
+
+	for_each_instance(inst) {
+		if (is_guest(inst) || !inst->name || inst->tracefs)
+			continue;
+
+		inst->tracefs = tracefs_instance_create(inst->name);
+		if (!inst->tracefs)
+			continue;
+
+		/* Don't delete instances that already exist */
+		if (!tracefs_instance_is_new(inst->tracefs))
+			inst->flags |= BUFFER_FL_KEEP;
+	}
+}
+
+/*
+ * Destroy the tracefs instance of every buffer instance that is
+ * neither a guest nor flagged BUFFER_FL_KEEP, closing its
+ * tracing_on descriptor first.
+ */
+void tracecmd_remove_instances(void)
+{
+	struct buffer_instance *inst;
+
+	for_each_instance(inst) {
+		/* Only delete what we created */
+		if (!is_guest(inst) && !(inst->flags & BUFFER_FL_KEEP)) {
+			if (inst->tracing_on_fd > 0) {
+				close(inst->tracing_on_fd);
+				inst->tracing_on_fd = 0;
+			}
+			tracefs_instance_destroy(inst->tracefs);
+		}
+	}
+}
+
+/*
+ * Die unless @plugin is listed in the top level "available_tracers"
+ * file. Unless quiet is set, the accepted plugin is reported on stderr.
+ */
+static void check_plugin(const char *plugin)
+{
+	static const char delim[] = " \t\n";
+	bool found = false;
+	char *buf;
+	char *tok;
+	char *sav;
+
+	/*
+	 * nop is special. We may want to just trace
+	 * trace_printks, that are in the kernel.
+	 */
+	if (strcmp(plugin, "nop") == 0)
+		return;
+
+	buf = read_top_file("available_tracers", NULL);
+	if (!buf)
+		die("No plugins available");
+
+	/*
+	 * Split on any white space, not just spaces, so that a newline
+	 * left at the end of the list does not stick to the last tracer
+	 * name and keep it from matching.
+	 */
+	for (tok = strtok_r(buf, delim, &sav); tok;
+	     tok = strtok_r(NULL, delim, &sav)) {
+		if (strcmp(tok, plugin) == 0) {
+			found = true;
+			break;
+		}
+	}
+	if (!found)
+		die ("Plugin '%s' does not exist", plugin);
+
+	if (!quiet)
+		fprintf(stderr, " plugin '%s'\n", plugin);
+	free(buf);
+}
+
+/*
+ * Die when the top instance uses a plugin whose name starts with
+ * "function" together with func_stack but has no function filter.
+ */
+static void check_function_plugin(void)
+{
+	const char *plugin;
+
+	/* We only care about the top_instance */
+	if (no_top_instance())
+		return;
+
+	plugin = top_instance.plugin;
+	if (!plugin)
+		return;
+
+	if (strncmp(plugin, "function", 8) != 0)
+		return;
+
+	if (func_stack && !top_instance.filter_funcs)
+		die("Must supply function filtering with --func-stack\n");
+}
+
+/*
+ * Returns non zero when @instance is a guest, is flagged for
+ * profiling, or has a plugin, events or get_procmap set.
+ */
+static int __check_doing_something(struct buffer_instance *instance)
+{
+	if (is_guest(instance))
+		return 1;
+
+	if (instance->flags & BUFFER_FL_PROFILE)
+		return 1;
+
+	return instance->plugin || instance->events || instance->get_procmap;
+}
+
+/* Die unless at least one instance has something to do. */
+static void check_doing_something(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst)
+		if (__check_doing_something(inst))
+			return;
+
+	die("no event or plugin was specified... aborting");
+}
+
+/*
+ * Validate the plugin selected for @instance and, unless @type is
+ * TRACE_TYPE_EXTRACT, set it. Guests and instances without a plugin
+ * are ignored.
+ */
+static void
+update_plugin_instance(struct buffer_instance *instance,
+		       enum trace_type type)
+{
+	static const char * const latency_tracers[] = {
+		"irqsoff",
+		"preemptoff",
+		"preemptirqsoff",
+		"wakeup",
+		"wakeup_rt",
+		NULL
+	};
+	const char *plugin = instance->plugin;
+	int is_latency = 0;
+	int i;
+
+	if (is_guest(instance) || !plugin)
+		return;
+
+	check_plugin(plugin);
+
+	for (i = 0; latency_tracers[i] && !is_latency; i++)
+		is_latency = strcmp(plugin, latency_tracers[i]) == 0;
+
+	if (is_latency) {
+		/*
+		 * Latency tracers just save the trace and kill
+		 * the threads.
+		 */
+		latency = 1;
+		if (host)
+			die("Network tracing not available with latency tracer plugins");
+		if (type & TRACE_TYPE_STREAM)
+			die("Streaming is not available with latency tracer plugins");
+	} else if (type == TRACE_TYPE_RECORD && latency) {
+		die("Can not record latency tracer and non latency trace together");
+	}
+
+	if (fset < 0 &&
+	    (strcmp(plugin, "function") == 0 ||
+	     strcmp(plugin, "function_graph") == 0))
+		die("function tracing not configured on this kernel");
+
+	if (type != TRACE_TYPE_EXTRACT)
+		set_plugin_instance(instance, plugin);
+}
+
+/* Validate and set the plugin of every instance for trace type @type. */
+static void update_plugins(enum trace_type type)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		update_plugin_instance(inst, type);
+	}
+}
+
+/*
+ * Allocate the s_save and s_print trace_seq arrays of every instance,
+ * one element per CPU of the instance. Dies on allocation failure.
+ */
+static void allocate_seq(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		size_t bytes = sizeof(struct trace_seq) * inst->cpu_count;
+
+		inst->s_save = malloc(bytes);
+		inst->s_print = malloc(bytes);
+		if (!inst->s_save || !inst->s_print)
+			die("Failed to allocate instance info");
+	}
+}
+
+/*
+ * Look up the "overrun: " and "commit overrun: " counters in the text
+ * of @src and append a one line summary for @cpu to @dst. Nothing is
+ * appended when both counters are zero; a commit overrun counter that
+ * is not found is reported as -1.
+ */
+static void add_overrun(int cpu, struct trace_seq *src, struct trace_seq *dst)
+{
+	static const char overrun_tag[] = "overrun: ";
+	static const char commit_tag[] = "commit overrun: ";
+	const char *pos;
+	int commit_lost = -1;
+	int lost;
+
+	pos = strstr(src->buffer, overrun_tag);
+	if (!pos) {
+		/* Warn? */
+		trace_seq_printf(dst, "CPU %d: no overrun found?\n", cpu);
+		return;
+	}
+
+	/* Step over the tag; the commit counter is searched after it */
+	pos += sizeof(overrun_tag) - 1;
+	lost = atoi(pos);
+
+	pos = strstr(pos, commit_tag);
+	if (pos)
+		commit_lost = atoi(pos + sizeof(commit_tag) - 1);
+
+	if (lost == 0 && commit_lost == 0)
+		return;
+
+	trace_seq_printf(dst, "CPU %d:", cpu);
+
+	if (lost != 0)
+		trace_seq_printf(dst, " %d events lost", lost);
+
+	if (commit_lost != 0)
+		trace_seq_printf(dst, " %d events lost due to commit overrun",
+				 commit_lost);
+
+	trace_seq_putc(dst, '\n');
+}
+
+/*
+ * For every CPU of every non guest instance, save the CPU statistics
+ * in s_save and the derived overrun summary in s_print.
+ */
+static void record_stats(void)
+{
+	struct buffer_instance *inst;
+	int cpu;
+
+	for_all_instances(inst) {
+		struct trace_seq *saved;
+		struct trace_seq *printed;
+
+		if (is_guest(inst))
+			continue;
+
+		saved = inst->s_save;
+		printed = inst->s_print;
+		for (cpu = 0; cpu < inst->cpu_count; cpu++) {
+			struct trace_seq *stat = &saved[cpu];
+			struct trace_seq *summary = &printed[cpu];
+
+			trace_seq_init(stat);
+			trace_seq_init(summary);
+			trace_seq_printf(stat, "CPU: %d\n", cpu);
+			tracecmd_stat_cpu_instance(inst, stat, cpu);
+			add_overrun(cpu, stat, summary);
+		}
+	}
+}
+
+/* Call print_stat() for every instance. */
+static void print_stats(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		print_stat(inst);
+	}
+}
+
+/* Destroy the per CPU s_save and s_print sequences of every non guest instance. */
+static void destroy_stats(void)
+{
+	struct buffer_instance *inst;
+	int cpu;
+
+	for_all_instances(inst) {
+		if (is_guest(inst))
+			continue;
+
+		for (cpu = 0; cpu < inst->cpu_count; cpu++) {
+			trace_seq_destroy(inst->s_save + cpu);
+			trace_seq_destroy(inst->s_print + cpu);
+		}
+	}
+}
+
+/*
+ * Push @event on the front of the listed_events list. The string is
+ * referenced, not copied.
+ */
+static void list_event(const char *event)
+{
+	struct tracecmd_event_list *node = malloc(sizeof(*node));
+
+	if (!node)
+		die("Failed to allocate list for event");
+
+	node->glob = event;
+	node->next = listed_events;
+	listed_events = node;
+}
+
+/* Glob that stands for every event of every system */
+#define ALL_EVENTS "*/*"
+
+/* Replace the listed_events list by a single ALL_EVENTS entry. */
+static void record_all_events(void)
+{
+	struct tracecmd_event_list *list;
+	struct tracecmd_event_list *next;
+
+	/* Drop whatever has been listed so far */
+	for (list = listed_events; list; list = next) {
+		next = list->next;
+		listed_events = next;
+		free(list);
+	}
+
+	list = malloc(sizeof(*list));
+	if (!list)
+		die("Failed to allocate list for all events");
+
+	list->glob = ALL_EVENTS;
+	list->next = NULL;
+	listed_events = list;
+}
+
+/* Returns non zero when the first listed event is the ALL_EVENTS glob. */
+static int recording_all_events(void)
+{
+	if (!listed_events)
+		return 0;
+
+	return strcmp(listed_events->glob, ALL_EVENTS) == 0;
+}
+
+/*
+ * Add @trigger to the triggers of @event. The first trigger is stored
+ * as a copy; further triggers are appended, separated by a newline.
+ * Dies when memory can not be allocated.
+ */
+static void add_trigger(struct event_list *event, const char *trigger)
+{
+	char *combined;
+	int ret;
+
+	if (!event->trigger) {
+		ret = asprintf(&event->trigger, "%s", trigger);
+		if (ret < 0)
+			die("Failed to allocate event trigger");
+		return;
+	}
+
+	/*
+	 * Check the result before using it: a failed realloc() returns
+	 * NULL, which must not be handed to strcat().
+	 */
+	combined = realloc(event->trigger,
+			   strlen(event->trigger) + strlen("\n") +
+			   strlen(trigger) + 1);
+	if (!combined)
+		die("Failed to allocate event trigger");
+
+	strcat(combined, "\n");
+	strcat(combined, trigger);
+	event->trigger = combined;
+}
+
+/*
+ * Probe for the "stacktrace" trigger by writing it to the trigger
+ * file of the sched_switch event of @instance.
+ *
+ * Returns 1 when the whole string was written, 0 otherwise.
+ */
+static int test_stacktrace_trigger(struct buffer_instance *instance)
+{
+	int works = 0;
+	char *path;
+	int fd;
+
+	path = tracefs_instance_get_file(instance->tracefs,
+					 "events/sched/sched_switch/trigger");
+
+	clear_trigger(path);
+
+	fd = open(path, O_WRONLY);
+	if (fd >= 0) {
+		works = write(fd, "stacktrace", 10) == 10;
+		close(fd);
+	}
+
+	tracefs_put_tracing_file(path);
+
+	return works;
+}
+
+/*
+ * Make sure the event @event_str ("system:event" or a bare name) is
+ * part of the events of @instance and, when @stack is set, carries a
+ * "stacktrace" trigger.
+ *
+ * Returns 0 on success, -1 when the event does not exist under
+ * "events/" of the instance.
+ */
+static int
+profile_add_event(struct buffer_instance *instance, const char *event_str, int stack)
+{
+	struct event_list *event;
+	char buf[BUFSIZ];
+	char *name;
+
+	snprintf(buf, sizeof(buf), "events/%s", event_str);
+
+	/* Turn "system:event" into the "system/event" path form */
+	name = strchr(buf, ':');
+	if (name)
+		*name++ = '/';
+
+	if (!trace_check_file_exists(instance, buf))
+		return -1;
+
+	/* Only add event if it isn't already added */
+	for (event = instance->events; event; event = event->next) {
+		if ((name && strcmp(event->event, name) == 0) ||
+		    strcmp(event->event, event_str) == 0)
+			break;
+	}
+
+	if (!event) {
+		event = calloc(1, sizeof(*event));
+		if (!event)
+			die("Failed to allocate event");
+		event->event = event_str;
+		add_event(instance, event);
+	}
+
+	if (!recording_all_events())
+		list_event(event_str);
+
+	if (!stack)
+		return 0;
+
+	if (!event->trigger || !strstr(event->trigger, "stacktrace"))
+		add_trigger(event, "stacktrace");
+
+	return 0;
+}
+
+/*
+ * Add @event_str to first_instance, with a stacktrace trigger when
+ * @stack is set. Returns what profile_add_event() returns.
+ */
+int tracecmd_add_event(const char *event_str, int stack)
+{
+	struct buffer_instance *target = first_instance;
+
+	return profile_add_event(target, event_str, stack);
+}
+
+/*
+ * Set up @instance for profiling: select the function_graph plugin
+ * with a graph depth of 1 when no plugin was chosen and the kernel
+ * supports max_graph_depth, then add the events used for profiling.
+ */
+static void enable_profile(struct buffer_instance *instance)
+{
+	/* Events that get a stacktrace trigger when triggers are available */
+	static const char * const trigger_events[] = {
+		"sched:sched_switch",
+		"sched:sched_wakeup",
+		NULL,
+	};
+	static const char * const events[] = {
+		"exceptions:page_fault_user",
+		"irq:irq_handler_entry",
+		"irq:irq_handler_exit",
+		"irq:softirq_entry",
+		"irq:softirq_exit",
+		"irq:softirq_raise",
+		"sched:sched_process_exec",
+		"raw_syscalls",
+		NULL,
+	};
+	const char * const *name;
+	int stacktrace;
+
+	if (!instance->plugin) {
+		if (trace_check_file_exists(instance, "max_graph_depth")) {
+			instance->plugin = "function_graph";
+			set_max_graph_depth(instance, "1");
+		} else {
+			warning("Kernel does not support max_graph_depth\n"
+				" Skipping user/kernel profiling");
+		}
+	}
+
+	stacktrace = test_stacktrace_trigger(instance) ? 1 : 0;
+	if (!stacktrace) {
+		/*
+		 * The stacktrace trigger is not implemented with this
+		 * kernel, then we need to default to the stack trace option.
+		 * This is less efficient but still works.
+		 */
+		save_option(instance, "stacktrace");
+	}
+
+	for (name = trigger_events; *name; name++)
+		profile_add_event(instance, *name, stacktrace);
+
+	for (name = events; *name; name++)
+		profile_add_event(instance, *name, 0);
+}
+
+/*
+ * Allocate a "system:event" entry, add it to the events of @instance
+ * and to the listed events. A NULL @system is replaced by "*".
+ *
+ * Returns the new event list entry; dies on allocation failure.
+ */
+static struct event_list *
+create_hook_event(struct buffer_instance *instance,
+		  const char *system, const char *event)
+{
+	struct event_list *entry;
+	char *full_name;
+	size_t size;
+
+	if (!system)
+		system = "*";
+
+	/* Both names, the ':' separator and the terminating NUL */
+	size = strlen(system) + strlen(event) + 2;
+
+	full_name = malloc(size);
+	if (!full_name)
+		die("Failed to allocate %s/%s", system, event);
+	snprintf(full_name, size, "%s:%s", system, event);
+
+	entry = calloc(1, sizeof(*entry));
+	if (!entry)
+		die("Failed to allocate event list for %s", full_name);
+	entry->event = full_name;
+	add_event(instance, entry);
+
+	list_event(full_name);
+
+	return entry;
+}
+
+/*
+ * Create the event hook described by @arg for @instance, link it into
+ * the hooks list and add its start and end events to the instance.
+ * Dies when the hook can not be created.
+ */
+static void add_hook(struct buffer_instance *instance, const char *arg)
+{
+	struct hook_list *hook = tracecmd_create_event_hook(arg);
+	struct event_list *start;
+
+	if (!hook)
+		die("Failed to create event hook %s", arg);
+
+	hook->instance = instance;
+	hook->next = hooks;
+	hooks = hook;
+
+	/* Make sure the event is enabled */
+	start = create_hook_event(instance, hook->start_system, hook->start_event);
+	create_hook_event(instance, hook->end_system, hook->end_event);
+
+	if (!hook->stack)
+		return;
+
+	/* The stacktrace trigger goes on the start event, once */
+	if (start->trigger && strstr(start->trigger, "stacktrace"))
+		return;
+
+	add_trigger(start, "stacktrace");
+}
+
+/*
+ * Select where iteration over all instances starts: the top instance
+ * when @topt is set or @instance is the top instance, otherwise the
+ * head of buffer_instances.
+ */
+void update_first_instance(struct buffer_instance *instance, int topt)
+{
+	int use_top = topt || instance == &top_instance;
+
+	first_instance = use_top ? &top_instance : buffer_instances;
+}
+
+/*
+ * Set up the top instance: its tracefs handle (created once), CPU
+ * count, flags and trace id, then run the common instance init.
+ */
+void init_top_instance(void)
+{
+	struct buffer_instance *top = &top_instance;
+
+	if (!top->tracefs)
+		top->tracefs = tracefs_instance_create(NULL);
+
+	top->cpu_count = tracecmd_count_cpus();
+	top->flags = BUFFER_FL_KEEP;
+	top->trace_id = tracecmd_generate_traceid();
+
+	init_instance(top);
+}
+
+/*
+ * Identifiers of the options that only have a long form. All values
+ * lie above the ASCII range used by the short option characters.
+ */
+enum {
+	OPT_compression		= 237,
+	OPT_file_ver,		/* 238 */
+	OPT_verbose,		/* 239 */
+	OPT_tsc2nsec,		/* 240 */
+	OPT_fork,		/* 241 */
+	OPT_tsyncinterval,	/* 242 */
+	OPT_user,		/* 243 */
+	OPT_procmap,		/* 244 */
+	OPT_quiet,		/* 245 */
+	OPT_debug,		/* 246 */
+	OPT_no_filter,		/* 247 */
+	OPT_max_graph_depth,	/* 248 */
+	OPT_tsoffset,		/* 249 */
+	OPT_bycomm,		/* 250 */
+	OPT_stderr,		/* 251 */
+	OPT_profile,		/* 252 */
+	OPT_nosplice,		/* 253 */
+	OPT_funcstack,		/* 254 */
+	OPT_date,		/* 255 */
+	OPT_module,		/* 256 */
+	OPT_nofifos,		/* 257 */
+	OPT_cmdlines_size,	/* 258 */
+	OPT_poll,		/* 259 */
+	OPT_name,		/* 260 */
+};
+
+/*
+ * Implementation of the "stop" command: parse the instance selection
+ * options (-B, -a, -t), disable tracing and exit.
+ */
+void trace_stop(int argc, char **argv)
+{
+	struct buffer_instance *instance = &top_instance;
+	int topt = 0;
+	int c;
+
+	init_top_instance();
+
+	while ((c = getopt(argc-1, argv+1, "hatB:")) != -1) {
+		switch (c) {
+		case 'a':
+			add_all_instances();
+			break;
+		case 'B':
+			instance = allocate_instance(optarg);
+			if (!instance)
+				die("Failed to create instance");
+			add_instance(instance, local_cpu_count);
+			break;
+		case 't':
+			/* Force to use top instance */
+			topt = 1;
+			instance = &top_instance;
+			break;
+		case 'h':
+		default:
+			usage(argv);
+		}
+	}
+
+	update_first_instance(instance, topt);
+	tracecmd_disable_tracing();
+	exit(0);
+}
+
+/*
+ * Implementation of the "restart" command: parse the instance
+ * selection options (-B, -a, -t), enable tracing and exit.
+ */
+void trace_restart(int argc, char **argv)
+{
+	struct buffer_instance *instance = &top_instance;
+	int topt = 0;
+	int c;
+
+	init_top_instance();
+
+	while ((c = getopt(argc-1, argv+1, "hatB:")) != -1) {
+		switch (c) {
+		case 'a':
+			add_all_instances();
+			break;
+		case 'B':
+			instance = allocate_instance(optarg);
+			if (!instance)
+				die("Failed to create instance");
+			add_instance(instance, local_cpu_count);
+			break;
+		case 't':
+			/* Force to use top instance */
+			topt = 1;
+			instance = &top_instance;
+			break;
+		case 'h':
+		default:
+			usage(argv);
+		}
+	}
+
+	update_first_instance(instance, topt);
+	tracecmd_enable_tracing();
+	exit(0);
+}
+
+/*
+ * Implementation of the "reset" command: parse the options, then
+ * disable tracing, apply the requested buffer sizes, clear filters,
+ * triggers, dynamic events and the error log, reset clock, event
+ * pids, max latency and CPU mask, remove the instances that are not
+ * flagged to be kept, clear the function filters, enable tracing
+ * again and exit.
+ */
+void trace_reset(int argc, char **argv)
+{
+	struct buffer_instance *instance = &top_instance;
+	struct buffer_instance *inst; /* iterator */
+	/* if last arg is -a, then -b and -d apply to all instances */
+	int all_selected = 0;
+	int topt = 0;
+	int size;
+	int c;
+
+	init_top_instance();
+
+	while ((c = getopt(argc-1, argv+1, "hab:B:td")) >= 0) {
+		switch (c) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'b':
+			/* Min buffer size is 1 */
+			size = atoi(optarg);
+			if (size < 1)
+				size = 1;
+			if (!all_selected) {
+				instance->buffer_size = size;
+				break;
+			}
+			for_each_instance(inst)
+				inst->buffer_size = size;
+			break;
+		case 'B':
+			all_selected = 0;
+			instance = allocate_instance(optarg);
+			if (!instance)
+				die("Failed to create instance");
+			add_instance(instance, local_cpu_count);
+			/* -d will remove keep */
+			instance->flags |= BUFFER_FL_KEEP;
+			break;
+		case 't':
+			/* Force to use top instance */
+			all_selected = 0;
+			topt = 1;
+			instance = &top_instance;
+			break;
+		case 'a':
+			all_selected = 1;
+			add_all_instances();
+			for_each_instance(inst)
+				inst->flags |= BUFFER_FL_KEEP;
+			break;
+		case 'd':
+			if (all_selected) {
+				for_each_instance(inst)
+					inst->flags &= ~BUFFER_FL_KEEP;
+				break;
+			}
+			if (is_top_instance(instance))
+				die("Can not delete top level buffer");
+			instance->flags &= ~BUFFER_FL_KEEP;
+			break;
+		}
+	}
+
+	update_first_instance(instance, topt);
+	tracecmd_disable_all_tracing(1);
+	set_buffer_size();
+	clear_filters();
+	clear_triggers();
+	clear_all_dynamic_events();
+	clear_error_log();
+	/* set clock to "local" */
+	reset_clock();
+	reset_event_pid();
+	reset_max_latency_instance();
+	reset_cpu_mask();
+	tracecmd_remove_instances();
+	clear_func_filters();
+	/* restore tracing_on to 1 */
+	tracecmd_enable_tracing();
+	exit(0);
+}
+
+/*
+ * Reset @ctx for the command @curr_cmd: everything is zeroed, the
+ * current instance is the top instance and the file version is the
+ * default one. Also refreshes local_cpu_count and the top instance.
+ */
+static void init_common_record_context(struct common_record_context *ctx,
+				       enum trace_cmd curr_cmd)
+{
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->curr_cmd = curr_cmd;
+	ctx->instance = &top_instance;
+
+	local_cpu_count = tracecmd_count_cpus();
+	ctx->file_version = tracecmd_default_file_version();
+
+	init_top_instance();
+}
+
+/* Tests whether the record context @ctx is running the command @cmd */
+#define CTX_CMD_IS(ctx, cmd)	((ctx)->curr_cmd == (cmd))
+
+#define IS_EXTRACT(ctx)		CTX_CMD_IS(ctx, CMD_extract)
+#define IS_START(ctx)		CTX_CMD_IS(ctx, CMD_start)
+#define IS_CMDSET(ctx)		CTX_CMD_IS(ctx, CMD_set)
+#define IS_STREAM(ctx)		CTX_CMD_IS(ctx, CMD_stream)
+#define IS_PROFILE(ctx)		CTX_CMD_IS(ctx, CMD_profile)
+#define IS_RECORD(ctx)		CTX_CMD_IS(ctx, CMD_record)
+#define IS_RECORD_AGENT(ctx)	CTX_CMD_IS(ctx, CMD_record_agent)
+
+/*
+ * Grow the saved argument vector of @instance by one slot and store
+ * @arg in it, at the front when @prepend is set, at the end otherwise.
+ * The pointer is stored as is. Dies on allocation failure.
+ */
+static void add_argv(struct buffer_instance *instance, char *arg, bool prepend)
+{
+	int slot = instance->argc;
+	char **args;
+
+	instance->argv = realloc(instance->argv,
+				 (instance->argc + 1) * sizeof(char *));
+	if (!instance->argv)
+		die("Can not allocate instance args");
+
+	args = instance->argv;
+	if (prepend) {
+		/* Shift the existing arguments up to free the first slot */
+		memmove(args + 1, args, instance->argc * sizeof(*args));
+		slot = 0;
+	}
+	args[slot] = arg;
+	instance->argc++;
+}
+
+/*
+ * Save the option @c, as returned by getopt_long(), together with its
+ * argument in the argument vector of @instance.
+ *
+ * @opts is the short option string and @long_options the long option
+ * table that were handed to getopt_long(); @optarg is the argument of
+ * the option, if any. Options found in neither are silently ignored.
+ */
+static void add_arg(struct buffer_instance *instance,
+		    int c, const char *opts,
+		    struct option *long_options, char *optarg)
+{
+	char *ptr, *arg;
+	int i, ret;
+
+	/*
+	 * Short or long arg
+	 *
+	 * Short options are plain ASCII characters. This must be a range
+	 * check, not a test of bit 0x80: long option values of 256 and
+	 * above have that bit clear as well, and converting them to a
+	 * char for strchr() would match the terminating NUL of @opts.
+	 */
+	if (c > 0 && c < 0x80) {
+		ptr = strchr(opts, c);
+		if (!ptr)
+			return; /* Not found? */
+		ret = asprintf(&arg, "-%c", c);
+		if (ret < 0)
+			die("Can not allocate argument");
+		add_argv(instance, arg, false);
+		if (ptr[1] == ':') {
+			arg = strdup(optarg);
+			if (!arg)
+				die("Can not allocate arguments");
+			add_argv(instance, arg, false);
+		}
+		return;
+	}
+	for (i = 0; long_options[i].name; i++) {
+		if (c != long_options[i].val)
+			continue;
+		ret = asprintf(&arg, "--%s", long_options[i].name);
+		if (ret < 0)
+			die("Can not allocate argument");
+		add_argv(instance, arg, false);
+		/*
+		 * An option with an optional argument may come without
+		 * one, in which case optarg is NULL and nothing is saved.
+		 */
+		if (long_options[i].has_arg && optarg) {
+			arg = strdup(optarg);
+			if (!arg)
+				die("Can not allocate arguments");
+			add_argv(instance, arg, false);
+		}
+		return;
+	}
+	/* Not found? */
+}
+
+/*
+ * Die when the command being run is @id, telling the user that option
+ * @param has no effect with the command named @cmd.
+ */
+static inline void cmd_check_die(struct common_record_context *ctx,
+				 enum trace_cmd id, char *cmd, char *param)
+{
+	if (ctx->curr_cmd != id)
+		return;
+
+	die("%s has no effect with the command %s\n"
+	    "Did you mean 'record'?", param, cmd);
+}
+
+/*
+ * Destroy and free every buffer instance of the list @instances,
+ * including its name and its tracefs instance.
+ */
+static inline void remove_instances(struct buffer_instance *instances)
+{
+	struct buffer_instance *next;
+
+	for (; instances; instances = next) {
+		/* Fetch the link before the node is freed */
+		next = instances->next;
+
+		free(instances->name);
+		tracefs_instance_destroy(instances->tracefs);
+		tracefs_instance_free(instances->tracefs);
+		free(instances);
+	}
+}
+
+/* Die when option @param is used on an @instance that is marked for deletion. */
+static inline void
+check_instance_die(struct buffer_instance *instance, char *param)
+{
+	if (!instance->delete)
+		return;
+
+	die("Instance %s is marked for deletion, invalid option %s",
+	    tracefs_instance_get_name(instance->tracefs), param);
+}
+
+/*
+ * Check whether @clock is one of the clocks listed in the
+ * "trace_clock" file of @instance.
+ *
+ * A match must be a whole word: it has to start the list or follow a
+ * space or '[', and has to be followed by a space, ']', a newline or
+ * the end of the list. Every occurrence of the name is examined, so a
+ * clock whose name also appears inside another clock name is found
+ * regardless of the order in which the clocks are listed.
+ *
+ * Returns true when the clock is listed, false otherwise or when the
+ * file can not be read.
+ */
+static bool clock_is_supported(struct tracefs_instance *instance, const char *clock)
+{
+	size_t len = strlen(clock);
+	char *all_clocks = NULL;
+	bool found = false;
+	char *p;
+
+	all_clocks = tracefs_instance_file_read(instance, "trace_clock", NULL);
+	if (!all_clocks)
+		return false;
+
+	p = strstr(all_clocks, clock);
+	while (p) {
+		/*
+		 * strchr() also matches the terminating NUL of its string,
+		 * which covers a name that ends the list.
+		 */
+		if ((p == all_clocks || p[-1] == ' ' || p[-1] == '[') &&
+		    strchr(" ]\n", p[len])) {
+			found = true;
+			break;
+		}
+		/* Only an empty name can match at the very end of the list */
+		if (!*p)
+			break;
+		p = strstr(p + 1, clock);
+	}
+	free(all_clocks);
+
+	return found;
+}
+
+#ifdef PERF
+/*
+ * Get the shift and multiplier used to convert TSC to nanoseconds,
+ * as reported by perf.
+ *
+ * The values are read once and cached; they are only accepted when
+ * they could be read for every CPU, are the same on all of them and
+ * are not both zero. @shift and @mult may be NULL.
+ *
+ * Returns 0 on success, -1 when the conversion is not available.
+ */
+static int get_tsc_nsec(int *shift, int *mult)
+{
+	static int cpu_shift, cpu_mult;
+	/* 0: not probed yet, 1: values are valid, -1: not available */
+	static int supported;
+	int cpus = tracecmd_count_cpus();
+	struct trace_perf perf;
+	int cpu;
+
+	if (!supported) {
+		/* Stays negative unless the whole probe succeeds */
+		supported = -1;
+
+		if (trace_perf_init(&perf, 1, 0, getpid()))
+			return -1;
+		if (trace_perf_open(&perf))
+			return -1;
+		cpu_shift = perf.mmap->time_shift;
+		cpu_mult = perf.mmap->time_mult;
+
+		/* The remaining CPUs must agree with CPU 0 */
+		for (cpu = 1; cpu < cpus; cpu++) {
+			trace_perf_close(&perf);
+			if (trace_perf_init(&perf, 1, cpu, getpid()) ||
+			    trace_perf_open(&perf))
+				break;
+			if (perf.mmap->time_shift == cpu_shift &&
+			    perf.mmap->time_mult == cpu_mult)
+				continue;
+			warning("Found different TSC multiplier and shift for CPU %d: %d;%d instead of %d;%d",
+				cpu, perf.mmap->time_mult, perf.mmap->time_shift, cpu_mult, cpu_shift);
+			break;
+		}
+		trace_perf_close(&perf);
+		if (cpu < cpus)
+			return -1;
+
+		if (cpu_shift || cpu_mult)
+			supported = 1;
+	}
+
+	if (supported < 0)
+		return -1;
+
+	if (shift)
+		*shift = cpu_shift;
+	if (mult)
+		*mult = cpu_mult;
+
+	return 0;
+}
+#else
+/* Without PERF the TSC to nanosecond conversion is never available. */
+static int get_tsc_nsec(int *shift, int *mult)
+{
+	return -1;
+}
+#endif
+
+/* Returns true when get_tsc_nsec() succeeds. */
+bool trace_tsc2nsec_is_supported(void)
+{
+	return !get_tsc_nsec(NULL, NULL);
+}
+
+static void parse_record_options(int argc,
+ char **argv,
+ enum trace_cmd curr_cmd,
+ struct common_record_context *ctx)
+{
+ const char *plugin = NULL;
+ const char *option;
+ struct event_list *event = NULL;
+ struct event_list *last_event = NULL;
+ struct addrinfo *result;
+ char *pids;
+ char *pid;
+ char *sav;
+ int name_counter = 0;
+ int negative = 0;
+ struct buffer_instance *instance, *del_list = NULL;
+ int do_children = 0;
+ int fpids_count = 0;
+
+ init_common_record_context(ctx, curr_cmd);
+
+ if (IS_CMDSET(ctx))
+ keep = 1;
+
+ for (;;) {
+ int option_index = 0;
+ int ret;
+ int c;
+ const char *opts;
+ static struct option long_options[] = {
+ {"date", no_argument, NULL, OPT_date},
+ {"func-stack", no_argument, NULL, OPT_funcstack},
+ {"nosplice", no_argument, NULL, OPT_nosplice},
+ {"nofifos", no_argument, NULL, OPT_nofifos},
+ {"profile", no_argument, NULL, OPT_profile},
+ {"stderr", no_argument, NULL, OPT_stderr},
+ {"by-comm", no_argument, NULL, OPT_bycomm},
+ {"ts-offset", required_argument, NULL, OPT_tsoffset},
+ {"max-graph-depth", required_argument, NULL, OPT_max_graph_depth},
+ {"cmdlines-size", required_argument, NULL, OPT_cmdlines_size},
+ {"no-filter", no_argument, NULL, OPT_no_filter},
+ {"debug", no_argument, NULL, OPT_debug},
+ {"quiet", no_argument, NULL, OPT_quiet},
+ {"help", no_argument, NULL, '?'},
+ {"proc-map", no_argument, NULL, OPT_procmap},
+ {"user", required_argument, NULL, OPT_user},
+ {"module", required_argument, NULL, OPT_module},
+ {"tsync-interval", required_argument, NULL, OPT_tsyncinterval},
+ {"fork", no_argument, NULL, OPT_fork},
+ {"tsc2nsec", no_argument, NULL, OPT_tsc2nsec},
+ {"poll", no_argument, NULL, OPT_poll},
+ {"name", required_argument, NULL, OPT_name},
+ {"verbose", optional_argument, NULL, OPT_verbose},
+ {"compression", required_argument, NULL, OPT_compression},
+ {"file-version", required_argument, NULL, OPT_file_ver},
+ {NULL, 0, NULL, 0}
+ };
+
+ if (IS_EXTRACT(ctx))
+ opts = "+haf:Fp:co:O:sr:g:l:n:P:N:tb:B:ksiT";
+ else
+ opts = "+hae:f:FA:p:cC:dDGo:O:s:r:V:vg:l:n:P:N:tb:R:B:ksSiTm:M:H:q";
+ c = getopt_long (argc-1, argv+1, opts, long_options, &option_index);
+ if (c == -1)
+ break;
+
+ /*
+ * If the current instance is to record a guest, then save
+ * all the arguments for this instance.
+ */
+ if (c != 'B' && c != 'A' && c != OPT_name && is_guest(ctx->instance)) {
+ add_arg(ctx->instance, c, opts, long_options, optarg);
+ if (c == 'C')
+ ctx->instance->flags |= BUFFER_FL_HAS_CLOCK;
+ continue;
+ }
+
+ switch (c) {
+ case 'h':
+ usage(argv);
+ break;
+ case 'a':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-a");
+ if (IS_EXTRACT(ctx)) {
+ add_all_instances();
+ } else {
+ ctx->record_all = 1;
+ record_all_events();
+ }
+ break;
+ case 'e':
+ check_instance_die(ctx->instance, "-e");
+ ctx->events = 1;
+ event = malloc(sizeof(*event));
+ if (!event)
+ die("Failed to allocate event %s", optarg);
+ memset(event, 0, sizeof(*event));
+ event->event = optarg;
+ add_event(ctx->instance, event);
+ event->neg = negative;
+ event->filter = NULL;
+ last_event = event;
+
+ if (!ctx->record_all)
+ list_event(optarg);
+ break;
+ case 'f':
+ if (!last_event)
+ die("filter must come after event");
+ if (last_event->filter) {
+ last_event->filter =
+ realloc(last_event->filter,
+ strlen(last_event->filter) +
+ strlen("&&()") +
+ strlen(optarg) + 1);
+ strcat(last_event->filter, "&&(");
+ strcat(last_event->filter, optarg);
+ strcat(last_event->filter, ")");
+ } else {
+ ret = asprintf(&last_event->filter, "(%s)", optarg);
+ if (ret < 0)
+ die("Failed to allocate filter %s", optarg);
+ }
+ break;
+
+ case 'R':
+ if (!last_event)
+ die("trigger must come after event");
+ add_trigger(event, optarg);
+ break;
+
+ case OPT_name:
+ if (!ctx->instance)
+ die("No instance defined for name option\n");
+ if (!is_guest(ctx->instance))
+ die(" --name is only used for -A options\n");
+ free(ctx->instance->name);
+ ctx->instance->name = strdup(optarg);
+ if (!ctx->instance->name)
+ die("Failed to allocate name");
+ break;
+
+ case 'A': {
+ char *name = NULL;
+ int cid = -1, port = -1;
+
+ if (!IS_RECORD(ctx))
+ die("-A is only allowed for record operations");
+
+ name = parse_guest_name(optarg, &cid, &port, &result);
+ if (cid == -1 && !result)
+ die("guest %s not found", optarg);
+ if (port == -1)
+ port = TRACE_AGENT_DEFAULT_PORT;
+ if (!name || !*name) {
+ ret = asprintf(&name, "unnamed-%d", name_counter++);
+ if (ret < 0)
+ name = NULL;
+ } else {
+ /* Needs to be allocate */
+ name = strdup(name);
+ }
+ if (!name)
+ die("Failed to allocate guest name");
+
+ ctx->instance = allocate_instance(name);
+ if (!ctx->instance)
+ die("Failed to allocate instance");
+
+ if (result) {
+ ctx->instance->flags |= BUFFER_FL_NETWORK;
+ ctx->instance->port_type = USE_TCP;
+ }
+
+ ctx->instance->flags |= BUFFER_FL_GUEST;
+ ctx->instance->result = result;
+ ctx->instance->cid = cid;
+ ctx->instance->port = port;
+ ctx->instance->name = name;
+ add_instance(ctx->instance, 0);
+ ctx->data_flags |= DATA_FL_GUEST;
+ break;
+ }
+ case 'F':
+ test_set_event_pid(ctx->instance);
+ filter_task = 1;
+ break;
+ case 'G':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-G");
+ ctx->global = 1;
+ break;
+ case 'P':
+ check_instance_die(ctx->instance, "-P");
+ test_set_event_pid(ctx->instance);
+ pids = strdup(optarg);
+ if (!pids)
+ die("strdup");
+ pid = strtok_r(pids, ",", &sav);
+ while (pid) {
+ fpids_count += add_filter_pid(ctx->instance,
+ atoi(pid), 0);
+ pid = strtok_r(NULL, ",", &sav);
+ ctx->instance->nr_process_pids++;
+ }
+ ctx->instance->process_pids = ctx->instance->filter_pids;
+ free(pids);
+ break;
+ case 'c':
+ check_instance_die(ctx->instance, "-c");
+ test_set_event_pid(ctx->instance);
+ do_children = 1;
+ if (!ctx->instance->have_event_fork) {
+#ifdef NO_PTRACE
+ die("-c invalid: ptrace not supported");
+#endif
+ do_ptrace = 1;
+ ctx->instance->ptrace_child = 1;
+
+ } else {
+ save_option(ctx->instance, "event-fork");
+ }
+ if (ctx->instance->have_func_fork)
+ save_option(ctx->instance, "function-fork");
+ break;
+ case 'C':
+ check_instance_die(ctx->instance, "-C");
+ if (strcmp(optarg, TSCNSEC_CLOCK) == 0) {
+ ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
+ &ctx->tsc2nsec.mult);
+ if (ret)
+ die("TSC to nanosecond is not supported");
+ ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
+ ctx->instance->clock = TSC_CLOCK;
+ } else {
+ ctx->instance->clock = optarg;
+ }
+ if (!clock_is_supported(NULL, ctx->instance->clock))
+ die("Clock %s is not supported", ctx->instance->clock);
+ ctx->instance->clock = strdup(ctx->instance->clock);
+ if (!ctx->instance->clock)
+ die("Failed allocation");
+ ctx->instance->flags |= BUFFER_FL_HAS_CLOCK;
+ if (!ctx->clock && !is_guest(ctx->instance))
+ ctx->clock = ctx->instance->clock;
+ break;
+ case 'v':
+ negative = 1;
+ break;
+ case 'l':
+ add_func(&ctx->instance->filter_funcs,
+ ctx->instance->filter_mod, optarg);
+ ctx->filtered = 1;
+ break;
+ case 'n':
+ check_instance_die(ctx->instance, "-n");
+ add_func(&ctx->instance->notrace_funcs,
+ ctx->instance->filter_mod, optarg);
+ ctx->filtered = 1;
+ break;
+ case 'g':
+ check_instance_die(ctx->instance, "-g");
+ add_func(&graph_funcs, ctx->instance->filter_mod, optarg);
+ ctx->filtered = 1;
+ break;
+ case 'p':
+ check_instance_die(ctx->instance, "-p");
+ if (ctx->instance->plugin)
+ die("only one plugin allowed");
+ for (plugin = optarg; isspace(*plugin); plugin++)
+ ;
+ ctx->instance->plugin = plugin;
+ for (optarg += strlen(optarg) - 1;
+ optarg > plugin && isspace(*optarg); optarg--)
+ ;
+ optarg++;
+ optarg[0] = '\0';
+ break;
+ case 'D':
+ ctx->total_disable = 1;
+ /* fall through */
+ case 'd':
+ ctx->disable = 1;
+ break;
+ case 'o':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-o");
+ if (IS_RECORD_AGENT(ctx))
+ die("-o incompatible with agent recording");
+ if (host)
+ die("-o incompatible with -N");
+ if (IS_START(ctx))
+ die("start does not take output\n"
+ "Did you mean 'record'?");
+ if (IS_STREAM(ctx))
+ die("stream does not take output\n"
+ "Did you mean 'record'?");
+ if (ctx->output)
+ die("only one output file allowed");
+ ctx->output = optarg;
+
+ if (IS_PROFILE(ctx)) {
+ int fd;
+
+ /* pipe the output to this file instead of stdout */
+ save_stdout = dup(1);
+ close(1);
+ fd = open(optarg, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (fd < 0)
+ die("can't write to %s", optarg);
+ if (fd != 1) {
+ dup2(fd, 1);
+ close(fd);
+ }
+ }
+ break;
+ case 'O':
+ check_instance_die(ctx->instance, "-O");
+ option = optarg;
+ save_option(ctx->instance, option);
+ break;
+ case 'T':
+ check_instance_die(ctx->instance, "-T");
+ save_option(ctx->instance, "stacktrace");
+ break;
+ case 'H':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-H");
+ check_instance_die(ctx->instance, "-H");
+ add_hook(ctx->instance, optarg);
+ ctx->events = 1;
+ break;
+ case 's':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-s");
+ if (IS_EXTRACT(ctx)) {
+ if (optarg)
+ usage(argv);
+ recorder_flags |= TRACECMD_RECORD_SNAPSHOT;
+ break;
+ }
+ if (!optarg)
+ usage(argv);
+ sleep_time = atoi(optarg);
+ break;
+ case 'S':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-S");
+ ctx->manual = 1;
+ /* User sets events for profiling */
+ if (!event)
+ ctx->events = 0;
+ break;
+ case 'r':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-r");
+ rt_prio = atoi(optarg);
+ break;
+ case 'N':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-N");
+ if (!IS_RECORD(ctx))
+ die("-N only available with record");
+ if (IS_RECORD_AGENT(ctx))
+ die("-N incompatible with agent recording");
+ if (ctx->output)
+ die("-N incompatible with -o");
+ host = optarg;
+ break;
+ case 'V':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-V");
+ if (!IS_RECORD(ctx))
+ die("-V only available with record");
+ if (IS_RECORD_AGENT(ctx))
+ die("-V incompatible with agent recording");
+ if (ctx->output)
+ die("-V incompatible with -o");
+ host = optarg;
+ ctx->instance->port_type = USE_VSOCK;
+ break;
+ case 'm':
+ if (max_kb)
+ die("-m can only be specified once");
+ if (!IS_RECORD(ctx))
+ die("only record take 'm' option");
+ max_kb = atoi(optarg);
+ break;
+ case 'M':
+ check_instance_die(ctx->instance, "-M");
+ ctx->instance->cpumask = alloc_mask_from_hex(ctx->instance, optarg);
+ break;
+ case 't':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-t");
+ if (IS_EXTRACT(ctx))
+ ctx->topt = 1; /* Extract top instance also */
+ else
+ ctx->instance->port_type = USE_TCP;
+ break;
+ case 'b':
+ check_instance_die(ctx->instance, "-b");
+ ctx->instance->buffer_size = atoi(optarg);
+ break;
+ case 'B':
+ ctx->instance = allocate_instance(optarg);
+ if (!ctx->instance)
+ die("Failed to create instance");
+ ctx->instance->delete = negative;
+ negative = 0;
+ if (ctx->instance->delete) {
+ ctx->instance->next = del_list;
+ del_list = ctx->instance;
+ } else
+ add_instance(ctx->instance, local_cpu_count);
+ if (IS_PROFILE(ctx))
+ ctx->instance->flags |= BUFFER_FL_PROFILE;
+ break;
+ case 'k':
+ cmd_check_die(ctx, CMD_set, *(argv+1), "-k");
+ keep = 1;
+ break;
+ case 'i':
+ ignore_event_not_found = 1;
+ break;
+ case OPT_user:
+ ctx->user = strdup(optarg);
+ if (!ctx->user)
+ die("Failed to allocate user name");
+ break;
+ case OPT_procmap:
+ cmd_check_die(ctx, CMD_start, *(argv+1), "--proc-map");
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--proc-map");
+ check_instance_die(ctx->instance, "--proc-map");
+ ctx->instance->get_procmap = 1;
+ break;
+ case OPT_date:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--date");
+ ctx->date = 1;
+ if (ctx->data_flags & DATA_FL_OFFSET)
+ die("Can not use both --date and --ts-offset");
+ ctx->data_flags |= DATA_FL_DATE;
+ break;
+ case OPT_funcstack:
+ func_stack = 1;
+ break;
+ case OPT_nosplice:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--nosplice");
+ recorder_flags |= TRACECMD_RECORD_NOSPLICE;
+ break;
+ case OPT_nofifos:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--nofifos");
+ no_fifos = true;
+ break;
+ case OPT_profile:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--profile");
+ check_instance_die(ctx->instance, "--profile");
+ handle_init = trace_init_profile;
+ ctx->instance->flags |= BUFFER_FL_PROFILE;
+ ctx->events = 1;
+ break;
+ case OPT_stderr:
+ /* if -o was used (for profile), ignore this */
+ if (save_stdout >= 0)
+ break;
+ save_stdout = dup(1);
+ close(1);
+ dup2(2, 1);
+ break;
+ case OPT_bycomm:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--by-comm");
+ trace_profile_set_merge_like_comms();
+ break;
+ case OPT_tsoffset:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--ts-offset");
+ ctx->date2ts = strdup(optarg);
+ if (ctx->data_flags & DATA_FL_DATE)
+ die("Can not use both --date and --ts-offset");
+ ctx->data_flags |= DATA_FL_OFFSET;
+ break;
+ case OPT_max_graph_depth:
+ check_instance_die(ctx->instance, "--max-graph-depth");
+ free(ctx->instance->max_graph_depth);
+ ctx->instance->max_graph_depth = strdup(optarg);
+ if (!ctx->instance->max_graph_depth)
+ die("Could not allocate option");
+ break;
+ case OPT_cmdlines_size:
+ ctx->saved_cmdlines_size = atoi(optarg);
+ break;
+ case OPT_no_filter:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--no-filter");
+ no_filter = true;
+ break;
+ case OPT_debug:
+ tracecmd_set_debug(true);
+ break;
+ case OPT_module:
+ check_instance_die(ctx->instance, "--module");
+ if (ctx->instance->filter_mod)
+ add_func(&ctx->instance->filter_funcs,
+ ctx->instance->filter_mod, "*");
+ ctx->instance->filter_mod = optarg;
+ ctx->filtered = 0;
+ break;
+ case OPT_tsyncinterval:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--tsync-interval");
+ ctx->tsync_loop_interval = atoi(optarg);
+ break;
+ case OPT_fork:
+ if (!IS_START(ctx))
+ die("--fork option used for 'start' command only");
+ fork_process = true;
+ break;
+ case OPT_tsc2nsec:
+ ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
+ &ctx->tsc2nsec.mult);
+ if (ret)
+ die("TSC to nanosecond is not supported");
+ ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
+ break;
+ case OPT_poll:
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--poll");
+ recorder_flags |= TRACECMD_RECORD_POLL;
+ break;
+ case OPT_compression:
+ cmd_check_die(ctx, CMD_start, *(argv+1), "--compression");
+ cmd_check_die(ctx, CMD_set, *(argv+1), "--compression");
+ cmd_check_die(ctx, CMD_extract, *(argv+1), "--compression");
+ cmd_check_die(ctx, CMD_stream, *(argv+1), "--compression");
+ cmd_check_die(ctx, CMD_profile, *(argv+1), "--compression");
+ if (strcmp(optarg, "any") && strcmp(optarg, "none") &&
+ !tracecmd_compress_is_supported(optarg, NULL))
+ die("Compression algorithm %s is not supported", optarg);
+ ctx->compression = strdup(optarg);
+ break;
+ case OPT_file_ver:
+ if (ctx->curr_cmd != CMD_record && ctx->curr_cmd != CMD_record_agent)
+ die("--file_version has no effect with the command %s\n",
+ *(argv+1));
+ ctx->file_version = atoi(optarg);
+ if (ctx->file_version < FILE_VERSION_MIN ||
+ ctx->file_version > FILE_VERSION_MAX)
+ die("Unsupported file version %d, "
+ "supported versions are from %d to %d",
+ ctx->file_version, FILE_VERSION_MIN, FILE_VERSION_MAX);
+ break;
+ case OPT_quiet:
+ case 'q':
+ quiet = true;
+ break;
+ case OPT_verbose:
+ if (trace_set_verbose(optarg) < 0)
+ die("invalid verbose level %s", optarg);
+ break;
+ default:
+ usage(argv);
+ }
+ }
+
+ remove_instances(del_list);
+
+ /* If --date is specified, prepend it to all guest VM flags */
+ if (ctx->date) {
+ struct buffer_instance *instance;
+
+ for_all_instances(instance) {
+ if (is_guest(instance))
+ add_argv(instance, "--date", true);
+ }
+ }
+
+ if (!ctx->filtered && ctx->instance->filter_mod)
+ add_func(&ctx->instance->filter_funcs,
+ ctx->instance->filter_mod, "*");
+
+ if (do_children && !filter_task && !fpids_count)
+ die(" -c can only be used with -F (or -P with event-fork support)");
+
+ if ((argc - optind) >= 2) {
+ if (IS_EXTRACT(ctx))
+ die("Command extract does not take any commands\n"
+ "Did you mean 'record'?");
+ ctx->run_command = 1;
+ }
+ if (ctx->user && !ctx->run_command)
+ warning("--user %s is ignored, no command is specified",
+ ctx->user);
+
+ if (top_instance.get_procmap) {
+ /* use ptrace to get procmap on the command exit */
+ if (ctx->run_command) {
+ do_ptrace = 1;
+ } else if (!top_instance.nr_filter_pids) {
+ warning("--proc-map is ignored for top instance, "
+ "no command or filtered PIDs are specified.");
+ top_instance.get_procmap = 0;
+ }
+ }
+
+ for_all_instances(instance) {
+ if (instance->get_procmap && !instance->nr_filter_pids) {
+ warning("--proc-map is ignored for instance %s, "
+ "no filtered PIDs are specified.",
+ tracefs_instance_get_name(instance->tracefs));
+ instance->get_procmap = 0;
+ }
+ }
+}
+
+/*
+ * Translate a trace-cmd sub-command into the trace type it operates as.
+ * Dies if the command has no trace type associated with it.
+ */
+static enum trace_type get_trace_cmd_type(enum trace_cmd cmd)
+{
+	switch (cmd) {
+	case CMD_record:
+	case CMD_record_agent:
+		return TRACE_TYPE_RECORD;
+	case CMD_stream:
+	case CMD_profile:
+		/* profile is handled as a stream */
+		return TRACE_TYPE_STREAM;
+	case CMD_extract:
+		return TRACE_TYPE_EXTRACT;
+	case CMD_start:
+		return TRACE_TYPE_START;
+	case CMD_set:
+		return TRACE_TYPE_SET;
+	default:
+		break;
+	}
+
+	die("Trace type UNKNOWN for the given cmd_fun");
+}
+
+/*
+ * Undo the tracing configuration once a command has finished.
+ * Nothing is touched when the global 'keep' flag (-k) is set.
+ */
+static void finalize_record_trace(struct common_record_context *ctx)
+{
+	struct buffer_instance *inst;
+
+	if (keep)
+		return;
+
+	update_reset_files();
+	update_reset_triggers();
+
+	if (clear_function_filters)
+		clear_func_filters();
+
+	set_plugin("nop");
+	tracecmd_remove_instances();
+
+	for_all_instances(inst) {
+		/* If tracing_on was enabled before we started, set it on now */
+		if (inst->flags & BUFFER_FL_KEEP)
+			write_tracing_on(inst, inst->tracing_on_init_val);
+
+		if (!is_agent(inst))
+			continue;
+
+		/* Agents answer the close request and drop their network output */
+		tracecmd_msg_send_close_resp_msg(inst->msg_handle);
+		tracecmd_output_close(inst->network_handle);
+	}
+
+	if (host)
+		tracecmd_output_close(ctx->instance->network_handle);
+}
+
+/*
+ * Return true when at least one instance is not a guest and, if a remote
+ * host is in use, has no message handle attached to it.
+ */
+static bool has_local_instances(void)
+{
+	struct buffer_instance *inst;
+
+	for_all_instances(inst) {
+		if (!is_guest(inst) && !(host && inst->msg_handle))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Choose the clock used for a host-guest tracing session and hand it,
+ * together with the time sync loop interval, to the instances.
+ */
+static void set_tsync_params(struct common_record_context *ctx)
+{
+	struct buffer_instance *inst;
+	char *sync_clock = NULL;
+	bool tsc_forced = false;
+	int shift, mult;
+
+	if (ctx->clock) {
+		/* A clock was configured explicitly, work on a private copy */
+		sync_clock = strdup(ctx->clock);
+		if (!sync_clock)
+			die("Cannot not allocate clock");
+	} else if (tsync_proto_is_supported("kvm") &&
+		   trace_have_guests_pid() &&
+		   clock_is_supported(NULL, TSC_CLOCK) &&
+		   !get_tsc_nsec(&shift, &mult) && mult) {
+		/*
+		 * No clock is configured, the KVM time sync protocol is
+		 * available, the PID of each guest is known, the tsc-x86
+		 * clock is supported and the TSC to nsec multiplier and shift
+		 * could be read: force the x86-tsc clock for this session and
+		 * remember the multiplier and shift.
+		 */
+		sync_clock = strdup(TSC_CLOCK);
+		if (!sync_clock)
+			die("Cannot not allocate clock");
+		ctx->tsc2nsec.mult = mult;
+		ctx->tsc2nsec.shift = shift;
+		tsc_forced = true;
+	} else {
+		/* Use the current clock of the first host instance */
+		sync_clock = get_trace_clock(true);
+	}
+
+	if (sync_clock || ctx->tsync_loop_interval) {
+		for_all_instances(inst) {
+			/* Instances with their own clock (-C) are left alone */
+			bool wants_clock = sync_clock &&
+					   !(inst->flags & BUFFER_FL_HAS_CLOCK);
+
+			if (wants_clock && is_guest(inst)) {
+				/* use the same clock in all tracing peers */
+				if (!inst->clock) {
+					inst->clock = strdup(sync_clock);
+					if (!inst->clock)
+						die("Can not allocate instance clock");
+				}
+				add_argv(inst, (char *)inst->clock, true);
+				add_argv(inst, "-C", true);
+				if (ctx->tsc2nsec.mult)
+					inst->flags |= BUFFER_FL_TSC2NSEC;
+			} else if (wants_clock && tsc_forced && !inst->clock) {
+				inst->clock = strdup(sync_clock);
+				if (!inst->clock)
+					die("Can not allocate instance clock");
+			}
+
+			inst->tsync_loop_interval = ctx->tsync_loop_interval;
+		}
+	}
+
+	free(sync_clock);
+}
+/*
+ * Common body of the recording commands: it is called through
+ * record_trace_command() and directly by trace_record_agent().
+ *
+ * Configures every instance from @ctx, starts tracing, then either runs the
+ * command given on the command line, waits on the agent message handle, or
+ * waits for Ctrl^C / the filtered processes. Finally it stops tracing and
+ * hands over to finalize_record_trace().
+ *
+ * @argc, @argv: command line; only used to pass argv[optind + 1] onward to
+ *               run_cmd() when ctx->run_command is set.
+ * @ctx: the options filled in by parse_record_options().
+ */
+static void record_trace(int argc, char **argv,
+ struct common_record_context *ctx)
+{
+ enum trace_type type = get_trace_cmd_type(ctx->curr_cmd);
+ struct buffer_instance *instance;
+ struct filter_pids *pid;
+
+ /*
+ * If top_instance doesn't have any plugins or events, then
+ * remove it from being processed.
+ */
+ if (!__check_doing_something(&top_instance) && !filter_task)
+ first_instance = buffer_instances;
+ else
+ ctx->topt = 1;
+
+ update_first_instance(ctx->instance, ctx->topt);
+ if (!IS_CMDSET(ctx)) {
+ check_doing_something();
+ check_function_plugin();
+ }
+ /* Without -o the default output file name is used */
+ if (!ctx->output)
+ ctx->output = DEFAULT_INPUT_FILE;
+
+ if (ctx->data_flags & DATA_FL_GUEST)
+ set_tsync_params(ctx);
+
+ make_instances();
+
+ /* Save the state of tracing_on before starting */
+ for_all_instances(instance) {
+ instance->output_file = strdup(ctx->output);
+ if (!instance->output_file)
+ die("Failed to allocate output file name for instance");
+ if (!ctx->manual && instance->flags & BUFFER_FL_PROFILE)
+ enable_profile(instance);
+
+ instance->tracing_on_init_val = read_tracing_on(instance);
+ /* Some instances may not be created yet */
+ if (instance->tracing_on_init_val < 0)
+ instance->tracing_on_init_val = 1;
+ }
+
+ if (ctx->events)
+ expand_event_list();
+
+ page_size = getpagesize();
+
+ if (!is_guest(ctx->instance))
+ fset = set_ftrace(ctx->instance, !ctx->disable, ctx->total_disable);
+ if (!IS_CMDSET(ctx))
+ tracecmd_disable_all_tracing(1);
+
+ for_all_instances(instance)
+ set_clock(ctx, instance);
+
+
+ /* Record records the date first */
+ if (ctx->date &&
+ ((IS_RECORD(ctx) && has_local_instances()) || IS_RECORD_AGENT(ctx)))
+ ctx->date2ts = get_date_to_ts();
+
+ for_all_instances(instance) {
+ set_funcs(instance);
+ set_mask(instance);
+ }
+
+ if (ctx->events) {
+ for_all_instances(instance)
+ enable_events(instance);
+ }
+
+ set_saved_cmdlines_size(ctx);
+ set_buffer_size();
+ update_plugins(type);
+ set_options();
+
+ for_all_instances(instance) {
+ if (instance->max_graph_depth) {
+ set_max_graph_depth(instance, instance->max_graph_depth);
+ free(instance->max_graph_depth);
+ instance->max_graph_depth = NULL;
+ }
+ }
+
+ allocate_seq();
+
+ if (type & (TRACE_TYPE_RECORD | TRACE_TYPE_STREAM)) {
+ signal(SIGINT, finish);
+ if (!latency)
+ start_threads(type, ctx);
+ }
+ /* Three ways to go: run a command, act as an agent, or trace until stopped */
+ if (ctx->run_command) {
+ run_cmd(type, ctx->user, (argc - optind) - 1, &argv[optind + 1]);
+ } else if (ctx->instance && is_agent(ctx->instance)) {
+ update_task_filter();
+ tracecmd_enable_tracing();
+ tracecmd_msg_wait_close(ctx->instance->msg_handle);
+ } else {
+ bool pwait = false;
+ bool wait_indefinitely = false;
+
+ update_task_filter();
+
+ if (!IS_CMDSET(ctx))
+ tracecmd_enable_tracing();
+
+ if (type & (TRACE_TYPE_START | TRACE_TYPE_SET))
+ exit(0); /* start and set types leave here, there is nothing to wait for */
+
+ /* We don't ptrace ourself */
+ if (do_ptrace) {
+ for_all_instances(instance) {
+ for (pid = instance->filter_pids; pid; pid = pid->next) {
+ if (!pid->exclude && instance->ptrace_child) {
+ ptrace_attach(instance, pid->pid);
+ pwait = true;
+ }
+ }
+ }
+ }
+ /* sleep till we are woken with Ctrl^C */
+ printf("Hit Ctrl^C to stop recording\n");
+ for_all_instances(instance) {
+ /* If an instance is not tracing individual processes
+ * or there is an error while waiting for a process to
+ * exit, fallback to waiting indefinitely.
+ */
+ if (!instance->nr_process_pids ||
+ trace_wait_for_processes(instance))
+ wait_indefinitely = true;
+ }
+ while (!finished && wait_indefinitely)
+ trace_or_sleep(type, pwait);
+ }
+ /* Wind down: stop guests and tracing, then record_data() for record or print_stats() otherwise */
+ tell_guests_to_stop(ctx);
+ tracecmd_disable_tracing();
+ if (!latency)
+ stop_threads(type);
+
+ record_stats();
+
+ if (!latency)
+ wait_threads();
+
+ if (IS_RECORD(ctx)) {
+ record_data(ctx);
+ delete_thread_data();
+ } else
+ print_stats();
+
+ if (!keep)
+ tracecmd_disable_all_tracing(0);
+
+ destroy_stats();
+ finalize_record_trace(ctx);
+}
+
+/*
+ * This function contains common code for the following commands:
+ * record, start, set, stream, profile.
+ *
+ * It calls tracecmd_tsync_init() and then runs record_trace() with the
+ * same arguments.
+ */
+static void record_trace_command(int argc, char **argv,
+ struct common_record_context *ctx)
+{
+ tracecmd_tsync_init();
+ record_trace(argc, argv, ctx);
+}
+/*
+ * Entry point of the start command: parse the options as CMD_start and run
+ * the common record path. Does not return, the process exits with status 0.
+ */
+void trace_start(int argc, char **argv)
+{
+ struct common_record_context ctx;
+
+ parse_record_options(argc, argv, CMD_start, &ctx);
+ record_trace_command(argc, argv, &ctx);
+ exit(0);
+}
+/*
+ * Entry point of the set command: parse the options as CMD_set and run
+ * the common record path. Does not return, the process exits with status 0.
+ */
+void trace_set(int argc, char **argv)
+{
+ struct common_record_context ctx;
+
+ parse_record_options(argc, argv, CMD_set, &ctx);
+ record_trace_command(argc, argv, &ctx);
+ exit(0);
+}
+
+/*
+ * Entry point of the extract command.
+ *
+ * Parses the options as CMD_extract, prepares every instance (output file
+ * name, profile, saved tracing_on state), flushes the buffers through
+ * flush_threads() and writes the result with record_data().
+ * Does not return, the process exits with status 0.
+ */
+void trace_extract(int argc, char **argv)
+{
+	struct common_record_context ctx;
+	struct buffer_instance *instance;
+	enum trace_type type;
+
+	parse_record_options(argc, argv, CMD_extract, &ctx);
+
+	type = get_trace_cmd_type(ctx.curr_cmd);
+
+	update_first_instance(ctx.instance, 1);
+	check_function_plugin();
+
+	if (!ctx.output)
+		ctx.output = DEFAULT_INPUT_FILE;
+
+	/* Save the state of tracing_on before starting */
+	for_all_instances(instance) {
+		instance->output_file = strdup(ctx.output);
+		if (!instance->output_file)
+			die("Failed to allocate output file name for instance");
+
+		/*
+		 * Enable profiling on the instance whose flag was just
+		 * tested, the same way record_trace() does, and not on
+		 * whichever instance the option parser selected last.
+		 */
+		if (!ctx.manual && instance->flags & BUFFER_FL_PROFILE)
+			enable_profile(instance);
+
+		instance->tracing_on_init_val = read_tracing_on(instance);
+		/* Some instances may not be created yet */
+		if (instance->tracing_on_init_val < 0)
+			instance->tracing_on_init_val = 1;
+	}
+
+	/* Extracting data records all events in the system. */
+	if (!ctx.record_all)
+		record_all_events();
+
+	if (ctx.events)
+		expand_event_list();
+
+	page_size = getpagesize();
+	update_plugins(type);
+	set_options();
+
+	for_all_instances(instance) {
+		if (instance->max_graph_depth) {
+			set_max_graph_depth(instance, instance->max_graph_depth);
+			free(instance->max_graph_depth);
+			instance->max_graph_depth = NULL;
+		}
+	}
+
+	allocate_seq();
+	flush_threads();
+	record_stats();
+
+	if (!keep)
+		tracecmd_disable_all_tracing(0);
+
+	/* extract records the date after extraction */
+	if (ctx.date) {
+		/*
+		 * We need to start tracing, don't let other traces
+		 * screw with our trace_marker.
+		 */
+		tracecmd_disable_all_tracing(1);
+		ctx.date2ts = get_date_to_ts();
+	}
+
+	record_data(&ctx);
+	delete_thread_data();
+	destroy_stats();
+	finalize_record_trace(&ctx);
+	exit(0);
+}
+/*
+ * Entry point of the stream command: parse the options as CMD_stream and run
+ * the common record path. Does not return, the process exits with status 0.
+ */
+void trace_stream(int argc, char **argv)
+{
+ struct common_record_context ctx;
+
+ parse_record_options(argc, argv, CMD_stream, &ctx);
+ record_trace_command(argc, argv, &ctx);
+ exit(0);
+}
+/*
+ * Entry point of the profile command: parse the options as CMD_profile, run
+ * the common record path and then do_trace_profile().
+ * Does not return, the process exits with status 0.
+ */
+void trace_profile(int argc, char **argv)
+{
+ struct common_record_context ctx;
+
+ parse_record_options(argc, argv, CMD_profile, &ctx);
+ /* Set unconditionally, after the options have been parsed */
+ handle_init = trace_init_profile;
+ ctx.events = 1;
+
+ /*
+ * If no instances were set, then enable profiling on the top instance.
+ */
+ if (!buffer_instances)
+ top_instance.flags |= BUFFER_FL_PROFILE;
+
+ record_trace_command(argc, argv, &ctx);
+ do_trace_profile();
+ exit(0);
+}
+/*
+ * Entry point of the record command: parse the options as CMD_record and run
+ * the common record path. Does not return, the process exits with status 0.
+ */
+void trace_record(int argc, char **argv)
+{
+ struct common_record_context ctx;
+
+ parse_record_options(argc, argv, CMD_record, &ctx);
+ record_trace_command(argc, argv, &ctx);
+ exit(0);
+}
+
+/*
+ * Run a record session on behalf of a remote peer.
+ *
+ * @msg_handle: message handle of the connection; its version is set to
+ *              V3_PROTOCOL and it is stored in the recording instance
+ * @cpus: not used by this function
+ * @fds: stored in the recording instance
+ * @argc, @argv: record options, without the leading "trace-cmd record"
+ * @use_fifos: stored in the recording instance
+ * @trace_id: stored in the top instance
+ * @host: stored in the recording instance
+ *
+ * Returns 0 when the session is over, or -EINVAL if the options ask for a
+ * command to be run, which an agent does not do.
+ */
+int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
+		       int cpus, int *fds,
+		       int argc, char **argv,
+		       bool use_fifos,
+		       unsigned long long trace_id, const char *host)
+{
+	struct common_record_context ctx;
+	char **argv_plus;
+
+	/* Reset optind for getopt_long */
+	optind = 1;
+	/*
+	 * argc is the number of elements in argv, but we need to convert
+	 * argc and argv into "trace-cmd", "record", argv.
+	 * where argc needs to grow by two. One more zeroed slot keeps the
+	 * vector NULL terminated, like a regular argv.
+	 */
+	argv_plus = calloc(argc + 3, sizeof(*argv_plus));
+	if (!argv_plus)
+		die("Failed to allocate record arguments");
+
+	argv_plus[0] = "trace-cmd";
+	argv_plus[1] = "record";
+	memmove(argv_plus + 2, argv, argc * sizeof(char *));
+	argc += 2;
+
+	parse_record_options(argc, argv_plus, CMD_record_agent, &ctx);
+	if (ctx.run_command) {
+		/* Do not leak the argument vector on the error path */
+		free(argv_plus);
+		return -EINVAL;
+	}
+
+	ctx.instance->fds = fds;
+	ctx.instance->use_fifos = use_fifos;
+	ctx.instance->flags |= BUFFER_FL_AGENT;
+	ctx.instance->msg_handle = msg_handle;
+	ctx.instance->host = host;
+	msg_handle->version = V3_PROTOCOL;
+	top_instance.trace_id = trace_id;
+	/* argc and optind describe argv_plus, so that is the vector to pass */
+	record_trace(argc, argv_plus, &ctx);
+
+	free(argv_plus);
+	return 0;
+}
diff --git a/tracecmd/trace-restore.c b/tracecmd/trace-restore.c
new file mode 100644
index 00000000..5bf29c52
--- /dev/null
+++ b/tracecmd/trace-restore.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#define _LARGEFILE64_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "trace-local.h"
+
+/*
+ * Create the output file @file and write its headers, optionally after
+ * pointing it at @tracing_dir and @kallsyms (either may be NULL).
+ *
+ * Returns the output handle, or NULL on failure, in which case the handle
+ * is closed (when there is one) and @file is unlinked.
+ */
+static struct tracecmd_output *create_output(const char *file,
+					     const char *tracing_dir, const char *kallsyms)
+{
+	struct tracecmd_output *handle = tracecmd_output_create(file);
+	int failed;
+
+	if (!handle) {
+		unlink(file);
+		return NULL;
+	}
+
+	/* The first step that fails stops the chain */
+	failed = (tracing_dir && tracecmd_output_set_trace_dir(handle, tracing_dir)) ||
+		 (kallsyms && tracecmd_output_set_kallsyms(handle, kallsyms)) ||
+		 tracecmd_output_write_headers(handle, NULL);
+	if (!failed)
+		return handle;
+
+	tracecmd_output_close(handle);
+	unlink(file);
+	return NULL;
+}
+
+/*
+ * Entry point of the restore command.
+ *
+ * With -c and no data files, only a partial output file is created through
+ * create_output() and the command lines are written to it. Otherwise the
+ * data files named on the command line are appended to a new output file,
+ * which is either copied from the -i input file or created from scratch.
+ *
+ * Options: -c create only, -o output file, -i input file,
+ * -t tracing directory, -k kallsyms file, -h usage.
+ */
+void trace_restore(int argc, char **argv)
+{
+	struct tracecmd_output *handle;
+	const char *output_file = DEFAULT_INPUT_FILE;
+	const char *output = NULL;
+	const char *input = NULL;
+	const char *tracing_dir = NULL;
+	const char *kallsyms = NULL;
+	struct stat st1;
+	struct stat st2;
+	int first_arg;
+	int create_only = 0;
+	int args;
+	int c;
+
+	if (argc < 2)
+		usage(argv);
+
+	if (strcmp(argv[1], "restore") != 0)
+		usage(argv);
+
+	while ((c = getopt(argc-1, argv+1, "+hco:i:t:k:")) >= 0) {
+		switch (c) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'c':
+			if (input)
+				die("-c and -i are incompatible");
+			create_only = 1;
+			/* make output default to partial */
+			output_file = "trace-partial.dat";
+			break;
+
+		case 't':
+			tracing_dir = optarg;
+			break;
+		case 'k':
+			kallsyms = optarg;
+			break;
+		case 'o':
+			if (output)
+				die("only one output file allowed");
+			output = optarg;
+			break;
+
+		case 'i':
+			if (input)
+				die("only one input file allowed");
+			if (create_only)
+				die("-c and -i are incompatible");
+			input = optarg;
+			break;
+
+		default:
+			usage(argv);
+		}
+	}
+
+	if (!output)
+		output = output_file;
+
+	/* No data files on the command line: only valid with -c */
+	if ((argc - optind) <= 1) {
+		if (!create_only) {
+			warning("No data files found");
+			usage(argv);
+		}
+
+		handle = create_output(output, tracing_dir, kallsyms);
+		if (!handle)
+			die("Unabled to create output file %s", output);
+		if (tracecmd_write_cmdlines(handle) < 0)
+			die("Failed to write command lines");
+		tracecmd_output_close(handle);
+		exit(0);
+	}
+	/* getopt() was run on argv + 1, hence the shift by one */
+	first_arg = optind + 1;
+	args = argc - first_arg;
+	printf("first = %d %s args=%d\n", first_arg, argv[first_arg], args);
+
+	/* Make sure input and output are not the same file */
+	if (input && output) {
+		if (stat(input, &st1) < 0)
+			die("%s:", input);
+		/* output exists? otherwise we don't care */
+		if (stat(output, &st2) == 0) {
+			if (st1.st_ino == st2.st_ino &&
+			    st1.st_dev == st2.st_dev)
+				die("input and output file are the same");
+		}
+	}
+
+	if (input) {
+		struct tracecmd_input *ihandle;
+
+		ihandle = tracecmd_alloc(input, 0);
+		if (!ihandle)
+			die("error reading file %s", input);
+		/* make sure headers are ok */
+		if (tracecmd_read_headers(ihandle, TRACECMD_FILE_CMD_LINES) < 0)
+			die("error reading file %s headers", input);
+
+		handle = tracecmd_copy(ihandle, output, TRACECMD_FILE_CMD_LINES, 0, NULL);
+		tracecmd_close(ihandle);
+	} else {
+		handle = tracecmd_output_create(output);
+		/*
+		 * Only write the headers to a handle that exists, and treat a
+		 * failed header write like a failed creation instead of
+		 * appending data to a file without valid headers.
+		 */
+		if (handle && tracecmd_output_write_headers(handle, NULL)) {
+			tracecmd_output_close(handle);
+			handle = NULL;
+		}
+	}
+
+	if (!handle)
+		die("error writing to %s", output);
+
+	if (tracecmd_append_cpu_data(handle, args, &argv[first_arg]) < 0)
+		die("failed to append data");
+
+	return;
+}
diff --git a/tracecmd/trace-setup-guest.c b/tracecmd/trace-setup-guest.c
new file mode 100644
index 00000000..f20b48e2
--- /dev/null
+++ b/tracecmd/trace-setup-guest.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 VMware Inc, Slavomir Kaslev <kaslevs@vmware.com>
+ *
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <grp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "trace-local.h"
+#include "trace-msg.h"
+
+/*
+ * Create the directory @path with @mode, creating every missing parent on
+ * the way. Components that already exist are not an error.
+ *
+ * Returns 0 on success, -E2BIG when @path is longer than PATH_MAX, or the
+ * negated errno of the mkdir() call that failed.
+ */
+static int make_dir(const char *path, mode_t mode)
+{
+	/*
+	 * Zero filled, with room for two bytes past the longest path: when
+	 * the last component is reached, its terminating NUL is replaced by
+	 * a '/' and the loop stops on the zero byte that follows it.
+	 */
+	char buf[PATH_MAX + 2] = { 0 };
+	size_t len = strlen(path);
+	char *p = buf;
+
+	if (len > PATH_MAX)
+		return -E2BIG;
+	memcpy(buf, path, len);
+
+	while (*p) {
+		/* Step over the separators, then over one component */
+		p += strspn(p, "/");
+		p += strcspn(p, "/");
+
+		/* Cut the path after this component and create it */
+		*p = '\0';
+		if (mkdir(buf, mode) < 0 && errno != EEXIST)
+			return -errno;
+		*p = '/';
+		p++;
+	}
+
+	return 0;
+}
+
+/*
+ * Make sure a FIFO exists at @path, creating it with @mode if needed.
+ *
+ * Returns 0 if the FIFO is there, -EEXIST if @path exists but is not a
+ * FIFO, or the negated errno of mkfifo().
+ */
+static int make_fifo(const char *path, mode_t mode)
+{
+	struct stat sb;
+
+	/* Something is already there: fine only if it is a FIFO */
+	if (stat(path, &sb) == 0)
+		return S_ISFIFO(sb.st_mode) ? 0 : -EEXIST;
+
+	return mkfifo(path, mode) ? -errno : 0;
+}
+
+/*
+ * Create the directory of @guest (GUEST_DIR_FMT) with mode 0750.
+ * Returns what make_dir() returns.
+ */
+static int make_guest_dir(const char *guest)
+{
+	char guest_dir[PATH_MAX];
+
+	snprintf(guest_dir, sizeof(guest_dir), GUEST_DIR_FMT, guest);
+
+	return make_dir(guest_dir, 0750);
+}
+
+/*
+ * Create the ".in" and ".out" FIFOs of CPU @cpu for @guest, with @mode.
+ * Stops at the first failure and returns what make_fifo() returned.
+ */
+static int make_guest_fifo(const char *guest, int cpu, mode_t mode)
+{
+	static const char *exts[] = {".in", ".out"};
+	char fifo[PATH_MAX];
+	int rc = 0;
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(exts); n++) {
+		snprintf(fifo, sizeof(fifo), GUEST_FIFO_FMT "%s",
+			 guest, cpu, exts[n]);
+		rc = make_fifo(fifo, mode);
+		if (rc < 0)
+			return rc;
+	}
+
+	return rc;
+}
+
+/*
+ * Create the FIFOs of the first @nr_cpus CPUs of @guest with exactly @mode:
+ * the umask is cleared while they are created and restored afterwards.
+ * Returns 0, or the result of the first make_guest_fifo() that failed.
+ */
+static int make_guest_fifos(const char *guest, int nr_cpus, mode_t mode)
+{
+	mode_t saved_mask = umask(0);
+	int rc = 0;
+	int cpu;
+
+	for (cpu = 0; cpu < nr_cpus && rc >= 0; cpu++)
+		rc = make_guest_fifo(guest, cpu, mode);
+
+	umask(saved_mask);
+
+	return rc;
+}
+
+/*
+ * Ask virsh for the maximum number of virtual CPUs of @guest.
+ *
+ * Returns the number read from the command output, -1 if no number could
+ * be read, or the negated errno if popen() failed.
+ */
+static int get_guest_cpu_count(const char *guest)
+{
+	char cmd[1024];
+	int count = -1;
+	FILE *out;
+
+	snprintf(cmd, sizeof(cmd),
+		 "virsh vcpucount --maximum '%s' 2>/dev/null", guest);
+
+	out = popen(cmd, "r");
+	if (!out)
+		return -errno;
+
+	/* When nothing can be converted, count keeps its initial -1 */
+	fscanf(out, "%d", &count);
+	pclose(out);
+
+	return count;
+}
+
+/*
+ * Attach the FIFOs of the first @nr_cpus CPUs of @guest to the guest
+ * configuration, as virtio pipe channels, using "virsh attach-device".
+ *
+ * The device description of each CPU is written to one temporary file that
+ * is reused for every CPU and removed before returning.
+ *
+ * Returns 0 on success, the result of mkstemp() if the temporary file could
+ * not be created, or a negative errno value (-EIO when no errno is
+ * available) if writing the description or running virsh failed.
+ */
+static int attach_guest_fifos(const char *guest, int nr_cpus)
+{
+	const char *cmd_fmt =
+		"virsh attach-device --config '%s' '%s' >/dev/null 2>/dev/null";
+	const char *xml_fmt =
+		"<channel type='pipe'>\n"
+		"  <source path='%s'/>\n"
+		"  <target type='virtio' name='%s%d'/>\n"
+		"</channel>";
+	char tmp_path[PATH_MAX], path[PATH_MAX];
+	char cmd[PATH_MAX], xml[PATH_MAX];
+	int i, fd, ret = 0;
+	size_t len;
+
+#ifdef __ANDROID__
+	strcpy(tmp_path, "/data/local/tmp/pipexmlXXXXXX");
+#else /* !__ANDROID__ */
+	strcpy(tmp_path, "/tmp/pipexmlXXXXXX");
+#endif /* __ANDROID__ */
+
+	fd = mkstemp(tmp_path);
+	if (fd < 0)
+		return fd;
+
+	for (i = 0; i < nr_cpus; i++) {
+		snprintf(path, sizeof(path), GUEST_FIFO_FMT, guest, i);
+		snprintf(xml, sizeof(xml), xml_fmt, path, GUEST_PIPE_NAME, i);
+
+		/* Do not hand virsh a description that was not fully written */
+		len = strlen(xml);
+		errno = 0;
+		if (pwrite(fd, xml, len, 0) != (ssize_t)len) {
+			ret = errno ? -errno : -EIO;
+			break;
+		}
+
+		snprintf(cmd, sizeof(cmd), cmd_fmt, guest, tmp_path);
+		errno = 0;
+		if (system(cmd) != 0) {
+			/*
+			 * A command that fails is reported by system() through
+			 * its return value and may leave errno at 0; make sure
+			 * the caller still sees a negative result.
+			 */
+			ret = errno ? -errno : -EIO;
+			break;
+		}
+	}
+
+	close(fd);
+	unlink(tmp_path);
+
+	return ret;
+}
+
+/*
+ * Create the directory and the FIFOs of @guest and, when @attach is set,
+ * attach the FIFOs to the guest. Any failure is fatal.
+ *
+ * When @gid is not -1, the work is done with @gid as effective group ID,
+ * and the previous effective group ID is restored at the end.
+ */
+static void do_setup_guest(const char *guest, int nr_cpus,
+			   mode_t mode, gid_t gid, bool attach)
+{
+	const bool switch_group = gid != -1;
+	gid_t orig_egid = 0;
+
+	if (switch_group) {
+		orig_egid = getegid();
+		if (setegid(gid) < 0)
+			die("failed to set effective group ID");
+	}
+
+	if (make_guest_dir(guest) < 0)
+		die("failed to create guest directory for %s", guest);
+
+	if (make_guest_fifos(guest, nr_cpus, mode) < 0)
+		die("failed to create FIFOs for %s", guest);
+
+	if (attach && attach_guest_fifos(guest, nr_cpus) < 0)
+		die("failed to attach FIFOs to %s", guest);
+
+	if (switch_group && setegid(orig_egid) < 0)
+		die("failed to restore effective group ID");
+}
+
+/*
+ * Entry point of the setup-guest command.
+ *
+ * Options: -c number of CPUs, -p FIFO permissions (octal), -g group name,
+ * -a attach the FIFOs to the guest, -h usage. Exactly one argument, the
+ * guest name, must follow the options. When no positive CPU count was
+ * given, it is taken from get_guest_cpu_count().
+ */
+void trace_setup_guest(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"help", no_argument, NULL, '?'},
+		{NULL, 0, NULL, 0}
+	};
+	struct group *grp;
+	bool attach = false;
+	mode_t mode = 0660;
+	gid_t gid = -1;
+	int nr_cpus = -1;
+	int opt_index = 0;
+	char *guest;
+	int c;
+
+	if (argc < 2)
+		usage(argv);
+
+	if (strcmp(argv[1], "setup-guest") != 0)
+		usage(argv);
+
+	/* Options are parsed past the sub-command name, hence argv + 1 */
+	while ((c = getopt_long(argc-1, argv+1, "+hc:p:g:a",
+				long_options, &opt_index)) != -1) {
+		switch (c) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'c':
+			nr_cpus = atoi(optarg);
+			break;
+		case 'p':
+			mode = strtol(optarg, NULL, 8);
+			break;
+		case 'g':
+			grp = getgrnam(optarg);
+			if (!grp)
+				die("group %s does not exist", optarg);
+			gid = grp->gr_gid;
+			break;
+		case 'a':
+			attach = true;
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	/* Exactly one non-option argument is expected: the guest */
+	if (optind != argc-2)
+		usage(argv);
+
+	guest = argv[optind+1];
+
+	if (nr_cpus <= 0)
+		nr_cpus = get_guest_cpu_count(guest);
+
+	if (nr_cpus <= 0)
+		die("invalid number of cpus for guest %s", guest);
+
+	do_setup_guest(guest, nr_cpus, mode, gid, attach);
+}
diff --git a/tracecmd/trace-show.c b/tracecmd/trace-show.c
new file mode 100644
index 00000000..eb328527
--- /dev/null
+++ b/tracecmd/trace-show.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdlib.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include "tracefs.h"
+#include "trace-local.h"
+
+/*
+ * Values returned by getopt_long() for the long-only options of
+ * "trace-cmd show" (see long_options[] in trace_show()). They count down
+ * from 255, presumably to stay clear of the single-character short
+ * options.
+ */
+enum {
+ OPT_tracing_on = 255,
+ OPT_current_tracer = 254,
+ OPT_buffer_size_kb = 253,
+ OPT_buffer_total_size_kb = 252,
+ OPT_ftrace_filter = 251,
+ OPT_ftrace_notrace = 250,
+ OPT_ftrace_pid = 249,
+ OPT_graph_function = 248,
+ OPT_graph_notrace = 247,
+ OPT_cpumask = 246,
+};
+
+/*
+ * Entry point for "trace-cmd show".
+ *
+ * Without long options it prints one tracing file: "trace" by default,
+ * "trace_pipe" with -p or "snapshot" with -s. -c <cpu> selects the
+ * per_cpu/cpu<N>/ variant of that file and -B <buffer> the one under
+ * instances/<buffer>/. -f additionally prints the resolved file name.
+ *
+ * Each long option (--tracing_on, --current_tracer, ...) prints the
+ * matching file of the current instance as soon as it is parsed; if any
+ * was given the function exits after option parsing.
+ */
+void trace_show(int argc, char **argv)
+{
+ const char *buffer = NULL;
+ const char *file = "trace";
+ const char *cpu = NULL;
+ struct buffer_instance *instance = &top_instance;
+ char cpu_path[128];
+ char *path;
+ int snap = 0;
+ int pipe = 0;
+ int show_name = 0;
+ int option_index = 0;
+ int stop = 0;
+ int c;
+ static struct option long_options[] = {
+ {"tracing_on", no_argument, NULL, OPT_tracing_on},
+ {"current_tracer", no_argument, NULL, OPT_current_tracer},
+ {"buffer_size", no_argument, NULL, OPT_buffer_size_kb},
+ {"buffer_total_size", no_argument, NULL, OPT_buffer_total_size_kb},
+ {"ftrace_filter", no_argument, NULL, OPT_ftrace_filter},
+ {"ftrace_notrace", no_argument, NULL, OPT_ftrace_notrace},
+ {"ftrace_pid", no_argument, NULL, OPT_ftrace_pid},
+ {"graph_function", no_argument, NULL, OPT_graph_function},
+ {"graph_notrace", no_argument, NULL, OPT_graph_notrace},
+ {"cpumask", no_argument, NULL, OPT_cpumask},
+ {"help", no_argument, NULL, '?'},
+ {NULL, 0, NULL, 0}
+ };
+
+ init_top_instance();
+
+ /*
+ * Options are parsed relative to argv + 1 (the sub-command name).
+ * 'h' is not part of the option string, so "-h" is reported by
+ * getopt_long() as '?' and ends up in the default case.
+ */
+ while ((c = getopt_long(argc-1, argv+1, "B:c:fsp",
+ long_options, &option_index)) >= 0) {
+ switch (c) {
+ case 'h':
+ usage(argv);
+ break;
+ case 'B':
+ if (buffer)
+ die("Can only show one buffer at a time");
+ buffer = optarg;
+ /* Long options parsed after this point use this instance */
+ instance = allocate_instance(optarg);
+ if (!instance)
+ die("Failed to create instance");
+ break;
+ case 'c':
+ if (cpu)
+ die("Can only show one CPU at a time");
+ cpu = optarg;
+ break;
+ case 'f':
+ show_name = 1;
+ break;
+ case 's':
+ snap = 1;
+ if (pipe)
+ die("Can not have -s and -p together");
+ break;
+ case 'p':
+ pipe = 1;
+ if (snap)
+ die("Can not have -s and -p together");
+ break;
+ case OPT_tracing_on:
+ show_instance_file(instance, "tracing_on");
+ stop = 1;
+ break;
+ case OPT_current_tracer:
+ show_instance_file(instance, "current_tracer");
+ stop = 1;
+ break;
+ case OPT_buffer_size_kb:
+ show_instance_file(instance, "buffer_size_kb");
+ stop = 1;
+ break;
+ case OPT_buffer_total_size_kb:
+ show_instance_file(instance, "buffer_total_size_kb");
+ stop = 1;
+ break;
+ case OPT_ftrace_filter:
+ show_instance_file(instance, "set_ftrace_filter");
+ stop = 1;
+ break;
+ case OPT_ftrace_notrace:
+ show_instance_file(instance, "set_ftrace_notrace");
+ stop = 1;
+ break;
+ case OPT_ftrace_pid:
+ show_instance_file(instance, "set_ftrace_pid");
+ stop = 1;
+ break;
+ case OPT_graph_function:
+ show_instance_file(instance, "set_graph_function");
+ stop = 1;
+ break;
+ case OPT_graph_notrace:
+ show_instance_file(instance, "set_graph_notrace");
+ stop = 1;
+ break;
+ case OPT_cpumask:
+ show_instance_file(instance, "tracing_cpumask");
+ stop = 1;
+ break;
+ default:
+ usage(argv);
+ }
+ }
+ /* A long option already printed what was asked for */
+ if (stop)
+ exit(0);
+ if (pipe)
+ file = "trace_pipe";
+ else if (snap)
+ file = "snapshot";
+
+ if (cpu) {
+ char *endptr;
+ long val;
+
+ /* Only a missing number or a range error is rejected here */
+ errno = 0;
+ val = strtol(cpu, &endptr, 0);
+ if (errno || cpu == endptr)
+ die("Invalid CPU index '%s'", cpu);
+ snprintf(cpu_path, 128, "per_cpu/cpu%ld/%s", val, file);
+ file = cpu_path;
+ }
+
+ if (buffer) {
+ int ret;
+
+ /* path is only set, and only freed below, when -B was given */
+ ret = asprintf(&path, "instances/%s/%s", buffer, file);
+ if (ret < 0)
+ die("Failed to allocate instance path %s", file);
+ file = path;
+ }
+
+ if (show_name) {
+ char *name;
+ name = tracefs_get_tracing_file(file);
+ printf("%s\n", name);
+ tracefs_put_tracing_file(name);
+ }
+ show_file(file);
+ if (buffer)
+ free(path);
+
+ return;
+}
diff --git a/tracecmd/trace-snapshot.c b/tracecmd/trace-snapshot.c
new file mode 100644
index 00000000..34630b4f
--- /dev/null
+++ b/tracecmd/trace-snapshot.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "tracefs.h"
+#include "trace-local.h"
+
+/*
+ * Write the string @val into the tracing file @name.
+ *
+ * @name is resolved with tracefs_get_tracing_file(). Dies when the file
+ * cannot be opened for writing or when the write fails.
+ */
+static void write_file(const char *name, char *val)
+{
+	char *tracing_path = tracefs_get_tracing_file(name);
+	int fd = open(tracing_path, O_WRONLY);
+
+	if (fd < 0)
+		die("writing %s", tracing_path);
+
+	if (write(fd, val, strlen(val)) < 0)
+		die("failed to write to %s\n", tracing_path);
+
+	close(fd);
+	tracefs_put_tracing_file(tracing_path);
+}
+
+/*
+ * Entry point for "trace-cmd snapshot".
+ *
+ * Options:
+ *   -s          take a snapshot (writes "1" to the snapshot file)
+ *   -r          reset the snapshot (writes "2")
+ *   -f          free the snapshot (writes "0"); exclusive with -s
+ *   -B <buffer> operate on instances/<buffer>/ instead of the top level
+ *   -c <cpu>    operate on per_cpu/cpu<cpu>/snapshot
+ *
+ * Without -s, -r or -f the snapshot file is printed and the process
+ * exits.
+ */
+void trace_snapshot (int argc, char **argv)
+{
+	const char *buffer = NULL;
+	const char *file = "snapshot";
+	struct stat st;
+	char *instance_path = NULL;
+	char *name;
+	char cpu_path[128];
+	int take_snap = 0;
+	int reset_snap = 0;
+	int free_snap = 0;
+	int cpu = -1;
+	int ret;
+	int c;
+
+	if (argc < 2)
+		usage(argv);
+
+	if (strcmp(argv[1], "snapshot") != 0)
+		usage(argv);
+
+	/* Options are parsed relative to argv + 1 (the sub-command name) */
+	while ((c = getopt(argc-1, argv+1, "srfB:c:")) >= 0) {
+		switch (c) {
+		case 'h':
+			/* 'h' is not in the option string; "-h" reaches default */
+			usage(argv);
+			break;
+		case 's':
+			take_snap = 1;
+			if (free_snap)
+				die("can't take snapshot and free it at the same time");
+			break;
+		case 'f':
+			free_snap = 1;
+			if (take_snap)
+				die("can't take snapshot and free it at the same time");
+			break;
+		case 'r':
+			reset_snap = 1;
+			break;
+		case 'B':
+			if (buffer)
+				die("Can only do one buffer at a time");
+			buffer = optarg;
+			break;
+		case 'c':
+			if (cpu >= 0)
+				die("Can only do one CPU (or all) at a time");
+			cpu = atoi(optarg);
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	if (cpu >= 0) {
+		snprintf(cpu_path, sizeof(cpu_path), "per_cpu/cpu%d/%s", cpu, file);
+		file = cpu_path;
+	}
+
+	/*
+	 * -B used to be parsed and then ignored, so the top level snapshot
+	 * was always the one acted on. Prefix the instance directory so the
+	 * requested buffer is really used.
+	 */
+	if (buffer) {
+		size_t len = strlen("instances/") + strlen(buffer) + 1 +
+			     strlen(file) + 1;
+
+		instance_path = malloc(len);
+		if (!instance_path)
+			die("Failed to allocate instance path %s", file);
+		snprintf(instance_path, len, "instances/%s/%s", buffer, file);
+		file = instance_path;
+	}
+
+	/* The snapshot file only exists when the kernel supports snapshots */
+	name = tracefs_get_tracing_file(file);
+	ret = stat(name, &st);
+	if (ret < 0)
+		die("Snapshot feature is not supported by this kernel");
+	tracefs_put_tracing_file(name);
+
+	if (!reset_snap && !take_snap && !free_snap) {
+		show_file(file);
+		exit(0);
+	}
+
+	if (reset_snap)
+		write_file(file, "2");
+
+	if (free_snap)
+		write_file(file, "0");
+
+	if (take_snap)
+		write_file(file, "1");
+
+	free(instance_path);
+}
diff --git a/tracecmd/trace-split.c b/tracecmd/trace-split.c
new file mode 100644
index 00000000..83c5402c
--- /dev/null
+++ b/tracecmd/trace-split.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#define _LARGEFILE64_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libgen.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "trace-local.h"
+
+/* Page size of the input trace; set in trace_split(), used by write_page() */
+static unsigned int page_size;
+/* Input file used when -i is not given */
+static const char *default_input_file = DEFAULT_INPUT_FILE;
+/* Input file named with -i; stays NULL when the option is absent */
+static const char *input_file;
+
+/*
+ * Unit used to decide where one output file of "trace-cmd split" ends.
+ *
+ * trace_split() derives the value by falling through its option cases
+ * 'p' -> 'e' -> 'u' -> 'm' -> 's' and incrementing once per case, so
+ * -s yields SPLIT_SECONDS (1) and -p yields SPLIT_PAGES (5).
+ */
+enum split_types {
+ SPLIT_NONE,
+ /* The order of these must be reverse of the case statement in the options */
+ SPLIT_SECONDS,
+ SPLIT_MSECS,
+ SPLIT_USECS,
+ SPLIT_EVENTS,
+ SPLIT_PAGES,
+ SPLIT_NR_TYPES,
+};
+
+/* Per-CPU state used while the records of one CPU are rewritten */
+struct cpu_data {
+ /* Timestamp of the page header or of the last entry written */
+ unsigned long long ts;
+ /* Input offset of the record last handed to write_record(); 0 at end of data */
+ unsigned long long offset;
+ /* NOTE(review): not referenced by the code visible in this file section */
+ struct tep_record *record;
+ /* CPU number this entry describes */
+ int cpu;
+ /* Descriptor of the temporary per-CPU output file */
+ int fd;
+ /* Write position (in bytes) inside page */
+ int index;
+ /* Points at the commit field inside page, filled in by write_page() */
+ void *commit;
+ /* Page currently being filled, or NULL */
+ void *page;
+ /* Name of the temporary per-CPU output file */
+ char *file;
+};
+
+/*
+ * Build the first 32-bit word of a ring buffer entry from a time delta
+ * (@time) and a type_len value (@len).
+ *
+ * For a big endian trace file @len is placed above bit 27, otherwise
+ * @time is shifted up by 5 bits and @len occupies the low bits. The
+ * combined word is then passed through tep_read_number() (presumably to
+ * convert between host and file byte order -- confirm against
+ * libtraceevent) and returned.
+ *
+ * The arithmetic is done on unsigned values so the shifts cannot
+ * overflow a signed int.
+ */
+static int create_type_len(struct tep_handle *pevent, int time, int len)
+{
+	unsigned int word;
+
+	if (tep_is_file_bigendian(pevent))
+		word = (unsigned int)time | ((unsigned int)len << 27);
+	else
+		word = ((unsigned int)time << 5) | (unsigned int)len;
+
+	return tep_read_number(pevent, &word, 4);
+}
+
+/*
+ * Append @record to the page being built in @cpu_data.
+ *
+ * Returns 1 when the record itself was written. Returns 0 when only a
+ * time extend entry was emitted because the delta to the previous
+ * timestamp is larger than 1 << 27; cpu_data->ts is then moved to the
+ * record's timestamp and parse_cpu() calls again with the same record.
+ *
+ * @type is not used by this function.
+ */
+static int write_record(struct tracecmd_input *handle,
+ struct tep_record *record,
+ struct cpu_data *cpu_data,
+ enum split_types type)
+{
+ unsigned long long diff;
+ struct tep_handle *pevent;
+ void *page;
+ int len = 0;
+ char *ptr;
+ int index = 0;
+ int time;
+
+ page = cpu_data->page;
+
+ pevent = tracecmd_get_tep(handle);
+
+ ptr = page + cpu_data->index;
+
+ diff = record->ts - cpu_data->ts;
+ if (diff > (1 << 27)) {
+ /* Add a time stamp */
+ /* Low 27 bits go in the first word, the remaining bits in the second */
+ len = RINGBUF_TYPE_TIME_EXTEND;
+ time = (unsigned int)(diff & ((1ULL << 27) - 1));
+ time = create_type_len(pevent, time, len);
+ *(unsigned *)ptr = time;
+ ptr += 4;
+ time = (unsigned int)(diff >> 27);
+ *(unsigned *)ptr = tep_read_number(pevent, &time, 4);
+ cpu_data->ts = record->ts;
+ cpu_data->index += 8;
+ return 0;
+ }
+
+ /* Sizes up to 28 words are encoded in type_len; otherwise len stays 0 */
+ if (record->size && (record->size <= 28 * 4))
+ len = record->size / 4;
+
+ time = (unsigned)diff;
+ time = create_type_len(pevent, time, len);
+
+ memcpy(ptr, &time, 4);
+ ptr += 4;
+ index = 4;
+
+ if (!len) {
+ /* type_len of 0: an explicit length word follows the header word */
+ len = record->size + 4;
+ if ((len + 4) > record->record_size)
+ die("Bad calculation of record len (expect:%d actual:%d)",
+ record->record_size, len + 4);
+ *(unsigned *)ptr = tep_read_number(pevent, &len, 4);
+ ptr += 4;
+ index += 4;
+ }
+
+ /* The payload is copied rounded up to a multiple of 4 bytes */
+ len = (record->size + 3) & ~3;
+ index += len;
+
+ memcpy(ptr, record->data, len);
+
+ cpu_data->index += index;
+ cpu_data->ts = record->ts;
+
+ return 1;
+}
+
+/*
+ * Finish the page held in @cpu_data and write it to cpu_data->fd.
+ *
+ * The commit field gets the number of data bytes on the page, that is
+ * the write index minus the page header (8 byte timestamp plus a commit
+ * field of @long_size bytes). A failed or short write is fatal, since it
+ * would otherwise silently produce a truncated trace file.
+ */
+static void write_page(struct tep_handle *pevent,
+		       struct cpu_data *cpu_data, int long_size)
+{
+	ssize_t written;
+
+	if (long_size == 8) {
+		unsigned long long index = cpu_data->index - 16;
+		*(unsigned long long *)cpu_data->commit =
+			tep_read_number(pevent, &index, 8);
+	} else {
+		unsigned int index = cpu_data->index - 12;
+		*(unsigned int *)cpu_data->commit =
+			tep_read_number(pevent, &index, 4);
+	}
+
+	written = write(cpu_data->fd, cpu_data->page, page_size);
+	if (written < 0 || (size_t)written != page_size)
+		die("Failed to write page to %s", cpu_data->file);
+}
+
+/*
+ * Fetch the next record from @handle.
+ *
+ * With @percpu set the record is read from CPU *@cpu only; otherwise
+ * tracecmd_read_next_data() picks the record and reports its CPU
+ * through @cpu.
+ */
+static struct tep_record *read_record(struct tracecmd_input *handle,
+				      int percpu, int *cpu)
+{
+	return percpu ? tracecmd_read_data(handle, *cpu)
+		      : tracecmd_read_next_data(handle, cpu);
+}
+
+/*
+ * Move the read position to timestamp @start: for @cpu only when
+ * @percpu is set, otherwise for every CPU in [0, @cpus).
+ */
+static void set_cpu_time(struct tracecmd_input *handle,
+			 int percpu, unsigned long long start, int cpu, int cpus)
+{
+	int first = percpu ? cpu : 0;
+	int last = percpu ? cpu + 1 : cpus;
+	int i;
+
+	for (i = first; i < last; i++)
+		tracecmd_set_cpu_to_timestamp(handle, i, start);
+}
+
+/*
+ * Copy records from @handle into freshly built pages and write those
+ * pages to the temporary files described by @cpu_data.
+ *
+ * @start:       first timestamp to copy; 0 means start at the first
+ *               record read.
+ * @end:         last timestamp to copy; 0 means no upper bound.
+ * @count_limit: limit whose unit depends on @type (seconds, msecs,
+ *               usecs, events or pages).
+ * @percpu:      when set only @cpu is processed; otherwise records of
+ *               all CPUs are read and @cpu is overwritten with the CPU
+ *               of each record.
+ *
+ * Always returns 0.
+ */
+static int parse_cpu(struct tracecmd_input *handle,
+ struct cpu_data *cpu_data,
+ unsigned long long start,
+ unsigned long long end,
+ int count_limit, int percpu, int cpu,
+ enum split_types type)
+{
+ struct tep_record *record;
+ struct tep_handle *pevent;
+ void *ptr;
+ /* NOTE: this local hides the file-level page_size inside this function */
+ int page_size;
+ int long_size = 0;
+ int cpus;
+ int count = 0;
+ int pages = 0;
+
+ cpus = tracecmd_cpus(handle);
+
+ long_size = tracecmd_long_size(handle);
+ page_size = tracecmd_page_size(handle);
+ pevent = tracecmd_get_tep(handle);
+
+ /* Force new creation of first page */
+ if (percpu) {
+ cpu_data[cpu].index = page_size + 1;
+ cpu_data[cpu].page = NULL;
+ } else {
+ for (cpu = 0; cpu < cpus; cpu++) {
+ cpu_data[cpu].index = page_size + 1;
+ cpu_data[cpu].page = NULL;
+ }
+ }
+
+ /*
+ * Get the cpu pointers up to the start of the
+ * start time stamp.
+ */
+
+ record = read_record(handle, percpu, &cpu);
+
+ if (start) {
+ set_cpu_time(handle, percpu, start, cpu, cpus);
+ while (record && record->ts < start) {
+ tracecmd_free_record(record);
+ record = read_record(handle, percpu, &cpu);
+ }
+ } else if (record)
+ start = record->ts;
+
+ while (record && (!end || record->ts <= end)) {
+ /* The record does not fit: flush the current page and start a new one */
+ if (cpu_data[cpu].index + record->record_size > page_size) {
+
+ if (type == SPLIT_PAGES && ++pages > count_limit)
+ break;
+
+ if (cpu_data[cpu].page)
+ write_page(pevent, &cpu_data[cpu], long_size);
+ else {
+ cpu_data[cpu].page = malloc(page_size);
+ if (!cpu_data[cpu].page)
+ die("Failed to allocate page");
+ }
+
+ memset(cpu_data[cpu].page, 0, page_size);
+ ptr = cpu_data[cpu].page;
+
+ /* Page header: 8 byte timestamp followed by the commit field */
+ *(unsigned long long*)ptr =
+ tep_read_number(pevent, &(record->ts), 8);
+ cpu_data[cpu].ts = record->ts;
+ ptr += 8;
+ cpu_data[cpu].commit = ptr;
+ ptr += long_size;
+ cpu_data[cpu].index = 8 + long_size;
+ }
+
+ cpu_data[cpu].offset = record->offset;
+
+ /* A return of 0 means only a time extend was written: retry the same record */
+ if (write_record(handle, record, &cpu_data[cpu], type)) {
+ tracecmd_free_record(record);
+ record = read_record(handle, percpu, &cpu);
+
+ /* if we hit the end of the cpu, clear the offset */
+ if (!record) {
+ if (percpu)
+ cpu_data[cpu].offset = 0;
+ else
+ for (cpu = 0; cpu < cpus; cpu++)
+ cpu_data[cpu].offset = 0;
+ }
+
+ /* Drop the next record when it is past the limit for this split type */
+ switch (type) {
+ case SPLIT_NONE:
+ break;
+ case SPLIT_SECONDS:
+ if (record &&
+ record->ts >
+ (start + (unsigned long long)count_limit * 1000000000ULL)) {
+ tracecmd_free_record(record);
+ record = NULL;
+ }
+ break;
+ case SPLIT_MSECS:
+ if (record &&
+ record->ts >
+ (start + (unsigned long long)count_limit * 1000000ULL)) {
+ tracecmd_free_record(record);
+ record = NULL;
+ }
+ break;
+ case SPLIT_USECS:
+ if (record &&
+ record->ts >
+ (start + (unsigned long long)count_limit * 1000ULL)) {
+ tracecmd_free_record(record);
+ record = NULL;
+ }
+ break;
+ case SPLIT_EVENTS:
+ if (++count >= count_limit) {
+ tracecmd_free_record(record);
+ record = NULL;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ if (record)
+ tracecmd_free_record(record);
+
+ /* Flush and release whatever page is still pending */
+ if (percpu) {
+ if (cpu_data[cpu].page) {
+ write_page(pevent, &cpu_data[cpu], long_size);
+ free(cpu_data[cpu].page);
+ cpu_data[cpu].page = NULL;
+ }
+ } else {
+ for (cpu = 0; cpu < cpus; cpu++) {
+ if (cpu_data[cpu].page) {
+ write_page(pevent, &cpu_data[cpu], long_size);
+ free(cpu_data[cpu].page);
+ cpu_data[cpu].page = NULL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Produce one split output file.
+ *
+ * The records selected by @start, @end, @count and @type are first
+ * written into one temporary file per CPU (".tmp.<base>.<cpu>" in the
+ * directory of @output_file); those files are then appended to
+ * @output_file and removed.
+ *
+ * @percpu:   process each CPU on its own instead of in timestamp order.
+ * @only_cpu: when >= 0, only this CPU is processed.
+ *
+ * Returns the timestamp to continue from for the next split (0 if there
+ * is none and @end is 0). The value is a nanosecond count, so it is
+ * returned as an integer; a double would lose precision above 2^53.
+ */
+static unsigned long long parse_file(struct tracecmd_input *handle,
+				     const char *output_file,
+				     unsigned long long start,
+				     unsigned long long end, int percpu, int only_cpu,
+				     int count, enum split_types type)
+{
+	unsigned long long current;
+	struct tracecmd_output *ohandle;
+	struct cpu_data *cpu_data;
+	struct tep_record *record;
+	char **cpu_list;
+	char *dir_buf;
+	char *base_buf;
+	char *base;
+	char *file;
+	char *dir;
+	int cpus;
+	int cpu;
+	int fd;
+
+	/*
+	 * dirname() and basename() may both modify their argument, so each
+	 * one gets its own copy of the path.
+	 */
+	dir_buf = strdup(output_file);
+	base_buf = strdup(output_file);
+	if (!dir_buf || !base_buf)
+		die("Failed to allocate for %s", output_file);
+	dir = dirname(dir_buf);
+	base = basename(base_buf);
+
+	ohandle = tracecmd_copy(handle, output_file, TRACECMD_FILE_CMD_LINES, 0, NULL);
+	if (!ohandle)
+		die("Failed to create output file %s", output_file);
+
+	cpus = tracecmd_cpus(handle);
+	if (only_cpu >= cpus)
+		die("CPU %d is not part of the trace (%d cpus)", only_cpu, cpus);
+
+	cpu_data = malloc(sizeof(*cpu_data) * cpus);
+	if (!cpu_data)
+		die("Failed to allocate cpu_data for %d cpus", cpus);
+
+	for (cpu = 0; cpu < cpus; cpu++) {
+		int ret;
+
+		ret = asprintf(&file, "%s/.tmp.%s.%d", dir, base, cpu);
+		if (ret < 0)
+			die("Failed to allocate file for %s %s %d", dir, base, cpu);
+		fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
+		if (fd < 0)
+			die("Failed to create temporary file %s", file);
+		cpu_data[cpu].cpu = cpu;
+		cpu_data[cpu].fd = fd;
+		cpu_data[cpu].file = file;
+		cpu_data[cpu].offset = 0;
+		if (start)
+			tracecmd_set_cpu_to_timestamp(handle, cpu, start);
+	}
+
+	if (only_cpu >= 0) {
+		parse_cpu(handle, cpu_data, start, end, count,
+			  1, only_cpu, type);
+	} else if (percpu) {
+		for (cpu = 0; cpu < cpus; cpu++)
+			parse_cpu(handle, cpu_data, start,
+				  end, count, percpu, cpu, type);
+	} else
+		parse_cpu(handle, cpu_data, start,
+			  end, count, percpu, -1, type);
+
+	cpu_list = malloc(sizeof(*cpu_list) * cpus);
+	if (!cpu_list)
+		die("Failed to allocate cpu_list for %d cpus", cpus);
+	for (cpu = 0; cpu < cpus; cpu++) {
+		/* All pages are written; do not leak one descriptor per CPU per split */
+		close(cpu_data[cpu].fd);
+		cpu_list[cpu] = cpu_data[cpu].file;
+	}
+
+	tracecmd_set_out_clock(ohandle, tracecmd_get_trace_clock(handle));
+	if (tracecmd_append_cpu_data(ohandle, cpus, cpu_list) < 0)
+		die("Failed to append tracing data\n");
+
+	current = end;
+	for (cpu = 0; cpu < cpus; cpu++) {
+		/* Set the tracecmd cursor to the next set of records */
+		if (cpu_data[cpu].offset) {
+			record = tracecmd_read_at(handle, cpu_data[cpu].offset, NULL);
+			if (record && (!current || record->ts > current))
+				current = record->ts + 1;
+			tracecmd_free_record(record);
+		}
+		unlink(cpu_data[cpu].file);
+		free(cpu_data[cpu].file);
+	}
+	free(cpu_data);
+	free(cpu_list);
+	free(dir_buf);
+	free(base_buf);
+	tracecmd_output_close(ohandle);
+
+	return current;
+}
+
+/*
+ * Entry point for "trace-cmd split".
+ *
+ * Options:
+ *   -s/-m/-u <n>  split every n seconds / milliseconds / microseconds
+ *   -e <n>        split every n events
+ *   -p <n>        split every n pages (implies per-CPU processing)
+ *   -r            repeat: keep producing <output>.0001, <output>.0002, ...
+ *   -c            process each CPU on its own
+ *   -C <cpu>      only process the given CPU
+ *   -o <file>     output file name (defaults to the input file name)
+ *   -i <file>     input file name
+ *
+ * An optional start and end time, given in seconds as floating point
+ * numbers, may follow the options. Without -r a single file named
+ * <output>.1 is written.
+ */
+void trace_split (int argc, char **argv)
+{
+	struct tracecmd_input *handle;
+	unsigned long long start_ns = 0, end_ns = 0;
+	unsigned long long current;
+	double start, end;
+	char *endptr;
+	char *output = NULL;
+	char *output_file;
+	enum split_types split_type = SPLIT_NONE;
+	enum split_types type = SPLIT_NONE;
+	int count = 0;	/* stays 0 (unused) when no split option is given */
+	int repeat = 0;
+	int percpu = 0;
+	int cpu = -1;
+	int ac;
+	int c;
+
+	if (argc < 2)
+		usage(argv);
+
+	if (strcmp(argv[1], "split") != 0)
+		usage(argv);
+
+	/* Options are parsed relative to argv + 1 (the sub-command name) */
+	while ((c = getopt(argc-1, argv+1, "+ho:i:s:m:u:e:p:rcC:")) >= 0) {
+		switch (c) {
+		case 'h':
+			usage(argv);
+			break;
+		/*
+		 * The cases below deliberately fall through: each one adds
+		 * one to type so that it ends up as the matching
+		 * enum split_types value.
+		 */
+		case 'p':
+			type++;
+			/* fallthrough */
+		case 'e':
+			type++;
+			/* fallthrough */
+		case 'u':
+			type++;
+			/* fallthrough */
+		case 'm':
+			type++;
+			/* fallthrough */
+		case 's':
+			type++;
+			if (split_type != SPLIT_NONE)
+				die("Only one type of split is allowed");
+			count = atoi(optarg);
+			if (count <= 0)
+				die("Units must be greater than 0");
+			split_type = type;
+
+			/* Spliting by pages only makes sense per cpu */
+			if (type == SPLIT_PAGES)
+				percpu = 1;
+			break;
+		case 'r':
+			repeat = 1;
+			break;
+		case 'c':
+			percpu = 1;
+			break;
+		case 'C':
+			cpu = atoi(optarg);
+			break;
+		case 'o':
+			if (output)
+				die("only one output file allowed");
+			output = strdup(optarg);
+			if (!output)
+				die("Failed to allocate for %s", optarg);
+			break;
+		case 'i':
+			input_file = optarg;
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	/* optind is relative to argv + 1, hence the pre-increment below */
+	ac = (argc - optind);
+
+	if (ac >= 2) {
+		optind++;
+		start = strtod(argv[optind], &endptr);
+		if (ac > 3)
+			usage(argv);
+
+		/* Make sure a true start value was entered */
+		if (*endptr != 0)
+			die("Start value not floating point: %s", argv[optind]);
+
+		start_ns = (unsigned long long)(start * 1000000000.0);
+		optind++;
+		if (ac == 3) {
+			end = strtod(argv[optind], &endptr);
+
+			/* Make sure a true end value was entered */
+			if (*endptr != 0)
+				die("End value not floating point: %s",
+				    argv[optind]);
+
+			end_ns = (unsigned long long)(end * 1000000000.0);
+			if (end_ns < start_ns)
+				die("Error: end is less than start");
+		}
+	}
+
+	if (!input_file)
+		input_file = default_input_file;
+
+	handle = tracecmd_open(input_file, 0);
+	if (!handle)
+		die("error reading %s", input_file);
+
+	if (tracecmd_get_file_state(handle) == TRACECMD_FILE_CPU_LATENCY)
+		die("trace-cmd split does not work with latency traces\n");
+
+	page_size = tracecmd_page_size(handle);
+
+	if (!output) {
+		output = strdup(input_file);
+		if (!output)
+			die("Failed to allocate for %s", input_file);
+	}
+
+	if (!repeat) {
+		/* Room for the ".1" suffix and the terminating NUL */
+		char *tmp = realloc(output, strlen(output) + 3);
+
+		if (!tmp)
+			die("Failed to allocate for %s", output);
+		output = tmp;
+		strcat(output, ".1");
+	}
+
+	current = start_ns;
+	output_file = malloc(strlen(output) + 50);
+	if (!output_file)
+		die("Failed to allocate for %s", output);
+	c = 1;
+
+	do {
+		if (repeat)
+			sprintf(output_file, "%s.%04d", output, c++);
+		else
+			strcpy(output_file, output);
+
+		current = parse_file(handle, output_file, start_ns, end_ns,
+				     percpu, cpu, count, type);
+		if (!repeat)
+			break;
+		/* Later files continue from where the previous one stopped */
+		start_ns = 0;
+	} while (current && (!end_ns || current < end_ns));
+
+	free(output);
+	free(output_file);
+
+	tracecmd_close(handle);
+
+	return;
+}
diff --git a/tracecmd/trace-stack.c b/tracecmd/trace-stack.c
new file mode 100644
index 00000000..80364949
--- /dev/null
+++ b/tracecmd/trace-stack.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "tracefs.h"
+#include "trace-local.h"
+
+/* Proc file that is checked for, and written to, to switch the stack tracer */
+#define PROC_FILE "/proc/sys/kernel/stack_tracer_enabled"
+
+/* Action selected on the "trace-cmd stack" command line; report is the default */
+enum stack_type {
+ STACK_START,
+ STACK_STOP,
+ STACK_RESET,
+ STACK_REPORT
+};
+
+/* Die unless PROC_FILE exists, i.e. unless stat() on it succeeds. */
+static void test_available(void)
+{
+	struct stat st;
+
+	if (stat(PROC_FILE, &st) < 0)
+		die("stack tracer not configured on running kernel");
+}
+
+/*
+ * Write @new_status as a single digit into PROC_FILE, unless the stack
+ * tracer already reports that status.
+ *
+ * NOTE: this implementation only accepts new_status in the range [0..9].
+ * Anything else is reported with a warning and ignored.
+ */
+static void change_stack_tracer_status(unsigned new_status)
+{
+	char digit;
+	int cur;
+	int rc;
+	int fd;
+
+	if (new_status > 9) {
+		warning("invalid status %d\n", new_status);
+		return;
+	}
+
+	rc = tracecmd_stack_tracer_status(&cur);
+	if (rc < 0)
+		die("error reading %s", PROC_FILE);
+
+	/* Already in the requested state: nothing to do */
+	if (rc > 0 && cur == new_status)
+		return;
+
+	fd = open(PROC_FILE, O_WRONLY);
+	if (fd < 0)
+		die("writing %s", PROC_FILE);
+
+	digit = new_status + '0';
+	if (write(fd, &digit, 1) < 0)
+		die("writing into %s", PROC_FILE);
+	close(fd);
+}
+
+/* Turn the stack tracer on by requesting status 1. */
+static void start_trace(void)
+{
+ change_stack_tracer_status(1);
+}
+
+/* Turn the stack tracer off by requesting status 0. */
+static void stop_trace(void)
+{
+ change_stack_tracer_status(0);
+}
+
+/*
+ * Write "0" into the "stack_max_size" tracing file. Dies when the file
+ * cannot be opened for writing or the write fails.
+ */
+static void reset_trace(void)
+{
+	const char zero = '0';
+	char *max_size_path = tracefs_get_tracing_file("stack_max_size");
+	int fd = open(max_size_path, O_WRONLY);
+
+	if (fd < 0)
+		die("writing %s", max_size_path);
+
+	if (write(fd, &zero, 1) < 0)
+		die("writing into %s", max_size_path);
+
+	close(fd);
+	tracefs_put_tracing_file(max_size_path);
+}
+
+/*
+ * Print whether the stack tracer is running, followed by the content of
+ * the "stack_trace" tracing file with every line starting with '#'
+ * left out.
+ */
+static void read_trace(void)
+{
+	char *buf = NULL;
+	size_t n = 0;
+	int status;
+	char *path;
+	FILE *fp;
+
+	if (tracecmd_stack_tracer_status(&status) <= 0)
+		die("Invalid stack tracer state");
+
+	if (status > 0)
+		printf("(stack tracer running)\n");
+	else
+		printf("(stack tracer not running)\n");
+
+	path = tracefs_get_tracing_file("stack_trace");
+	fp = fopen(path, "r");
+	if (!fp)
+		die("reading to '%s'", path);
+	tracefs_put_tracing_file(path);
+
+	/* getline() reuses and grows buf as needed; it is freed once below */
+	while (getline(&buf, &n, fp) >= 0) {
+		/*
+		 * Skip any line that starts with a '#'.
+		 * Those talk about how to enable stack tracing
+		 * within the debugfs system. We don't care about that.
+		 */
+		if (buf[0] != '#')
+			printf("%s", buf);
+	}
+
+	/* getline() may have allocated buf even when it reports failure */
+	free(buf);
+	fclose(fp);
+}
+
+/* Values returned by getopt_long() for the long options of "trace-cmd stack" */
+enum {
+ OPT_verbose = 252,
+ OPT_reset = 253,
+ OPT_stop = 254,
+ OPT_start = 255,
+};
+
+/*
+ * Entry point for "trace-cmd stack".
+ *
+ * --start, --stop and --reset select the action to perform; without any
+ * of them the current stack trace is printed. --verbose[=level] is
+ * handed to trace_set_verbose(). When several action options are given
+ * the last one wins.
+ */
+void trace_stack (int argc, char **argv)
+{
+ enum stack_type trace_type = STACK_REPORT;
+ int c;
+
+ if (argc < 2)
+ usage(argv);
+
+ if (strcmp(argv[1], "stack") != 0)
+ usage(argv);
+
+ for (;;) {
+ int option_index = 0;
+ static struct option long_options[] = {
+ {"start", no_argument, NULL, OPT_start},
+ {"stop", no_argument, NULL, OPT_stop},
+ {"reset", no_argument, NULL, OPT_reset},
+ {"help", no_argument, NULL, '?'},
+ {"verbose", optional_argument, NULL, OPT_verbose},
+ {NULL, 0, NULL, 0}
+ };
+
+ /* Options are parsed relative to argv + 1 (the sub-command name) */
+ c = getopt_long (argc-1, argv+1, "+h?",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'h':
+ usage(argv);
+ break;
+ case OPT_start:
+ trace_type = STACK_START;
+ break;
+ case OPT_stop:
+ trace_type = STACK_STOP;
+ break;
+ case OPT_reset:
+ trace_type = STACK_RESET;
+ break;
+ case OPT_verbose:
+ if (trace_set_verbose(optarg) < 0)
+ die("invalid verbose level %s", optarg);
+ break;
+ default:
+ /* '?' (also used for --help) and anything unknown */
+ usage(argv);
+ }
+ }
+
+ /* Dies when the stack tracer proc file does not exist */
+ test_available();
+
+ switch (trace_type) {
+ case STACK_START:
+ start_trace();
+ break;
+ case STACK_STOP:
+ stop_trace();
+ break;
+ case STACK_RESET:
+ reset_trace();
+ break;
+ default:
+ read_trace();
+ break;
+ }
+
+ return;
+}
diff --git a/tracecmd/trace-stat.c b/tracecmd/trace-stat.c
new file mode 100644
index 00000000..a5fb777b
--- /dev/null
+++ b/tracecmd/trace-stat.c
@@ -0,0 +1,926 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+#include "tracefs.h"
+#include "trace-local.h"
+
+/* Fallback read chunk size for when the included headers do not define BUFSIZ */
+#ifndef BUFSIZ
+#define BUFSIZ 1024
+#endif
+
+/* Return non-zero when @instance is the global top_instance. */
+static inline int is_top_instance(struct buffer_instance *instance)
+{
+ return instance == &top_instance;
+}
+
+/*
+ * Open @file of @instance read-only.
+ *
+ * Returns the descriptor from open(), which is negative on failure.
+ */
+static int get_instance_file_fd(struct buffer_instance *instance,
+				const char *file)
+{
+	char *full_path = tracefs_instance_get_file(instance->tracefs, file);
+	int fd = open(full_path, O_RDONLY);
+
+	tracefs_put_tracing_file(full_path);
+	return fd;
+}
+
+/*
+ * Strip leading and trailing white space from @str.
+ *
+ * Trailing white space is cut off by writing a NUL into @str; leading
+ * white space is skipped by returning a pointer into @str. The result
+ * therefore must not be passed to free(); free the original pointer.
+ *
+ * Returns NULL when @str is NULL.
+ */
+char *strstrip(char *str)
+{
+	size_t len;
+	char *s;
+
+	if (!str)
+		return NULL;
+
+	/*
+	 * Work with a length instead of a pointer so that an empty or all
+	 * white space string never makes us step in front of the buffer.
+	 */
+	len = strlen(str);
+	while (len > 0 && isspace((unsigned char)str[len - 1]))
+		len--;
+	str[len] = '\0';
+
+	/* isspace() needs an unsigned char value; NUL ends the loop */
+	for (s = str; isspace((unsigned char)*s); s++)
+		;
+
+	return s;
+}
+
+/* FIXME: append_file() is duplicated and could be consolidated */
+/*
+ * Return a newly allocated "<dir>/<name>" string. Dies when the
+ * allocation fails. The callers in this file release it with free().
+ */
+char *append_file(const char *dir, const char *name)
+{
+	char *joined;
+
+	if (asprintf(&joined, "%s/%s", dir, name) < 0)
+		die("Failed to allocate %s/%s", dir, name);
+
+	return joined;
+}
+
+/*
+ * Read everything from @fd up to end of file and return it as a newly
+ * allocated, NUL terminated string that the caller has to free().
+ *
+ * @file is only used in the error message. Dies on allocation or read
+ * failure.
+ *
+ * The loop ends when read() returns 0. A read that returns fewer bytes
+ * than asked for is not taken as end of file, because that would
+ * truncate the content.
+ */
+static char *get_fd_content(int fd, const char *file)
+{
+	char *str = NULL;
+	size_t len = 0;
+	ssize_t ret;
+
+	for (;;) {
+		/* Room for one more chunk plus the terminating NUL */
+		str = realloc(str, len + BUFSIZ + 1);
+		if (!str)
+			die("malloc");
+		ret = read(fd, str + len, BUFSIZ);
+		if (ret < 0)
+			die("reading %s\n", file);
+		if (ret == 0)
+			break;
+		len += ret;
+	}
+	str[len] = 0;
+
+	return str;
+}
+
+/*
+ * Return the content of @file as an allocated string, or NULL when the
+ * file cannot be opened for reading.
+ */
+char *get_file_content(const char *file)
+{
+	char *content = NULL;
+	int fd = open(file, O_RDONLY);
+
+	if (fd >= 0) {
+		content = get_fd_content(fd, file);
+		close(fd);
+	}
+
+	return content;
+}
+
+/*
+ * Return the content of @file of @instance as an allocated string, or
+ * NULL when the file cannot be opened.
+ */
+static char *get_instance_file_content(struct buffer_instance *instance,
+				       const char *file)
+{
+	char *content;
+	int fd = get_instance_file_fd(instance, file);
+
+	if (fd < 0)
+		return NULL;
+
+	content = get_fd_content(fd, file);
+	close(fd);
+
+	return content;
+}
+
+/*
+ * Print "<description><content>" for the instance file @name, but only
+ * when the file exists, is readable, and its stripped content is
+ * neither empty nor equal to @def_value.
+ */
+static void report_file(struct buffer_instance *instance,
+			char *name, char *def_value, char *description)
+{
+	char *raw;
+	char *value;
+
+	if (!tracefs_file_exists(instance->tracefs, name))
+		return;
+
+	raw = get_instance_file_content(instance, name);
+	if (!raw)
+		return;
+
+	value = strstrip(raw);
+	if (value[0] != '\0' && strcmp(value, def_value) != 0)
+		printf("\n%s%s\n", description, value);
+
+	/* value points into raw, so raw is the pointer to release */
+	free(raw);
+}
+
+/*
+ * Callback for tracefs_instances_walk(): print one instance name.
+ *
+ * @data points to a bool that is true until the first call; it makes
+ * the "Instances:" header appear once, in front of the first name.
+ * Always returns 0.
+ */
+static int report_instance(const char *name, void *data)
+{
+	bool *need_header = data;
+
+	if (*need_header)
+		printf("\nInstances:\n");
+	*need_header = false;
+
+	printf(" %s\n", name);
+	return 0;
+}
+
+/* Print all instances by walking them with report_instance() as callback. */
+static void report_instances(void)
+{
+ /* Tells report_instance() that the header has not been printed yet */
+ bool first = true;
+
+ tracefs_instances_walk(report_instance, &first);
+}
+
+/*
+ * Allocate a zeroed event iterator and open @path as its system
+ * directory. Dies when the allocation or opendir() fails. Release the
+ * iterator with trace_event_iter_free().
+ */
+struct event_iter *trace_event_iter_alloc(const char *path)
+{
+	struct event_iter *iter = calloc(1, sizeof(*iter));
+
+	if (!iter)
+		die("Failed to allocate event_iter for path %s", path);
+
+	iter->system_dir = opendir(path);
+	if (!iter->system_dir)
+		die("opendir");
+
+	return iter;
+}
+
+/*
+ * Advance @iter and report what it now points at.
+ *
+ * @path:   the events directory @iter was allocated for.
+ * @system: when not NULL and no event directory is open, @path/@system
+ *          is opened and its entries are returned first.
+ *
+ * Returns EVENT_ITER_EVENT with iter->event_dent set while the open
+ * event directory has entries, then EVENT_ITER_SYSTEM with
+ * iter->system_dent set for the next entry of the system directory, and
+ * EVENT_ITER_NONE when nothing is left. "." and ".." are skipped.
+ */
+enum event_iter_type
+trace_event_iter_next(struct event_iter *iter, const char *path, const char *system)
+{
+	struct dirent *dent;
+
+	if (system && !iter->event_dir) {
+		char *event;
+		struct stat st;
+
+		event = append_file(path, system);
+
+		/*
+		 * A failed stat() leaves st undefined, so handle it the same
+		 * way as "not a directory".
+		 */
+		if (stat(event, &st) < 0 || !S_ISDIR(st.st_mode)) {
+			free(event);
+			goto do_system;
+		}
+
+		iter->event_dir = opendir(event);
+		if (!iter->event_dir)
+			die("opendir %s", event);
+		free(event);
+	}
+
+	if (iter->event_dir) {
+		while ((dent = readdir(iter->event_dir))) {
+			const char *name = dent->d_name;
+
+			if (strcmp(name, ".") == 0 ||
+			    strcmp(name, "..") == 0)
+				continue;
+
+			iter->event_dent = dent;
+			return EVENT_ITER_EVENT;
+		}
+		/* This system is exhausted; go on with the next system */
+		closedir(iter->event_dir);
+		iter->event_dir = NULL;
+	}
+
+ do_system:
+	while ((dent = readdir(iter->system_dir))) {
+		const char *name = dent->d_name;
+
+		if (strcmp(name, ".") == 0 ||
+		    strcmp(name, "..") == 0)
+			continue;
+
+		iter->system_dent = dent;
+
+		return EVENT_ITER_SYSTEM;
+	}
+
+	return EVENT_ITER_NONE;
+}
+
+/* Close the directories held by @iter and free it; NULL is accepted. */
+void trace_event_iter_free(struct event_iter *iter)
+{
+	if (iter) {
+		if (iter->event_dir)
+			closedir(iter->event_dir);
+
+		closedir(iter->system_dir);
+		free(iter);
+	}
+}
+
+/*
+ * Put @iter back at the start: close the event directory if one is
+ * open and rewind the system directory.
+ */
+static void reset_event_iter(struct event_iter *iter)
+{
+	if (iter->event_dir)
+		closedir(iter->event_dir);
+	iter->event_dir = NULL;
+
+	rewinddir(iter->system_dir);
+}
+
+/*
+ * Tell whether the system @iter currently points at has to be looked
+ * at event by event.
+ *
+ * Returns 1 when the system's "enable" file starts with something other
+ * than '0' or '1'. Returns 0 in every other case, including when the
+ * entry is not a directory or its "enable" file cannot be read.
+ */
+static int process_individual_events(const char *path, struct event_iter *iter)
+{
+	struct stat st;
+	const char *system = iter->system_dent->d_name;
+	char *file;
+	char *enable = NULL;
+	char *str;
+	int ret = 0;
+
+	file = append_file(path, system);
+
+	/* A failed stat() leaves st undefined; treat it as "not a directory" */
+	if (stat(file, &st) < 0 || !S_ISDIR(st.st_mode))
+		goto out;
+
+	enable = append_file(file, "enable");
+	str = get_file_content(enable);
+	if (!str)
+		goto out;
+
+	if (*str != '1' && *str != '0')
+		ret = 1;
+	free(str);
+
+ out:
+	free(enable);
+	free(file);
+
+	return ret;
+}
+
+/*
+ * Print @name when its "enable" file starts with '1'.
+ *
+ * With @system NULL, @name is a system directly below @path and is
+ * listed under "Individual systems:". Otherwise @name is an event of
+ * @system and is listed under "Individual events:" below its system.
+ *
+ * @processed carries the printing state between calls so that the
+ * headers and the system name are printed only once.
+ */
+static void
+process_event_enable(char *path, const char *system, const char *name,
+		     enum event_process *processed)
+{
+	struct stat st;
+	char *enable = NULL;
+	char *file;
+	char *str;
+
+	if (system)
+		path = append_file(path, system);
+
+	file = append_file(path, name);
+
+	/* path was only allocated here when a system was given */
+	if (system)
+		free(path);
+
+	/* A failed stat() leaves st undefined; treat it as "not a directory" */
+	if (stat(file, &st) < 0 || !S_ISDIR(st.st_mode))
+		goto out;
+
+	enable = append_file(file, "enable");
+	str = get_file_content(enable);
+	if (!str)
+		goto out;
+
+	if (*str == '1') {
+		if (!system) {
+			if (!*processed)
+				printf(" Individual systems:\n");
+			printf( " %s\n", name);
+			*processed = PROCESSED_SYSTEM;
+		} else {
+			if (!*processed) {
+				printf(" Individual events:\n");
+				*processed = PROCESSED_SYSTEM;
+			}
+			/* First enabled event of this system: print the system name */
+			if (*processed == PROCESSED_SYSTEM) {
+				printf(" %s\n", system);
+				*processed = PROCESSED_EVENT;
+			}
+			printf( " %s\n", name);
+		}
+	}
+	free(str);
+
+ out:
+	free(enable);
+	free(file);
+}
+
+/*
+ * Print the "Events:" section for @instance.
+ *
+ * When the top level "events/enable" file starts with '1' or '0' only
+ * "All enabled" or "All disabled" is printed. Otherwise the events
+ * directory is walked twice: first to list the systems that are enabled
+ * as a whole, then to list the enabled events of the systems that
+ * process_individual_events() flags.
+ *
+ * Nothing is printed when "events/enable" cannot be read.
+ */
+static void report_events(struct buffer_instance *instance)
+{
+ struct event_iter *iter;
+ char *str;
+ char *cont;
+ char *path;
+ char *system;
+ enum event_iter_type type;
+ enum event_process processed = PROCESSED_NONE;
+ enum event_process processed_part = PROCESSED_NONE;
+
+ str = get_instance_file_content(instance, "events/enable");
+ if (!str)
+ return;
+
+ cont = strstrip(str);
+
+ printf("\nEvents:\n");
+
+ switch(*cont) {
+ case '1':
+ printf(" All enabled\n");
+ free(str);
+ return;
+ case '0':
+ printf(" All disabled\n");
+ free(str);
+ return;
+ }
+
+ free(str);
+
+ path = tracefs_instance_get_file(instance->tracefs, "events");
+ if (!path)
+ die("malloc");
+
+ iter = trace_event_iter_alloc(path);
+
+ /* First pass: with system == NULL only system entries are returned */
+ while (trace_event_iter_next(iter, path, NULL)) {
+ process_event_enable(path, NULL, iter->system_dent->d_name, &processed);
+ }
+
+ reset_event_iter(iter);
+
+ /* Second pass: descend into the systems that are not uniformly set */
+ system = NULL;
+ while ((type = trace_event_iter_next(iter, path, system))) {
+
+ if (type == EVENT_ITER_SYSTEM) {
+
+ /* Only process systems that are not fully enabled */
+ if (!process_individual_events(path, iter))
+ continue;
+
+ system = iter->system_dent->d_name;
+ /* Makes process_event_enable() print the new system name again */
+ if (processed_part)
+ processed_part = PROCESSED_SYSTEM;
+ continue;
+ }
+
+ process_event_enable(path, iter->system_dent->d_name,
+ iter->event_dent->d_name, &processed_part);
+ }
+
+ trace_event_iter_free(iter);
+
+ if (!processed && !processed_part)
+ printf(" (none enabled)\n");
+
+ tracefs_put_tracing_file(path);
+}
+
+/*
+ * Print the filter of the event @iter currently points at, unless the
+ * "filter" file is unreadable or contains "none".
+ *
+ * The "Filters:" header is printed in front of the first filter, which
+ * is tracked through @processed.
+ */
+static void
+process_event_filter(char *path, struct event_iter *iter, enum event_process *processed)
+{
+	const char *system = iter->system_dent->d_name;
+	const char *event = iter->event_dent->d_name;
+	struct stat st;
+	char *filter = NULL;
+	char *file;
+	char *str;
+	char *cont;
+
+	path = append_file(path, system);
+	file = append_file(path, event);
+	free(path);
+
+	/* A failed stat() leaves st undefined; treat it as "not a directory" */
+	if (stat(file, &st) < 0 || !S_ISDIR(st.st_mode))
+		goto out;
+
+	filter = append_file(file, "filter");
+	str = get_file_content(filter);
+	if (!str)
+		goto out;
+
+	cont = strstrip(str);
+
+	if (strcmp(cont, "none") == 0) {
+		free(str);
+		goto out;
+	}
+
+	if (!*processed)
+		printf("\nFilters:\n");
+	printf( " %s:%s \"%s\"\n", system, event, cont);
+	*processed = PROCESSED_SYSTEM;
+	free(str);
+
+ out:
+	free(filter);
+	free(file);
+}
+
+/*
+ * Walk every event of every system of @instance and let
+ * process_event_filter() print the filters that are set.
+ */
+static void report_event_filters(struct buffer_instance *instance)
+{
+	enum event_process processed = PROCESSED_NONE;
+	enum event_iter_type type;
+	struct event_iter *iter;
+	char *system = NULL;
+	char *events_dir;
+
+	events_dir = tracefs_instance_get_file(instance->tracefs, "events");
+	if (!events_dir)
+		die("malloc");
+
+	iter = trace_event_iter_alloc(events_dir);
+
+	for (;;) {
+		type = trace_event_iter_next(iter, events_dir, system);
+		if (!type)
+			break;
+
+		/* A system entry only selects where the next events come from */
+		if (type == EVENT_ITER_SYSTEM)
+			system = iter->system_dent->d_name;
+		else
+			process_event_filter(events_dir, iter, &processed);
+	}
+
+	trace_event_iter_free(iter);
+	tracefs_put_tracing_file(events_dir);
+}
+
+/*
+ * Print the trigger attached to a single event, if it has one.
+ *
+ * @path: directory that holds the per-system event directories
+ * @iter: iterator whose system_dent/event_dent name the event to inspect
+ * @processed: while this is still zero the "Triggers:" heading is printed
+ *	before the first entry; it is set to PROCESSED_SYSTEM once an
+ *	entry has been printed
+ *
+ * Entries that are not directories, whose "trigger" file cannot be read,
+ * or whose content starts with '#' are silently skipped.
+ */
+static void
+process_event_trigger(char *path, struct event_iter *iter, enum event_process *processed)
+{
+	const char *system = iter->system_dent->d_name;
+	const char *event = iter->event_dent->d_name;
+	struct stat st;
+	char *trigger = NULL;
+	char *file;
+	char *str;
+	char *cont;
+
+	path = append_file(path, system);
+	file = append_file(path, event);
+	free(path);
+
+	/* st is only valid when stat() succeeds; never look at it otherwise */
+	if (stat(file, &st) < 0 || !S_ISDIR(st.st_mode))
+		goto out;
+
+	trigger = append_file(file, "trigger");
+	str = get_file_content(trigger);
+	if (!str)
+		goto out;
+
+	cont = strstrip(str);
+
+	/* Content that starts with '#' is treated as "no trigger set" */
+	if (cont[0] == '#') {
+		free(str);
+		goto out;
+	}
+
+	if (!*processed)
+		printf("\nTriggers:\n");
+	printf( " %s:%s \"%s\"\n", system, event, cont);
+	*processed = PROCESSED_SYSTEM;
+	free(str);
+
+ out:
+	free(trigger);
+	free(file);
+}
+
+/*
+ * Walk every event under the instance's "events" directory and print
+ * the triggers that are set on them.
+ */
+static void report_event_triggers(struct buffer_instance *instance)
+{
+	enum event_process seen = PROCESSED_NONE;
+	enum event_iter_type kind;
+	struct event_iter *it;
+	char *cur_system = NULL;
+	char *events_dir;
+
+	events_dir = tracefs_instance_get_file(instance->tracefs, "events");
+	if (!events_dir)
+		die("malloc");
+
+	it = trace_event_iter_alloc(events_dir);
+
+	for (;;) {
+		kind = trace_event_iter_next(it, events_dir, cur_system);
+		if (!kind)
+			break;
+
+		/* A system entry only updates the system handed to the next step */
+		if (kind == EVENT_ITER_SYSTEM)
+			cur_system = it->system_dent->d_name;
+		else
+			process_event_trigger(events_dir, it, &seen);
+	}
+
+	trace_event_iter_free(it);
+
+	tracefs_put_tracing_file(events_dir);
+}
+
+/* Per-line parser state used by list_functions() */
+enum func_states {
+ /* At the first character of a line, nothing decided yet */
+ FUNC_STATE_START,
+ /* Line began with '#': drop characters until the next newline */
+ FUNC_STATE_SKIP,
+ /* Line is being echoed: print characters up to and including the newline */
+ FUNC_STATE_PRINT,
+};
+
+/*
+ * Dump the non-comment lines of a function list file.
+ *
+ * @path: file to read; nothing is printed if it does not exist or
+ *	cannot be read
+ * @string: heading printed once, just before the first listed line
+ *
+ * Lines whose first character is '#' and empty lines are dropped; every
+ * other line is echoed with a leading indent.
+ */
+static void list_functions(const char *path, char *string)
+{
+	enum func_states state = FUNC_STATE_START;
+	struct stat st;
+	char *text;
+	int header_done = 0;
+	int total;
+	int pos;
+
+	/* Ignore if it does not exist. */
+	if (stat(path, &st) < 0)
+		return;
+
+	text = get_file_content(path);
+	if (!text)
+		return;
+
+	total = strlen(text);
+
+	for (pos = 0; pos < total; pos++) {
+		char c = text[pos];
+
+		switch (state) {
+		case FUNC_STATE_PRINT:
+			/* Echo the rest of the line, newline included */
+			putchar(c);
+			if (c == '\n')
+				state = FUNC_STATE_START;
+			break;
+
+		case FUNC_STATE_SKIP:
+			/* Swallow the comment line */
+			if (c == '\n')
+				state = FUNC_STATE_START;
+			break;
+
+		case FUNC_STATE_START:
+			/* Empty line: stay at line start */
+			if (c == '\n')
+				break;
+
+			if (c == '#') {
+				state = FUNC_STATE_SKIP;
+				break;
+			}
+
+			if (!header_done) {
+				printf("\n%s:\n", string);
+				header_done = 1;
+			}
+
+			state = FUNC_STATE_PRINT;
+			printf(" ");
+			putchar(c);
+			break;
+		}
+	}
+
+	free(text);
+}
+
+/*
+ * List the contents of the function graph filter files of an instance
+ * (set_graph_function first, then set_graph_notrace).
+ */
+static void report_graph_funcs(struct buffer_instance *instance)
+{
+	static const struct {
+		const char *file;
+		char *title;
+	} lists[] = {
+		{ "set_graph_function", "Function Graph Filter" },
+		{ "set_graph_notrace", "Function Graph No Trace" },
+	};
+	char *path;
+	size_t n;
+
+	for (n = 0; n < sizeof(lists) / sizeof(lists[0]); n++) {
+		path = tracefs_instance_get_file(instance->tracefs, lists[n].file);
+		if (!path)
+			die("malloc");
+
+		list_functions(path, lists[n].title);
+
+		tracefs_put_tracing_file(path);
+	}
+}
+
+/*
+ * List the contents of the function tracer filter files of an instance
+ * (set_ftrace_filter first, then set_ftrace_notrace).
+ */
+static void report_ftrace_filters(struct buffer_instance *instance)
+{
+	static const struct {
+		const char *file;
+		char *title;
+	} lists[] = {
+		{ "set_ftrace_filter", "Function Filter" },
+		{ "set_ftrace_notrace", "Function No Trace" },
+	};
+	char *path;
+	size_t n;
+
+	for (n = 0; n < sizeof(lists) / sizeof(lists[0]); n++) {
+		path = tracefs_instance_get_file(instance->tracefs, lists[n].file);
+		if (!path)
+			die("malloc");
+
+		list_functions(path, lists[n].title);
+
+		tracefs_put_tracing_file(path);
+	}
+}
+
+/*
+ * Print the ring buffer sizes of an instance.
+ *
+ * Nothing is printed when buffer_size_kb cannot be read or when it
+ * contains "expanded". If it reads "X" the size of each CPU buffer is
+ * read from per_cpu/cpuN/buffer_size_kb instead, stopping at the first
+ * CPU whose file cannot be read. The total size is printed last.
+ */
+static void report_buffers(struct buffer_instance *instance)
+{
+#define FILE_SIZE 100
+	char *str;
+	char *cont;
+	char file[FILE_SIZE];
+	int cpu;
+
+	str = get_instance_file_content(instance, "buffer_size_kb");
+	if (!str)
+		return;
+
+	cont = strstrip(str);
+
+	/* If it's not expanded yet, just skip */
+	if (strstr(cont, "expanded") != NULL)
+		goto out;
+
+	if (strcmp(cont, "X") != 0) {
+		printf("\nBuffer size in kilobytes (per cpu):\n");
+		/* print the stripped text, not the raw buffer */
+		printf(" %s\n", cont);
+		goto total;
+	}
+
+	/* The "X" marker is no longer needed; free it before str is reused */
+	free(str);
+
+	/* Read the sizes of each CPU buffer */
+	for (cpu = 0; ; cpu++) {
+
+		snprintf(file, FILE_SIZE, "per_cpu/cpu%d/buffer_size_kb", cpu);
+		str = get_instance_file_content(instance, file);
+		if (!str)
+			break;
+
+		cont = strstrip(str);
+		if (!cpu)
+			putchar('\n');
+
+		printf("CPU %d buffer size (kb): %s\n", cpu, cont);
+		free(str);
+	}
+
+	/* The loop only exits with str == NULL, so the free below is a no-op */
+ total:
+	free(str);
+
+	str = get_instance_file_content(instance, "buffer_total_size_kb");
+	if (!str)
+		return;
+
+	cont = strstrip(str);
+	printf("\nBuffer total size in kilobytes:\n");
+	printf(" %s\n", cont);
+
+ out:
+	free(str);
+}
+
+/*
+ * Print the trace clock of an instance, unless it is "local".
+ * A NULL instance is passed on to tracefs_get_clock() as a NULL
+ * tracefs instance.
+ */
+static void report_clock(struct buffer_instance *instance)
+{
+	struct tracefs_instance *tracefs = NULL;
+	char *name;
+
+	if (instance)
+		tracefs = instance->tracefs;
+
+	name = tracefs_get_clock(tracefs);
+	if (!name)
+		return;
+
+	/* Default clock is "local", only show others */
+	if (strcmp(name, "local") != 0)
+		printf("\nClock: %s\n", name);
+
+	free(name);
+}
+
+/*
+ * Print the tracing_cpumask of an instance, but only when the mask does
+ * not have a bit set for every CPU counted by tracecmd_count_cpus().
+ *
+ * The mask text is scanned from its last character towards the first,
+ * subtracting the CPUs each digit accounts for from "cpus". Whatever is
+ * left above zero at the end means at least one CPU is not in the mask.
+ */
+static void report_cpumask(struct buffer_instance *instance)
+{
+ char *str;
+ char *cont;
+ int cpus;
+ int n;
+ int i;
+
+ str = get_instance_file_content(instance, "tracing_cpumask");
+ if (!str)
+ return;
+
+ cont = strstrip(str);
+
+ /* check to make sure all CPUs on this machine are set */
+ cpus = tracecmd_count_cpus();
+
+ for (i = strlen(cont) - 1; i >= 0 && cpus > 0; i--) {
+ /* ',' only separates groups of digits */
+ if (cont[i] == ',')
+ continue;
+
+ /* A full digit covers four CPUs */
+ if (cont[i] == 'f') {
+ cpus -= 4;
+ continue;
+ }
+
+ /* Not 'f' while four or more CPUs remain: some CPU is not set */
+ if (cpus >= 4)
+ break;
+
+ /* NOTE(review): the letter branch assumes lowercase hex digits - confirm */
+ if (cont[i] >= '0' && cont[i] <= '9')
+ n = cont[i] - '0';
+ else
+ n = 10 + (cont[i] - 'a');
+
+ /* Count the low bits of this digit that are set, up to the CPUs left */
+ while (cpus > 0) {
+ if (!(n & 1))
+ break;
+ n >>= 1;
+ cpus--;
+ }
+ break;
+ }
+
+ /* If cpus is greater than zero, one isn't set */
+ if (cpus > 0)
+ printf("\nCPU mask: %s\n", cont);
+
+ free(str);
+}
+
+/*
+ * Print the content of a probe definition file under a heading.
+ *
+ * @instance: instance the file is read from
+ * @file: name of the file to read
+ * @string: heading text
+ *
+ * Nothing is printed when the file cannot be read or is empty once
+ * stripped. Every line of the content is prefixed with an indent.
+ */
+static void report_probes(struct buffer_instance *instance,
+			  const char *file, const char *string)
+{
+	char *raw;
+	char *p;
+	int at_line_start = 1;
+
+	raw = get_instance_file_content(instance, file);
+	if (!raw)
+		return;
+
+	p = strstrip(raw);
+	if (*p != '\0') {
+		printf("\n%s:\n", string);
+
+		for (; *p; p++) {
+			if (at_line_start)
+				printf(" ");
+			putchar(*p);
+			/* the character after a newline starts a new line */
+			at_line_start = (*p == '\n');
+		}
+		putchar('\n');
+	}
+
+	free(raw);
+}
+
+/* Report the instance's "kprobe_events" file under the "Kprobe events" heading */
+static void report_kprobes(struct buffer_instance *instance)
+{
+ report_probes(instance, "kprobe_events", "Kprobe events");
+}
+
+/* Report the instance's "uprobe_events" file under the "Uprobe events" heading */
+static void report_uprobes(struct buffer_instance *instance)
+{
+ report_probes(instance, "uprobe_events", "Uprobe events");
+}
+
+/*
+ * Report whether tracing is on for an instance, based on its
+ * "tracing_on" file. Nothing is printed if the file cannot be read.
+ */
+static void report_traceon(struct buffer_instance *instance)
+{
+	const char *msg;
+	char *raw;
+
+	raw = get_instance_file_content(instance, "tracing_on");
+	if (!raw)
+		return;
+
+	/* double newline as this is the last thing printed */
+	if (strcmp(strstrip(raw), "0") == 0)
+		msg = "\nTracing is disabled\n\n";
+	else
+		msg = "\nTracing is enabled\n\n";
+
+	fputs(msg, stdout);
+
+	free(raw);
+}
+
+/*
+ * Print the status report of one instance.
+ *
+ * @instance: instance to report on
+ * @opt: when true, the tracing options are listed as well
+ *
+ * The sections are printed in the order of the calls below.
+ */
+static void stat_instance(struct buffer_instance *instance, bool opt)
+{
+ /* Sub-instances get a name banner; all but the first also get a separator */
+ if (instance != &top_instance) {
+ if (instance != first_instance)
+ printf("---------------\n");
+ printf("Instance: %s\n",
+ tracefs_instance_get_name(instance->tracefs));
+ }
+
+ /*
+ * NOTE(review): the third argument of report_file() looks like the
+ * default content for which nothing is reported - confirm against
+ * report_file().
+ */
+ report_file(instance, "current_tracer", "nop", "Tracer: ");
+ report_events(instance);
+ report_event_filters(instance);
+ report_event_triggers(instance);
+ report_ftrace_filters(instance);
+ report_graph_funcs(instance);
+ report_buffers(instance);
+ report_clock(instance);
+ report_cpumask(instance);
+ report_file(instance, "tracing_max_latency", "0", "Max Latency: ");
+ report_kprobes(instance);
+ report_uprobes(instance);
+ report_file(instance, "set_event_pid", "", "Filtered event PIDs:\n");
+ report_file(instance, "set_ftrace_pid", "no pid",
+ "Filtered function tracer PIDs:\n");
+ if (opt) {
+ printf("\nOptions:\n");
+ show_options(" ", instance);
+ }
+ report_traceon(instance);
+ report_file(instance, "error_log", "", "Error log:\n");
+ /* The list of instances is only reported along with the top instance */
+ if (instance == &top_instance)
+ report_instances();
+}
+
+/*
+ * Parse the options of the stat command, print the status of every
+ * selected instance followed by the stack tracer status, then exit.
+ *
+ *  -h      show usage
+ *  -B buf  report on the named instance
+ *  -t      force the top instance to be used
+ *  -o      also list the tracing options
+ *
+ * Option parsing starts at argv[1]. This function does not return.
+ */
+void trace_stat (int argc, char **argv)
+{
+	struct buffer_instance *instance = &top_instance;
+	bool show_opts = false;
+	int use_top = 0;
+	int stack_status;
+	int ch;
+
+	init_top_instance();
+
+	while ((ch = getopt(argc-1, argv+1, "htoB:")) != -1) {
+		switch (ch) {
+		case 'h':
+			usage(argv);
+			break;
+		case 'B':
+			instance = allocate_instance(optarg);
+			if (!instance)
+				die("Failed to create instance");
+			add_instance(instance, tracecmd_count_cpus());
+			/* top instance requires direct access */
+			if (!use_top && is_top_instance(first_instance))
+				first_instance = instance;
+			break;
+		case 't':
+			/* Force to use top instance */
+			use_top = 1;
+			instance = &top_instance;
+			break;
+		case 'o':
+			show_opts = true;
+			break;
+		default:
+			usage(argv);
+		}
+	}
+
+	update_first_instance(instance, use_top);
+
+	for_all_instances(instance) {
+		stat_instance(instance, show_opts);
+	}
+
+	if (tracecmd_stack_tracer_status(&stack_status) < 0)
+		printf("Error reading stack tracer status\n\n");
+	else if (stack_status > 0)
+		printf("Stack tracing is enabled\n\n");
+
+	exit(0);
+}
diff --git a/tracecmd/trace-stream.c b/tracecmd/trace-stream.c
new file mode 100644
index 00000000..ee310f3d
--- /dev/null
+++ b/tracecmd/trace-stream.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "trace-local.h"
+
+/*
+ * Stream runs for a single machine. We are going to cheat
+ * and use the trace-output and trace-input code to create
+ * our pevent. First just create a trace.dat file and then read
+ * it to create the pevent and handle.
+ *
+ * The temporary file (fp, tfd, ofd) lives in function-static storage and
+ * is created on the first call only. Whenever the stream is closed on a
+ * failure path, fp is reset to NULL so that a later call creates a fresh
+ * temporary file instead of reusing a closed FILE.
+ *
+ * If the instance already has a handle, only the pipe for @cpu is added
+ * to it. @fd is switched to non-blocking mode before that.
+ *
+ * Returns the input handle (also stored in instance->handle) on success,
+ * NULL on failure.
+ */
+struct tracecmd_input *
+trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus,
+		  struct hook_list *hooks,
+		  tracecmd_handle_init_func handle_init, int global)
+{
+	struct tracecmd_input *trace_input;
+	struct tracecmd_output *trace_output;
+	static FILE *fp = NULL;
+	static int tfd;
+	static int ofd;
+	long flags;
+
+	if (instance->handle) {
+		trace_input = instance->handle;
+		goto make_pipe;
+	}
+
+	if (!fp) {
+		fp = tmpfile();
+		if (!fp)
+			return NULL;
+		tfd = fileno(fp);
+
+		ofd = dup(tfd);
+		if (ofd < 0)
+			goto fail;
+
+		trace_output = tracecmd_output_create_fd(ofd);
+		if (!trace_output)
+			goto fail;
+		tracecmd_output_write_headers(trace_output, NULL);
+		tracecmd_output_free(trace_output);
+	}
+
+	/* Rewind so the headers just written can be read back */
+	lseek(ofd, 0, SEEK_SET);
+
+	trace_input = tracecmd_alloc_fd(ofd, 0);
+	if (!trace_input) {
+		close(ofd);
+		goto fail;
+	}
+
+	if (tracecmd_read_headers(trace_input, TRACECMD_FILE_PRINTK) < 0)
+		goto fail_free_input;
+
+	if (handle_init)
+		handle_init(trace_input, hooks, global);
+
+ make_pipe:
+	/* Do not block on this pipe */
+	flags = fcntl(fd, F_GETFL);
+	/* F_GETFL returns -1 on error; OR-ing that would request every flag */
+	if (flags >= 0)
+		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+
+	if (tracecmd_make_pipe(trace_input, cpu, fd, cpus) < 0)
+		goto fail_free_input;
+
+	instance->handle = trace_input;
+
+	return trace_input;
+
+ fail_free_input:
+	tracecmd_close(trace_input);
+ fail:
+	/* fp may already have been reset by an earlier failed call */
+	if (fp) {
+		fclose(fp);
+		fp = NULL;
+	}
+
+	return NULL;
+}
+
+/*
+ * Show the oldest pending record among all streams, waiting for data
+ * if none is available.
+ *
+ * @pids: per-stream state; a record that was read but not yet shown is
+ *	cached in ->record
+ * @nr_pids: number of elements in @pids
+ * @tv: timeout handed to select()
+ *
+ * Returns 1 after a record was shown, 0 when every pipe is closed or
+ * select() timed out, and a negative value when select() failed.
+ */
+int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval *tv)
+{
+	struct tep_record *record;
+	struct pid_record_data *pid;
+	struct pid_record_data *last_pid;
+	fd_set rfds;
+	int top_rfd = 0;
+	int nr_fd;
+	int ret;
+	int i;
+
+	last_pid = NULL;
+
+ again:
+	for (i = 0; i < nr_pids; i++) {
+		pid = &pids[i];
+
+		if (!pid->record) {
+			/*
+			 * errno is only meaningful if the read sets it, so
+			 * clear it first: a stale EINVAL left by an earlier
+			 * call must not mark this pipe as closed.
+			 */
+			errno = 0;
+			pid->record = tracecmd_read_data(pid->instance->handle, pid->cpu);
+			if (!pid->record && errno == EINVAL)
+				/* pipe has closed */
+				pid->closed = 1;
+		}
+		record = pid->record;
+
+		/* Track the stream holding the record with the smallest timestamp */
+		if (record &&
+		    (!last_pid || record->ts < last_pid->record->ts))
+			last_pid = pid;
+	}
+	if (last_pid) {
+		trace_show_data(last_pid->instance->handle, last_pid->record);
+		tracecmd_free_record(last_pid->record);
+		last_pid->record = NULL;
+		return 1;
+	}
+
+	nr_fd = 0;
+	FD_ZERO(&rfds);
+
+	for (i = 0; i < nr_pids; i++) {
+		/* Do not process closed pipes */
+		if (pids[i].closed)
+			continue;
+		nr_fd++;
+		if (pids[i].brass[0] > top_rfd)
+			top_rfd = pids[i].brass[0];
+
+		FD_SET(pids[i].brass[0], &rfds);
+	}
+
+	if (!nr_fd)
+		return 0;
+
+	ret = select(top_rfd + 1, &rfds, NULL, NULL, tv);
+
+	/* Something became readable: go and collect the records */
+	if (ret > 0)
+		goto again;
+
+	return ret;
+}
diff --git a/tracecmd/trace-usage.c b/tracecmd/trace-usage.c
new file mode 100644
index 00000000..2cfa64f5
--- /dev/null
+++ b/tracecmd/trace-usage.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libgen.h>
+
+#include "trace-local.h"
+#include "version.h"
+
+/* One entry of the help table */
+struct usage_help {
+ /* command name, matched with strcmp() by find_help() */
+ char *name;
+ /* one-line summary shown in the list of commands */
+ char *short_help;
+ /* printf() format for the detailed help; its "%s" receives the program name */
+ char *long_help;
+};
+
+static struct usage_help usage_help[] = {
+ {
+ "record",
+ "record a trace into a trace.dat file",
+ /* Adjacent literals are concatenated: every help line needs its own "\n" */
+ " %s record [-v][-e event [-f filter]][-p plugin][-F][-d][-D][-o file] \\\n"
+ " [-q][-s usecs][-O option ][-l func][-g func][-n func] \\\n"
+ " [-P pid][-N host:port][-t][-r prio][-b size][-B buf][command ...]\n"
+ " [-m max][-C clock]\n"
+ " -e run command with event enabled\n"
+ " -f filter for previous -e event\n"
+ " -R trigger for previous -e event\n"
+ " -p run command with plugin enabled\n"
+ " -F filter only on the given process\n"
+ " -P trace the given pid like -F for the command\n"
+ " -c also trace the children of -F (or -P if kernel supports it)\n"
+ " -C set the trace clock\n"
+ " -T do a stacktrace on all events\n"
+ " -l filter function name\n"
+ " -g set graph function\n"
+ " -n do not trace function\n"
+ " -m max size per CPU in kilobytes\n"
+ " -M set CPU mask to trace\n"
+ " -v will negate all -e (disable those events) and -B (delete those instances) after it\n"
+ " -d disable function tracer when running\n"
+ " -D Full disable of function tracing (for all users)\n"
+ " -o data output file [default trace.dat]\n"
+ " -O option to enable (or disable)\n"
+ " -r real time priority to run the capture threads\n"
+ " -s sleep interval between recording (in usecs) [default: 1000]\n"
+ " -S used with --profile, to enable only events in command line\n"
+ " -N host:port to connect to (see listen)\n"
+ " -V cid:port to connect to via vsocket (see listen)\n"
+ " -t used with -N, forces use of tcp in live trace\n"
+ " -b change kernel buffersize (in kilobytes per CPU)\n"
+ " -B create sub buffer and following events will be enabled here\n"
+ " -k do not reset the buffers after tracing.\n"
+ " -i do not fail if an event is not found\n"
+ " -q print no output to the screen\n"
+ " -G when profiling, set soft and hard irqs as global\n"
+ " --quiet print no output to the screen\n"
+ " --module filter module name\n"
+ " --by-comm used with --profile, merge events for related comms\n"
+ " --profile enable tracing options needed for report --profile\n"
+ " --func-stack perform a stack trace for function tracer\n"
+ " (use with caution)\n"
+ " --max-graph-depth limit function_graph depth\n"
+ " --cmdlines-size change kernel saved_cmdlines_size\n"
+ " --no-filter include trace-cmd threads in the trace\n"
+ " --proc-map save the traced processes address map into the trace.dat file\n"
+ " --user execute the specified [command ...] as given user\n"
+ " --tsc2nsec Convert the current clock to nanoseconds, using tsc multiplier and shift from the Linux\n"
+ " kernel's perf interface\n"
+ " --tsync-interval set the loop interval, in ms, for timestamps synchronization with guests:\n"
+ " If a negative number is specified, timestamps synchronization is disabled\n"
+ " If 0 is specified, no loop is performed - timestamps offset is calculated only twice,\n"
+ " at the beginning and at the end of the trace\n"
+ " --poll don't block while reading from the trace buffer\n"
+ " --name used with -A to give the agent a specific name\n"
+ " --file-version set the desired trace file version\n"
+ " --compression compress the trace output file, one of these strings can be passed:\n"
+ " any - auto select the best available compression algorithm\n"
+ " none - do not compress the trace file\n"
+ " name - the name of the desired compression algorithms\n"
+ " available algorithms can be listed with trace-cmd list -c\n"
+ },
+ {
+ "set",
+ "set a ftrace configuration parameter",
+ " %s set [-v][-e event [-f filter]][-p plugin][-F][-d][-D] \\\n"
+ " [-q][-s usecs][-O option ][-l func][-g func][-n func] \\\n"
+ " [-P pid][-b size][-B buf][-m max][-C clock][command ...]\n"
+ " -e enable event\n"
+ " -f filter for previous -e event\n"
+ " -R trigger for previous -e event\n"
+ " -p set ftrace plugin\n"
+ " -P set PIDs to be traced\n"
+ " -c also trace the children of -F (or -P if kernel supports it)\n"
+ " -C set the trace clock\n"
+ " -T do a stacktrace on all events\n"
+ " -l filter function name\n"
+ " -g set graph function\n"
+ " -n do not trace function\n"
+ " -m max size per CPU in kilobytes\n"
+ " -M set CPU mask to trace\n"
+ " -v will negate all -e (disable those events) and -B (delete those instances) after it\n"
+ " -d disable function tracer when running\n"
+ " -D Full disable of function tracing (for all users)\n"
+ " -O option to enable (or disable)\n"
+ " -b change kernel buffersize (in kilobytes per CPU)\n"
+ " -B create sub buffer and following events will be enabled here\n"
+ " -i do not fail if an event is not found\n"
+ " -q print no output to the screen\n"
+ " --quiet print no output to the screen\n"
+ " --module filter module name\n"
+ " --func-stack perform a stack trace for function tracer\n"
+ " (use with caution)\n"
+ " --max-graph-depth limit function_graph depth\n"
+ " --cmdlines-size change kernel saved_cmdlines_size\n"
+ " --user execute the specified [command ...] as given user\n"
+ " --fork return immediately if a command is specified\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "start",
+ "start tracing without recording into a file",
+ " %s start [-e event][-p plugin][-d][-O option ][-P pid]\n"
+ " Uses same options as record.\n"
+ " It only enables the tracing and exits\n"
+ "\n"
+ " --fork: If a command is specified, then return right after it forks\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "extract",
+ "extract a trace from the kernel",
+ " %s extract [-p plugin][-O option][-o file][-B buf][-s][-a][-t]\n"
+ " Uses similar options as record, but only reads an existing trace.\n"
+ " -s : extract the snapshot instead of the main buffer\n"
+ " -B : extract a given buffer (more than one may be specified)\n"
+ " -a : extract all buffers (except top one)\n"
+ " -t : extract the top level buffer (useful with -B and -a)\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "stop",
+ "stop the kernel from recording trace data",
+ " %s stop [-B buf [-B buf]..] [-a] [-t]\n"
+ " Stops the tracer from recording more data.\n"
+ " Used in conjunction with start\n"
+ " -B stop a given buffer (more than one may be specified)\n"
+ " -a stop all buffers (except top one)\n"
+ " -t stop the top level buffer (useful with -B or -a)\n"
+ },
+ {
+ "restart",
+ "restart the kernel trace data recording",
+ " %s restart [-B buf [-B buf]..] [-a] [-t]\n"
+ " Restarts recording after a trace-cmd stop.\n"
+ " Used in conjunction with stop\n"
+ " -B restart a given buffer (more than one may be specified)\n"
+ " -a restart all buffers (except top one)\n"
+ " -t restart the top level buffer (useful with -B or -a)\n"
+ },
+ {
+ "show",
+ "show the contents of the kernel tracing buffer",
+ " %s show [-p|-s][-c cpu][-B buf][options]\n"
+ " Basically, this is a cat of the trace file.\n"
+ " -p read the trace_pipe file instead\n"
+ " -s read the snapshot file instance\n"
+ " (Can't have both -p and -s)\n"
+ " -c just show the file associated with a given CPU\n"
+ " -B read from a tracing buffer instance.\n"
+ " -f display the file path that is being dumped\n"
+ " The following options shows the corresponding file name\n"
+ " and then exits.\n"
+ " --tracing_on\n"
+ " --current_tracer\n"
+ " --buffer_size (for buffer_size_kb)\n"
+ " --buffer_total_size (for buffer_total_size_kb)\n"
+ " --ftrace_filter (for set_ftrace_filter)\n"
+ " --ftrace_notrace (for set_ftrace_notrace)\n"
+ " --ftrace_pid (for set_ftrace_pid)\n"
+ " --graph_function (for set_graph_function)\n"
+ " --graph_notrace (for set_graph_notrace)\n"
+ " --cpumask (for tracing_cpumask)\n"
+ },
+ {
+ "reset",
+ "disable all kernel tracing and clear the trace buffers",
+ " %s reset [-b size][-B buf][-a][-d][-t]\n"
+ " Disables the tracer (may reset trace file)\n"
+ " Used in conjunction with start\n"
+ " -b change the kernel buffer size (in kilobytes per CPU)\n"
+ " -d delete the previous specified instance\n"
+ " -B reset the given buffer instance (may specify multiple -B)\n"
+ " -a reset all instances (except top one)\n"
+ " -t reset the top level instance (useful with -B or -a)\n"
+ },
+ {
+ "clear",
+ "clear the trace buffers",
+ " %s clear [-B buf][-a]\n"
+ " -B clear the given buffer (may specify multiple -B)\n"
+ " -a clear all existing buffers, including the top level one\n"
+ },
+ {
+ "report",
+ "read out the trace stored in a trace.dat file",
+ " %s report [-i file] [--cpu cpu] [-e][-f][-l][-P][-L][-N][-R][-E]\\\n"
+ " [-r events][-n events][-F filter][-v][-V[1-6]][-T][-O option]\n"
+ " [-H [start_system:]start_event,start_match[,pid]/[end_system:]end_event,end_match[,flags]\n"
+ " [-G]\n"
+ " -i input file [default trace.dat]\n"
+ " -e show file endianess\n"
+ " -f show function mapping list\n"
+ " -P show printk list\n"
+ " -E show event files stored\n"
+ " -F filter to filter output on\n"
+ " -I filter out events with the HARDIRQ flag set\n"
+ " -S filter out events with the SOFTIRQ flag set\n"
+ " -t print out full timestamp. Do not truncate to 6 places.\n"
+ " -R raw format: ignore print format and only show field data\n"
+ " -r raw format the events that match the option\n"
+ " -v will negate all -F after it (Not show matches)\n"
+ " -T print out the filter strings created and exit\n"
+ " -V[level] verbose (shows plugins being loaded)\n"
+ " With optional level (see --verbose numbers)\n"
+ " -L load only local (~/.trace-cmd/plugins) plugins\n"
+ " -N do not load any plugins\n"
+ " -n ignore plugin handlers for events that match the option\n"
+ " -w show wakeup latencies\n"
+ " -l show latency format (default with latency tracers)\n"
+ " -O plugin option -O [plugin:]var[=val]\n"
+ " --cpu <cpu1,cpu2,...> - filter events according to the given cpu list.\n"
+ " A range of CPUs can be specified using 'cpuX-cpuY' notation.\n"
+ " --cpus - List the CPUs that have content in it then exit.\n"
+ " --check-events return whether all event formats can be parsed\n"
+ " --stat - show the buffer stats that were reported at the end of the record.\n"
+ " --uname - show uname of the record, if it was saved\n"
+ " --version - show version used to build the trace-cmd exec that created the file\n"
+ " --profile report stats on where tasks are blocked and such\n"
+ " -G when profiling, set soft and hard irqs as global\n"
+ " -H Allows users to hook two events together for timings\n"
+ " (used with --profile)\n"
+ " --by-comm used with --profile, merge events for related comms\n"
+ " --ts-offset will add amount to timestamp of all events of the\n"
+ " previous data file.\n"
+ " --ts2secs HZ, pass in the timestamp frequency (per second)\n"
+ " to convert the displayed timestamps to seconds\n"
+ " Affects the previous data file, unless there was no\n"
+ " previous data file, in which case it becomes default\n"
+ " --ts-diff Show the delta timestamp between events.\n"
+ " --ts-check Check to make sure no time stamp on any CPU goes backwards.\n"
+ " --nodate Ignore the --date processing of trace-cmd record.\n"
+ " --raw-ts Display raw timestamps, without any corrections.\n"
+ " --align-ts Display timestamps aligned to the first event.\n"
+ " --verbose[=level] Set the desired log level\n"
+ " 0 or none - no error messages\n"
+ " 1 or crit - only critical messages\n"
+ " 2 or err - 'crit' and error messages\n"
+ " 3 or warn - 'err' and warning messages\n"
+ " 4 or info - 'warn' and informational messages\n"
+ " 5 or debug - 'info' and debugging messages\n"
+ " 6 or all - same as debug\n"
+ },
+ {
+ "stream",
+ "Start tracing and read the output directly",
+ " %s stream [-e event][-p plugin][-d][-O option ][-P pid]\n"
+ " Uses same options as record but does not write to files or the network.\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "profile",
+ "Start profiling and read the output directly",
+ " %s profile [-e event][-p plugin][-d][-O option ][-P pid][-G][-S][-o output]\n"
+ " [-H [start_system:]start_event,start_match[,pid]/[end_system:]end_event,end_match[,flags]\n\n"
+ " Uses same options as record --profile.\n"
+ " -H Allows users to hook two events together for timings\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "hist",
+ "show a histogram of the trace.dat information",
+ /* The synopsis needs its own "\n"; adjacent literals are concatenated */
+ " %s hist [-i file][-P] [file]\n"
+ " -P ignore pids (compact all functions)\n"
+ },
+ {
+ "stat",
+ "show the status of the running tracing (ftrace) system",
+ /* The synopsis needs its own "\n"; adjacent literals are concatenated */
+ " %s stat [-B buf][-t][-o]\n"
+ " -B show the status of a instance buffer\n"
+ " -t show the top level status along with buffer specified by -B\n"
+ " -o list tracing options\n"
+ },
+ {
+ "split",
+ "parse a trace.dat file into smaller file(s)",
+ " %s split [options] -o file [start [end]]\n"
+ " -o output file to write to (file.1, file.2, etc)\n"
+ " -s n split file up by n seconds\n"
+ " -m n split file up by n milliseconds\n"
+ " -u n split file up by n microseconds\n"
+ " -e n split file up by n events\n"
+ " -p n split file up by n pages\n"
+ " -r repeat from start to end\n"
+ " -c per cpu, that is -p 2 will be 2 pages for each CPU\n"
+ " if option is specified, it will split the file\n"
+ " up starting at start, and ending at end\n"
+ " start - decimal start time in seconds (ex: 75678.923853)\n"
+ " if left out, will start at beginning of file\n"
+ " end - decimal end time in seconds\n"
+ },
+ {
+ "options",
+ "list the plugin options available for trace-cmd report",
+ " %s options\n"
+ },
+ {
+ "listen",
+ "listen on a network socket for trace clients",
+ " %s listen -p port[-D][-o file][-d dir][-l logfile]\n"
+ " Creates a socket to listen for clients.\n"
+ " -p port number to listen on.\n"
+ " -D run in daemon mode.\n"
+ " -V listen on a vsocket instead.\n"
+ " -o file name to use for clients.\n"
+ " -d directory to store client files.\n"
+ " -l logfile to write messages to.\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "agent",
+ "listen on a vsocket for trace clients",
+ " %s agent -p port[-D]\n"
+ " Creates a vsocket to listen for clients.\n"
+ " -N Connect to IP via TCP instead of vsockets\n"
+ " *** Insecure setting, only use on a trusted network ***\n"
+ " *** Only use if the client is totally trusted. ***\n"
+ " -p port number to listen on.\n"
+ " -D run in daemon mode.\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "setup-guest",
+ "create FIFOs for tracing guest VMs",
+ " %s setup-guest [-c cpus][-p perm][-g group][-a] guest\n"
+ " -c number of guest virtual CPUs\n"
+ " -p FIFOs permissions (default: 0660)\n"
+ " -g FIFOs group owner\n"
+ " -a Attach FIFOs to guest VM config\n"
+ },
+ {
+ "list",
+ "list the available events, plugins or options",
+ " %s list [-e [regex]][-t][-o][-f [regex]]\n"
+ " -e list available events\n"
+ " -F show event format\n"
+ " --full show the print fmt with -F\n"
+ " -R show event triggers\n"
+ " -l show event filters\n"
+ " -t list available tracers\n"
+ " -o list available options\n"
+ " -f [regex] list available functions to filter on\n"
+ " -P list loaded plugin files (by path)\n"
+ " -O list plugin options\n"
+ " -B list defined buffer instances\n"
+ " -C list the defined clocks (and active one)\n"
+ " -c list the supported trace file compression algorithms\n"
+ },
+ {
+ "restore",
+ "restore a crashed record",
+ " %s restore [-c][-o file][-i file] cpu-file [cpu-file ...]\n"
+ " -c create a partial trace.dat file only\n"
+ " -o output file\n"
+ " -i partial trace.dat file for input\n"
+ },
+ {
+ "snapshot",
+ "take snapshot of running trace",
+ " %s snapshot [-s][-r][-f][-B buf][-c cpu]\n"
+ " -s take a snapshot of the trace buffer\n"
+ " -r reset current snapshot\n"
+ " -f free the snapshot buffer\n"
+ " without the above three options, display snapshot\n"
+ " -c operate on the snapshot buffer for the given CPU\n"
+ " -B operate on the snapshot buffer for a tracing buffer instance.\n"
+ },
+ {
+ "stack",
+ "output, enable or disable kernel stack tracing",
+ " %s stack [--start][--stop][--reset]\n"
+ " --start enable the stack tracer\n"
+ " --stop disable the stack tracer\n"
+ " --reset reset the maximum stack found\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "check-events",
+ "parse trace event formats",
+ " %s check-events [-N]\n"
+ " -N do not load any plugins\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "dump",
+ "read out the meta data from a trace file",
+ " %s dump [options]\n"
+ " -i input file, default is trace.dat\n"
+ " -v validate a trace file\n"
+ " --all print all meta data from a trace file\n"
+ " --summary print a meta data summary\n"
+ " --head-page print header page information\n"
+ " --head-event print header event information\n"
+ " --ftrace-events print ftrace events format\n"
+ " --systems print recorded event systems\n"
+ " --events print format of recorded events\n"
+ " --kallsyms print information of the mapping of function addresses to the function names\n"
+ " --printk print trace_printk() format strings\n"
+ " --cmd-lines print information mapping a PID to a process name\n"
+ " --options print options\n"
+ " --flyrecord information of offset and count of recorded events per CPU\n"
+ " --clock trace clock, saved in the file\n"
+ " -h, --help show usage information\n"
+ " --verbose 'level' Set the desired log level\n"
+ },
+ {
+ "convert",
+ "convert trace file to different version",
+ " %s convert [options]\n"
+ " -i input file, default is trace.dat\n"
+ " -o output file, mandatory parameter.\n"
+ " The output file can be specified also as last argument of the command\n"
+ " --file-version set the desired trace file version\n"
+ " --compression compress the trace output file, one of these strings can be passed:\n"
+ " any - auto select the best available compression algorithm\n"
+ " none - do not compress the trace file\n"
+ " name - the name of the desired compression algorithms\n"
+ " available algorithms can be listed with trace-cmd list -c\n" },
+ {
+ NULL, NULL, NULL
+ }
+};
+
+/*
+ * Look up a command in the usage_help table.
+ * Returns the matching entry, or NULL if @cmd is not in the table.
+ */
+static struct usage_help *find_help(char *cmd)
+{
+	struct usage_help *entry;
+
+	/* The table is terminated by an entry with a NULL name */
+	for (entry = usage_help; entry->name; entry++) {
+		if (!strcmp(cmd, entry->name))
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Print the version banner followed by either the detailed help of the
+ * command named by argv[1] or, when argv[1] is missing or unknown, the
+ * list of all commands. Exits with status -1; does not return.
+ */
+void usage(char **argv)
+{
+	struct usage_help *entry = NULL;
+	char *prog;
+
+	prog = basename(argv[0]);
+
+	printf("\n"
+	       "%s version %s (%s)\n\n"
+	       "usage:\n", prog, VERSION_STRING, VERSION_GIT);
+
+	if (argv[1])
+		entry = find_help(argv[1]);
+
+	if (entry) {
+		/* long_help is a format string taking the program name */
+		printf(entry->long_help, prog);
+	} else {
+		printf(" %s [COMMAND] ...\n\n"
+		       " commands:\n", prog);
+
+		for (entry = usage_help; entry->name; entry++)
+			printf(" %s - %s\n", entry->name, entry->short_help);
+	}
+
+	printf("\n");
+	exit(-1);
+}
+
+
+/* (argc, argv) style wrapper that forwards to usage(); argc is not used */
+void trace_usage(int argc, char **argv)
+{
+ usage(argv);
+}
diff --git a/tracecmd/trace-vm.c b/tracecmd/trace-vm.c
new file mode 100644
index 00000000..57dbef8d
--- /dev/null
+++ b/tracecmd/trace-vm.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "trace-local.h"
+#include "trace-msg.h"
+
+static struct trace_guest *guests;
+static size_t guests_len;
+
+/* Return the registered guest whose CID equals @guest_cid, or NULL. */
+static struct trace_guest *get_guest_by_cid(unsigned int guest_cid)
+{
+	struct trace_guest *guest;
+	int idx;
+
+	if (!guests)
+		return NULL;
+
+	for (idx = 0; idx < guests_len; idx++) {
+		guest = &guests[idx];
+		if (guest->cid == guest_cid)
+			return guest;
+	}
+
+	return NULL;
+}
+
+/* Return the registered guest whose name is exactly @name, or NULL. */
+static struct trace_guest *get_guest_by_name(const char *name)
+{
+	struct trace_guest *guest;
+	int idx;
+
+	if (!guests)
+		return NULL;
+
+	for (idx = 0; idx < guests_len; idx++) {
+		guest = &guests[idx];
+		if (!strcmp(name, guest->name))
+			return guest;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return true when every registered guest has a non-negative pid.
+ * With no guests registered the result is true as well.
+ */
+bool trace_have_guests_pid(void)
+{
+	int idx = 0;
+
+	while (idx < guests_len) {
+		if (guests[idx].pid < 0)
+			return false;
+		idx++;
+	}
+
+	return true;
+}
+
+/*
+ * Append a zeroed guest entry with the given @cid and a copy of @name;
+ * its pid starts out as -1. Allocation failures end in die().
+ *
+ * The returned pointer refers into the realloc()'d guests array, so it
+ * may stop being valid after a later add_guest() call.
+ */
+static struct trace_guest *add_guest(unsigned int cid, const char *name)
+{
+	struct trace_guest *guest;
+
+	guests = realloc(guests, (guests_len + 1) * sizeof(*guests));
+	if (!guests)
+		die("allocating new guest");
+
+	guest = &guests[guests_len];
+	memset(guest, 0, sizeof(struct trace_guest));
+
+	guest->name = strdup(name);
+	if (!guest->name)
+		die("allocating guest name");
+	guest->cid = cid;
+	guest->pid = -1;
+	guests_len++;
+
+	return guest;
+}
+
+/*
+ * Create the "vsock_find_pid" tracing instance, enable the
+ * sched:sched_waking and kvm:kvm_exit events in it and turn tracing on.
+ * Returns the instance, or NULL if it could not be created.
+ */
+static struct tracefs_instance *start_trace_connect(void)
+{
+	struct tracefs_instance *inst = tracefs_instance_create("vsock_find_pid");
+
+	if (inst) {
+		tracefs_event_enable(inst, "sched", "sched_waking");
+		tracefs_event_enable(inst, "kvm", "kvm_exit");
+		tracefs_trace_on(inst);
+	}
+
+	return inst;
+}
+
+/* Singly linked list node holding one PID; add_pid() pushes new nodes at the head. */
+struct pids {
+ struct pids *next;
+ int pid;
+};
+
+/* State handed to callback() while the recorded events are iterated. */
+struct trace_fields {
+ struct tep_event *sched_waking; /* sched:sched_waking event descriptor */
+ struct tep_event *kvm_exit; /* kvm:kvm_exit event descriptor */
+ struct tep_format_field *common_pid; /* "common_pid" field, looked up via sched_waking */
+ struct tep_format_field *sched_next; /* "pid" field of sched_waking (the woken task) */
+ struct pids *pids; /* PIDs of interest: this process plus tasks it (transitively) woke */
+ int found_pid; /* PID seen in kvm_exit; stop_trace_connect() presets it to -1 */
+};
+
+/* Release every node of the @pids list; a NULL list is a no-op. */
+static void free_pids(struct pids *pids)
+{
+	struct pids *node;
+
+	for (node = pids; node; node = pids) {
+		/* grab the successor before the node goes away */
+		pids = node->next;
+		free(node);
+	}
+}
+
+/*
+ * Push @pid onto the front of the list at *@pids.
+ * If the node cannot be allocated the list is left untouched.
+ */
+static void add_pid(struct pids **pids, int pid)
+{
+	struct pids *node = malloc(sizeof(*node));
+
+	if (node) {
+		node->pid = pid;
+		node->next = *pids;
+		*pids = node;
+	}
+}
+
+/* Return true if @pid is stored somewhere in the @pids list. */
+static bool match_pid(struct pids *pids, int pid)
+{
+	struct pids *node;
+
+	for (node = pids; node; node = node->next) {
+		if (node->pid == pid)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Per-event handler passed to tracefs_iterate_raw_events().
+ *
+ * @data is the struct trace_fields set up by stop_trace_connect().
+ * Events in hard/soft interrupt context, and events whose common_pid is
+ * not in fields->pids, are ignored. A kvm_exit from a tracked PID stores
+ * that PID in fields->found_pid and returns -1; a sched_waking from a
+ * tracked PID adds the woken task's PID to the tracked list.
+ * Every other path returns 0. @cpu is unused.
+ */
+static int callback(struct tep_event *event, struct tep_record *record, int cpu,
+ void *data)
+{
+ struct trace_fields *fields = data;
+ struct tep_handle *tep = event->tep;
+ unsigned long long val;
+ int flags;
+ int type;
+ int pid;
+ int ret;
+
+ ret = tep_read_number_field(fields->common_pid, record->data, &val);
+ if (ret < 0)
+ return 0;
+
+ flags = tep_data_flags(tep, record);
+
+ /* Ignore events in interrupts */
+ if (flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
+ return 0;
+
+ /*
+ * First make sure that this event comes from a PID from
+ * this task (or a task woken by this task)
+ */
+ pid = val;
+ if (!match_pid(fields->pids, pid))
+ return 0;
+
+ type = tep_data_type(tep, record);
+
+ /*
+ * If this event is a kvm_exit, we have our PID
+ * and we can stop processing.
+ */
+ if (type == fields->kvm_exit->id) {
+ fields->found_pid = pid;
+ return -1;
+ }
+
+ if (type != fields->sched_waking->id)
+ return 0;
+
+ /* Read the "pid" field of sched_waking: the task being woken */
+ ret = tep_read_number_field(fields->sched_next, record->data, &val);
+ if (ret < 0)
+ return 0;
+
+ /* This is a task woken by our task or a chain of wake ups */
+ add_pid(&fields->pids, (int)val);
+ return 0;
+}
+
+/*
+ * Read the "Tgid:" line of /proc/<pid>/status.
+ *
+ * Returns the parsed value, or -1 if the path cannot be built, the file
+ * cannot be opened, or no "Tgid:" line with a value is found.
+ */
+static int find_tgid(int pid)
+{
+	FILE *fp;
+	char *path;
+	char *buf = NULL;
+	char *save;
+	size_t l = 0;
+	int tgid = -1;
+
+	if (asprintf(&path, "/proc/%d/status", pid) < 0)
+		return -1;
+
+	fp = fopen(path, "r");
+	free(path);
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &l, fp) > 0) {
+		char *tok;
+
+		if (strncmp(buf, "Tgid:", 5) != 0)
+			continue;
+		/* First token is the "Tgid" key, the second one its value */
+		tok = strtok_r(buf, ":", &save);
+		if (!tok)
+			continue;
+		tok = strtok_r(NULL, ":", &save);
+		if (!tok)
+			continue;
+		/* <ctype.h> functions take an unsigned char value, not a plain char */
+		while (isspace((unsigned char)*tok))
+			tok++;
+		tgid = strtol(tok, NULL, 0);
+		break;
+	}
+	/* getline() allocated (and possibly grew) buf */
+	free(buf);
+	fclose(fp);
+
+	return tgid;
+}
+
+/*
+ * Stop the tracing started by start_trace_connect(), scan the recorded
+ * events for the task that hit kvm_exit, and destroy the instance.
+ *
+ * Returns the thread group id of that task as read by find_tgid(), or -1
+ * if @open_instance is NULL, the event descriptions cannot be loaded, or
+ * no matching task was found.
+ */
+static int stop_trace_connect(struct tracefs_instance *open_instance)
+{
+	const char *systems[] = { "kvm", "sched", NULL};
+	/* found_pid is read after the out label, so it needs a value on every path */
+	struct trace_fields trace_fields = { .found_pid = -1 };
+	struct tep_handle *tep;
+	int tgid = -1;
+
+	if (!open_instance)
+		return -1;
+
+	/* The connection is finished, stop tracing, we have what we want */
+	tracefs_trace_off(open_instance);
+	tracefs_event_disable(open_instance, NULL, NULL);
+
+	tep = tracefs_local_events_system(NULL, systems);
+	if (!tep)
+		goto out;
+
+	trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
+	if (!trace_fields.sched_waking)
+		goto out_tep;
+	trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
+	if (!trace_fields.kvm_exit)
+		goto out_tep;
+	trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking,
+							"common_pid");
+	if (!trace_fields.common_pid)
+		goto out_tep;
+	trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking,
+						     "pid");
+	if (!trace_fields.sched_next)
+		goto out_tep;
+
+	/* Seed the list with this process; callback() follows the wake-up chain */
+	trace_fields.pids = NULL;
+	add_pid(&trace_fields.pids, getpid());
+	tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
+	free_pids(trace_fields.pids);
+ out_tep:
+	/* Only the integer found_pid is needed from here on */
+	tep_free(tep);
+ out:
+	tracefs_instance_destroy(open_instance);
+	tracefs_instance_free(open_instance);
+
+	if (trace_fields.found_pid > 0)
+		tgid = find_tgid(trace_fields.found_pid);
+
+	return tgid;
+}
+
+/*
+ * In order to find the guest that is associated to the given cid,
+ * trace the sched_waking and kvm_exit events, connect to the cid
+ * (doesn't matter what port, use -1 to not connect to anything)
+ * and find what task gets woken up from this code and calls kvm_exit,
+ * then that is the task that is running the guest.
+ * Then look at the /proc/<guest-pid>/status file to find the task group
+ * id (Tgid), and this is the PID of the task running all the threads.
+ */
+static void find_pid_by_cid(struct trace_guest *guest)
+{
+	struct tracefs_instance *tracer = start_trace_connect();
+	/* The connection attempt is what generates the events being traced */
+	int sock = trace_vsock_open(guest->cid, -1);
+
+	guest->pid = stop_trace_connect(tracer);
+
+	/* Just in case! */
+	if (sock >= 0)
+		close(sock);
+}
+
+/*
+ * Find a guest by @name first, then by @cid.
+ *
+ * If neither lookup matches and both a non-zero @cid and a @name were
+ * given, a new guest is registered and its pid is looked up through
+ * find_pid_by_cid(). Returns the guest, or NULL if there is none and
+ * none could be added.
+ */
+struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
+{
+	struct trace_guest *guest;
+
+	if (name) {
+		guest = get_guest_by_name(name);
+		if (guest)
+			return guest;
+	}
+
+	if (cid == 0)
+		return NULL;
+
+	guest = get_guest_by_cid(cid);
+	if (guest || !name)
+		return guest;
+
+	guest = add_guest(cid, name);
+	if (guest)
+		find_pid_by_cid(guest);
+
+	return guest;
+}
+
+#define VM_CID_CMD "virsh dumpxml"
+#define VM_CID_LINE "<cid auto="
+#define VM_CID_ID "address='"
+/*
+ * Run "virsh dumpxml <name>", find the line containing both VM_CID_LINE
+ * and VM_CID_ID, and register a guest with the CID parsed after
+ * VM_CID_ID. Nothing is registered if the command cannot be run or no
+ * valid CID is found.
+ *
+ * NOTE(review): @name is pasted into a shell command line unquoted; it
+ * comes from "virsh list --name" output here, confirm that is trusted.
+ */
+static void read_guest_cid(char *name)
+{
+	struct trace_guest *guest;
+	char *cmd = NULL;
+	char line[512];
+	char *cid;
+	char *end;
+	unsigned long cid_id;
+	FILE *f;
+
+	/* On failure cmd is not usable, so it must not reach popen() */
+	if (asprintf(&cmd, "%s %s", VM_CID_CMD, name) < 0)
+		return;
+	f = popen(cmd, "r");
+	free(cmd);
+	if (f == NULL)
+		return;
+
+	while (fgets(line, sizeof(line), f) != NULL) {
+		if (!strstr(line, VM_CID_LINE))
+			continue;
+		cid = strstr(line, VM_CID_ID);
+		if (!cid)
+			continue;
+		cid += strlen(VM_CID_ID);
+
+		/* errno must be cleared first: strtoul() only sets it on error */
+		errno = 0;
+		cid_id = strtoul(cid, &end, 10);
+		if (end == cid || errno == ERANGE || cid_id > UINT_MAX)
+			continue;
+
+		guest = add_guest(cid_id, name);
+		if (guest)
+			find_pid_by_cid(guest);
+		break;
+	}
+
+	/* close */
+	pclose(f);
+}
+
+#define VM_NAME_CMD "virsh list --name"
+/*
+ * Run "virsh list --name" and call read_guest_cid() for every non-empty
+ * line of its output, with the trailing newline removed.
+ * Returns silently if the command cannot be started.
+ */
+void read_qemu_guests(void)
+{
+	char name[256];
+	size_t last;
+	FILE *f = popen(VM_NAME_CMD, "r");
+
+	if (!f)
+		return;
+
+	while (fgets(name, sizeof(name), f)) {
+		if (name[0] == '\n')
+			continue;
+
+		last = strlen(name) - 1;
+		if (name[last] == '\n')
+			name[last] = '\0';
+
+		read_guest_cid(name);
+	}
+
+	/* close */
+	pclose(f);
+}
+
+/*
+ * Return the entry of the cpu_pid table for @guest_vcpu of the guest
+ * with CID @guest_cid.
+ *
+ * Returns -1 if no guests are registered, or no guest with that CID has
+ * a cpu_pid table covering @guest_vcpu.
+ */
+int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu)
+{
+	int i;
+
+	if (!guests)
+		return -1;
+
+	for (i = 0; i < guests_len; i++) {
+		/*
+		 * cpu_pid is indexed below, so a missing table is a NULL
+		 * pointer; a "< 0" comparison never detects that.
+		 */
+		if (!guests[i].cpu_pid || guest_vcpu >= guests[i].cpu_max)
+			continue;
+		if (guest_cid == guests[i].cid)
+			return guests[i].cpu_pid[guest_vcpu];
+	}
+	return -1;
+}
diff --git a/tracecmd/trace-vsock.c b/tracecmd/trace-vsock.c
new file mode 100644
index 00000000..39294e7a
--- /dev/null
+++ b/tracecmd/trace-vsock.c
@@ -0,0 +1,176 @@
+#include <unistd.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <sys/ioctl.h>
+#include <linux/vm_sockets.h>
+
+#include "trace-cmd-private.h"
+
+/*
+ * Open a stream vsock socket and connect it to @cid:@port.
+ *
+ * Returns the connected descriptor, or a negative errno value on
+ * failure. The socket is closed again when connect() fails, so no
+ * descriptor is leaked on the error path.
+ */
+int __hidden trace_vsock_open(unsigned int cid, unsigned int port)
+{
+	struct sockaddr_vm addr = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = cid,
+		.svm_port = port,
+	};
+	int err;
+	int sd;
+
+	sd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	if (sd < 0)
+		return -errno;
+
+	if (connect(sd, (struct sockaddr *)&addr, sizeof(addr))) {
+		/* close() may overwrite errno, keep the connect() error */
+		err = errno;
+		close(sd);
+		return -err;
+	}
+
+	return sd;
+}
+
+/*
+ * Create a listening stream vsock socket bound to VMADDR_CID_ANY:@port.
+ *
+ * Returns the listening descriptor, or a negative errno value on
+ * failure. The socket is closed again when bind() or listen() fails.
+ */
+int __hidden trace_vsock_make(unsigned int port)
+{
+	struct sockaddr_vm addr = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = VMADDR_CID_ANY,
+		.svm_port = port,
+	};
+	int err;
+	int sd;
+
+	sd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	if (sd < 0)
+		return -errno;
+
+	/* Best effort: the result is deliberately not checked */
+	setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &(int){1}, sizeof(int));
+
+	if (bind(sd, (struct sockaddr *)&addr, sizeof(addr)))
+		goto fail;
+
+	if (listen(sd, SOMAXCONN))
+		goto fail;
+
+	return sd;
+
+fail:
+	/* close() may overwrite errno, keep the original error */
+	err = errno;
+	close(sd);
+	return -err;
+}
+
+/* Same as trace_vsock_make(), called with VMADDR_PORT_ANY as the port. */
+int __hidden trace_vsock_make_any(void)
+{
+ return trace_vsock_make(VMADDR_PORT_ANY);
+}
+
+/*
+ * Store the local port of vsock socket @sd in *@port (if @port is not NULL).
+ *
+ * Returns 0 on success, -errno if getsockname() fails, or -EINVAL if the
+ * socket is not an AF_VSOCK socket.
+ */
+int __hidden trace_vsock_get_port(int sd, unsigned int *port)
+{
+	struct sockaddr_vm local;
+	socklen_t len = sizeof(local);
+
+	if (getsockname(sd, (struct sockaddr *)&local, &len) != 0)
+		return -errno;
+	if (local.svm_family != AF_VSOCK)
+		return -EINVAL;
+
+	if (port != NULL)
+		*port = local.svm_port;
+
+	return 0;
+}
+
+/*
+ * Fetch the local and remote CID of the connected vsock socket @fd.
+ *
+ * *@lcid is written as soon as the local address is known, *@rcid once
+ * the peer address is known. Returns 0 on success, or -1 if either
+ * address cannot be read or is not an AF_VSOCK address.
+ */
+int get_vsocket_params(int fd, unsigned int *lcid, unsigned int *rcid)
+{
+	struct sockaddr_vm local;
+	struct sockaddr_vm peer;
+	socklen_t len;
+
+	memset(&local, 0, sizeof(local));
+	len = sizeof(local);
+	if (getsockname(fd, (struct sockaddr *)&local, &len) != 0 ||
+	    local.svm_family != AF_VSOCK)
+		return -1;
+	*lcid = local.svm_cid;
+
+	memset(&peer, 0, sizeof(peer));
+	len = sizeof(peer);
+	if (getpeername(fd, (struct sockaddr *)&peer, &len) != 0 ||
+	    peer.svm_family != AF_VSOCK)
+		return -1;
+	*rcid = peer.svm_cid;
+
+	return 0;
+}
+
+/*
+ * Log the peer CID and port of the connected vsock socket @fd.
+ *
+ * With debugging enabled the descriptor number is logged as well.
+ * Returns 0 on success, or -1 if the peer address cannot be read or is
+ * not an AF_VSOCK address.
+ */
+int trace_vsock_print_connection(int fd)
+{
+	struct sockaddr_vm vm_addr;
+	socklen_t addr_len;
+	/* unsigned to match both the svm_* fields and the %u conversions */
+	unsigned int cid, port;
+
+	addr_len = sizeof(vm_addr);
+	if (getpeername(fd, (struct sockaddr *)&vm_addr, &addr_len))
+		return -1;
+	if (vm_addr.svm_family != AF_VSOCK)
+		return -1;
+	cid = vm_addr.svm_cid;
+	port = vm_addr.svm_port;
+	if (tracecmd_get_debug())
+		tracecmd_debug("Connected to @%u:%u fd:%d\n", cid, port, fd);
+	else
+		tracecmd_plog("Connected to @%u:%u\n", cid, port);
+	return 0;
+}
+
+/*
+ * Probe whether splice() can read from a vsock socket.
+ *
+ * An unconnected vsock socket is spliced into a scratch pipe. Returns 1
+ * if splice() succeeds or fails with anything other than EINVAL, 0 if it
+ * fails with EINVAL, -errno if the socket cannot be created, or the
+ * negative pipe() result if the pipe cannot be created.
+ */
+static int try_splice_read_vsock(void)
+{
+ int ret, sd, brass[2];
+
+ sd = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (sd < 0)
+ return -errno;
+
+ ret = pipe(brass);
+ if (ret < 0)
+ goto out_close_sd;
+
+ /*
+ * On kernels that don't support splice reading from vsockets
+ * this will fail with EINVAL, or ENOTCONN otherwise.
+ * Technically, it should never succeed but if it does, claim splice
+ * reading is supported.
+ */
+ ret = splice(sd, NULL, brass[1], NULL, 10, 0);
+ if (ret < 0)
+ ret = errno != EINVAL;
+ else
+ ret = 1;
+
+ close(brass[0]);
+ close(brass[1]);
+out_close_sd:
+ close(sd);
+ return ret;
+}
+
+/*
+ * Report whether splice() reads from vsock sockets are usable.
+ * The probe runs on the first call only; the answer is kept in a static
+ * and returned directly afterwards.
+ */
+bool __hidden trace_vsock_can_splice_read(void)
+{
+	static bool initialized, res;
+
+	if (!initialized) {
+		res = try_splice_read_vsock() > 0;
+		initialized = true;
+	}
+
+	return res;
+}
+
+#define GET_LOCAL_CID 0x7b9
+
+/*
+ * Ask /dev/vsock for the local CID through the GET_LOCAL_CID ioctl.
+ * Returns the CID, or a negative errno value if the device cannot be
+ * opened or the ioctl fails.
+ */
+int __hidden trace_vsock_local_cid(void)
+{
+	int fd = open("/dev/vsock", O_RDONLY);
+	int cid;
+
+	if (fd < 0)
+		return -errno;
+
+	if (ioctl(fd, GET_LOCAL_CID, &cid) != 0)
+		cid = -errno;
+
+	close(fd);
+	return cid;
+}
diff --git a/utest/Makefile b/utest/Makefile
new file mode 100644
index 00000000..2cf99745
--- /dev/null
+++ b/utest/Makefile
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0
+
+include $(src)/scripts/utils.mk
+
+bdir:=$(obj)/utest
+
+TARGETS = $(bdir)/trace-utest
+
+OBJS =
+OBJS += trace-utest.o
+OBJS += tracefs-utest.o
+
+LIBS += -lcunit $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS)
+
+OBJS := $(OBJS:%.o=$(bdir)/%.o)
+DEPS := $(OBJS:$(bdir)/%.o=$(bdir)/.%.d)
+
+$(bdir):
+ @mkdir -p $(bdir)
+
+$(OBJS): | $(bdir)
+$(DEPS): | $(bdir)
+
+$(bdir)/trace-utest: $(OBJS)
+ $(Q)$(do_app_build)
+
+$(bdir)/%.o: %.c
+ $(Q)$(call do_fpic_compile)
+
+# Generate the dependency file once, with the object under $(bdir) as the
+# rule target (-MT). A plain "-M" run before this only produced output that
+# was overwritten right away.
+$(DEPS): $(bdir)/.%.d: %.c
+	$(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@
+
+$(OBJS): $(bdir)/%.o : $(bdir)/.%.d
+
+dep_includes := $(wildcard $(DEPS))
+
+test: $(TARGETS)
+
+clean:
+ $(RM) $(TARGETS) $(bdir)/*.o $(bdir)/.*.d
diff --git a/utest/README b/utest/README
new file mode 100644
index 00000000..f93630d0
--- /dev/null
+++ b/utest/README
@@ -0,0 +1,15 @@
+
+Unit tests for trace-cmd libraries. The tests use the CUnit framework:
+ http://cunit.sourceforge.net/
+which must be installed on the system before building the unit tests.
+The framework can be downloaded, compiled and installed manually, or
+using a precompiled distro package:
+
+ Fedora:
+ CUnit
+ CUnit-devel
+
+ Ubuntu and Debian:
+ libcunit1
+ libcunit1-doc
+ libcunit1-dev
diff --git a/utest/trace-utest.c b/utest/trace-utest.c
new file mode 100644
index 00000000..58d4d4e4
--- /dev/null
+++ b/utest/trace-utest.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <stdlib.h>
+
+#include <CUnit/CUnit.h>
+#include <CUnit/Basic.h>
+
+#include "trace-utest.h"
+
+/* Bit mask selecting which test groups main() registers. */
+enum unit_tests {
+ RUN_NONE = 0, /* nothing selected on the command line */
+ RUN_TRACEFS = (1 << 0), /* the tests registered by test_tracefs_lib() */
+ RUN_ALL = 0xFFFF /* every group; used when nothing was selected */
+};
+
+/* Print the command line help to stdout and exit(0). Does not return. */
+static void print_help(char **argv)
+{
+	printf("Usage: %s [OPTIONS]\n", basename(argv[0]));
+	printf("\t-s, --silent\tPrint test summary\n"
+	       "\t-r, --run test\tRun specific test:\n"
+	       "\t\t tracefs run libtracefs tests\n"
+	       "\t-h, --help\tPrint usage information\n");
+	exit(0);
+}
+
+/*
+ * Parse the command line, register the selected test groups with CUnit
+ * and run them in basic mode.
+ *
+ * Returns -1 if the CUnit registry cannot be initialized and 0 otherwise;
+ * the result of the test run itself is not part of the return value.
+ */
+int main(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"silent", no_argument, NULL, 's'},
+		{"run", required_argument, NULL, 'r'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+	const char *opts = "+hsr:";
+	CU_BasicRunMode verbose = CU_BRM_VERBOSE;
+	enum unit_tests tests = RUN_NONE;
+	int index = 0;
+	int c;
+
+	while ((c = getopt_long(argc, argv, opts, long_options, &index)) != -1) {
+		switch (c) {
+		case 'r':
+			/* print_help() exits, so an unknown test name ends here */
+			if (strcmp(optarg, "tracefs") != 0)
+				print_help(argv);
+			else
+				tests |= RUN_TRACEFS;
+			break;
+		case 's':
+			verbose = CU_BRM_SILENT;
+			break;
+		case 'h':
+		default:
+			print_help(argv);
+			break;
+		}
+	}
+
+	/* No explicit selection means: run everything */
+	if (tests == RUN_NONE)
+		tests = RUN_ALL;
+
+	if (CU_initialize_registry() != CUE_SUCCESS) {
+		printf("Test registry cannot be initialized\n");
+		return -1;
+	}
+
+	if (tests & RUN_TRACEFS)
+		test_tracefs_lib();
+
+	CU_basic_set_mode(verbose);
+	CU_basic_run_tests();
+	CU_cleanup_registry();
+	return 0;
+}
diff --git a/utest/trace-utest.h b/utest/trace-utest.h
new file mode 100644
index 00000000..917c0e78
--- /dev/null
+++ b/utest/trace-utest.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#ifndef _TRACE_UTEST_H_
+#define _TRACE_UTEST_H_
+
+void test_tracefs_lib(void);
+
+#endif /* _TRACE_UTEST_H_ */
diff --git a/utest/tracefs-utest.c b/utest/tracefs-utest.c
new file mode 100644
index 00000000..9c9eee06
--- /dev/null
+++ b/utest/tracefs-utest.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <dirent.h>
+
+#include <CUnit/CUnit.h>
+#include <CUnit/Basic.h>
+
+#include "tracefs.h"
+
+/* Name under which the suite is registered with CUnit */
+#define TRACEFS_SUITE "tracefs library"
+#define TEST_INSTANCE_NAME "cunit_test_iter"
+#define TEST_ARRAY_SIZE 500
+
+static struct tracefs_instance *test_instance;
+static struct tep_handle *test_tep;
+/* One record written to trace_marker by test_iter_write(). */
+struct test_sample {
+ int cpu; /* CPU the writer was pinned to for this record */
+ int value; /* random non-zero value; test_callback() zeroes it once matched */
+};
+static struct test_sample test_array[TEST_ARRAY_SIZE];
+static int test_found;
+
+/*
+ * Iterator callback: match the sample stored in the event's "buf" field
+ * against test_array[].
+ *
+ * @context may point to an int; if that value is >= 0, events from any
+ * other CPU are skipped. The first array entry with the same value and
+ * CPU is marked as seen (value set to 0) and test_found is incremented.
+ * Always returns 0.
+ */
+static int test_callback(struct tep_event *event, struct tep_record *record,
+			 int cpu, void *context)
+{
+	struct tep_format_field *field;
+	struct test_sample *sample;
+	int *cpu_test = (int *)context;
+	int i;
+
+	if (cpu_test && *cpu_test >= 0 && *cpu_test != cpu)
+		return 0;
+
+	field = tep_find_field(event, "buf");
+	if (!field)
+		return 0;
+
+	sample = ((struct test_sample *)(record->data + field->offset));
+	for (i = 0; i < TEST_ARRAY_SIZE; i++) {
+		if (test_array[i].value != sample->value ||
+		    test_array[i].cpu != cpu)
+			continue;
+		test_array[i].value = 0;
+		test_found++;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill test_array[] with random (cpu, value) samples and write each one
+ * to the test instance's trace_marker file while pinned to that CPU.
+ * The original CPU affinity is restored afterwards and both CPU sets
+ * are released.
+ */
+static void test_iter_write(void)
+{
+	int cpus = sysconf(_SC_NPROCESSORS_CONF);
+	cpu_set_t *cpuset, *cpusave;
+	int cpu_size;
+	char *path;
+	int i, fd;
+	int ret;
+
+	cpuset = CPU_ALLOC(cpus);
+	cpusave = CPU_ALLOC(cpus);
+	CU_TEST(cpuset != NULL && cpusave != NULL);
+	if (!cpuset || !cpusave)
+		goto out;
+	cpu_size = CPU_ALLOC_SIZE(cpus);
+	CPU_ZERO_S(cpu_size, cpuset);
+
+	sched_getaffinity(0, cpu_size, cpusave);
+
+	path = tracefs_instance_get_file(test_instance, "trace_marker");
+	CU_TEST(path != NULL);
+	fd = open(path, O_WRONLY);
+	tracefs_put_tracing_file(path);
+	CU_TEST(fd >= 0);
+
+	for (i = 0; i < TEST_ARRAY_SIZE; i++) {
+		test_array[i].cpu = rand() % cpus;
+		test_array[i].value = random();
+		/* 0 is reserved for "already seen" in test_callback() */
+		if (!test_array[i].value)
+			test_array[i].value++;
+		CU_TEST(test_array[i].cpu < cpus);
+		CPU_ZERO_S(cpu_size, cpuset);
+		/* Sets from CPU_ALLOC() need the size-aware _S variant */
+		CPU_SET_S(test_array[i].cpu, cpu_size, cpuset);
+		sched_setaffinity(0, cpu_size, cpuset);
+		ret = write(fd, test_array + i, sizeof(struct test_sample));
+		CU_TEST(ret == sizeof(struct test_sample));
+	}
+
+	sched_setaffinity(0, cpu_size, cpusave);
+	close(fd);
+out:
+	CPU_FREE(cpuset);
+	CPU_FREE(cpusave);
+}
+
+
+/*
+ * Write a fresh batch of samples, iterate the recorded events and check
+ * what test_callback() matched.
+ *
+ * With @cpu < 0 every sample must be found. Otherwise exactly the
+ * samples written on @cpu must have been marked as seen (value == 0)
+ * and all others must be untouched.
+ */
+static void iter_raw_events_on_cpu(int cpu)
+{
+	int expected = 0;
+	int ret;
+	int i;
+
+	test_found = 0;
+	test_iter_write();
+	ret = tracefs_iterate_raw_events(test_tep, test_instance, NULL, 0,
+					 test_callback, &cpu);
+	CU_TEST(ret == 0);
+
+	if (cpu < 0) {
+		CU_TEST(test_found == TEST_ARRAY_SIZE);
+		return;
+	}
+
+	for (i = 0; i < TEST_ARRAY_SIZE; i++) {
+		if (test_array[i].cpu != cpu) {
+			CU_TEST(test_array[i].value != 0);
+			continue;
+		}
+		expected++;
+		CU_TEST(test_array[i].value == 0);
+	}
+	CU_TEST(test_found == expected);
+}
+
+/*
+ * Exercise tracefs_iterate_raw_events(): first the expected results for
+ * a NULL tep, a NULL instance and a NULL callback, then a full pass over
+ * all CPUs followed by one pass per configured CPU.
+ */
+static void test_iter_raw_events(void)
+{
+	int ncpus = sysconf(_SC_NPROCESSORS_CONF);
+	int cpu;
+	int ret;
+
+	ret = tracefs_iterate_raw_events(NULL, test_instance, NULL, 0, test_callback, NULL);
+	CU_TEST(ret < 0);
+	ret = tracefs_iterate_raw_events(test_tep, NULL, NULL, 0, test_callback, NULL);
+	CU_TEST(ret == 0);
+	ret = tracefs_iterate_raw_events(test_tep, test_instance, NULL, 0, NULL, NULL);
+	CU_TEST(ret < 0);
+
+	iter_raw_events_on_cpu(-1);
+	for (cpu = 0; cpu < ncpus; cpu++)
+		iter_raw_events_on_cpu(cpu);
+}
+
+#define RAND_STR_SIZE 20
+#define RAND_ASCII "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+/*
+ * Return a random string of RAND_STR_SIZE - 1 characters picked from
+ * RAND_ASCII.
+ *
+ * The string lives in a static buffer: every call overwrites the
+ * previous result and the caller must not free it.
+ */
+static const char *get_rand_str(void)
+{
+	static char str[RAND_STR_SIZE];
+	static char sym[] = RAND_ASCII;
+	struct timespec clk;
+	int i;
+
+	/* Re-seeded on every call from the nanosecond part of the wall clock */
+	clock_gettime(CLOCK_REALTIME, &clk);
+	srand(clk.tv_nsec);
+	for (i = 0; i < RAND_STR_SIZE; i++)
+		str[i] = sym[rand() % (sizeof(sym) - 1)];
+
+	/* The last slot is the terminator */
+	str[RAND_STR_SIZE - 1] = 0;
+	return str;
+}
+
+/*
+ * Check tracefs_tracing_dir() and tracefs_get_tracing_file(): the
+ * tracing directory must exist, a NULL name must yield NULL, a random
+ * name must yield a path that does not exist, and "trace" must yield a
+ * path that does.
+ */
+static void test_trace_file(void)
+{
+	const char *bogus = get_rand_str();
+	const char *tdir = tracefs_tracing_dir();
+	struct stat st;
+	char *path;
+
+	CU_TEST(tdir != NULL);
+	CU_TEST(stat(tdir, &st) == 0);
+	CU_TEST(S_ISDIR(st.st_mode));
+
+	path = tracefs_get_tracing_file(NULL);
+	CU_TEST(path == NULL);
+
+	path = tracefs_get_tracing_file(bogus);
+	CU_TEST(path != NULL);
+	CU_TEST(stat(path, &st) != 0);
+	tracefs_put_tracing_file(path);
+
+	path = tracefs_get_tracing_file("trace");
+	CU_TEST(path != NULL);
+	CU_TEST(stat(path, &st) == 0);
+	tracefs_put_tracing_file(path);
+}
+
+/*
+ * Compare tracefs_instance_file_read() for @fname of @inst with a plain
+ * read() of the same file (@inst == NULL means the top level tracing
+ * directory).
+ *
+ * CU_TEST() only records a failure and carries on, so each step that
+ * later code depends on is followed by an explicit bail-out.
+ */
+static void test_instance_file_read(struct tracefs_instance *inst, char *fname)
+{
+	const char *tdir = tracefs_tracing_dir();
+	char buf[BUFSIZ];
+	char *fpath = NULL;
+	char *file;
+	ssize_t fsize;
+	int size = 0;
+	int ret;
+	int fd;
+
+	if (inst)
+		ret = asprintf(&fpath, "%s/instances/%s/%s",
+			       tdir, tracefs_instance_get_name(inst), fname);
+	else
+		ret = asprintf(&fpath, "%s/%s", tdir, fname);
+	CU_TEST(ret > 0);
+	if (ret < 0)
+		return;
+
+	memset(buf, 0, BUFSIZ);
+	fd = open(fpath, O_RDONLY);
+	CU_TEST(fd >= 0);
+	if (fd < 0) {
+		free(fpath);
+		return;
+	}
+	/* signed, so that a failed read() is actually caught below */
+	fsize = read(fd, buf, BUFSIZ);
+	CU_TEST(fsize >= 0);
+	close(fd);
+	buf[BUFSIZ - 1] = 0;
+
+	file = tracefs_instance_file_read(inst, fname, &size);
+	CU_TEST(file != NULL);
+	CU_TEST(size == fsize);
+	if (file) {
+		CU_TEST(strcmp(file, buf) == 0);
+	}
+
+	free(fpath);
+	free(file);
+}
+
+#define ALL_TRACERS "available_tracers"
+#define CUR_TRACER "current_tracer"
+#define PER_CPU "per_cpu"
+/*
+ * Walk through the instance APIs using a randomly named instance:
+ * creation (twice, the second call must report "not new"), name and
+ * directory getters, file path getters, file read and write, the
+ * file/dir existence checks, and finally destruction.
+ */
+static void test_instance_file(void)
+{
+ struct tracefs_instance *instance = NULL;
+ struct tracefs_instance *second = NULL;
+ const char *name = get_rand_str();
+ const char *inst_name = NULL;
+ const char *tdir;
+ char *inst_file;
+ char *inst_dir;
+ struct stat st;
+ char *fname;
+ char *file1;
+ char *file2;
+ char *tracer;
+ int size;
+ int ret;
+
+ /* The instance directory must not exist before the instance is created */
+ tdir = tracefs_tracing_dir();
+ CU_TEST(tdir != NULL);
+ CU_TEST(asprintf(&inst_dir, "%s/instances/%s", tdir, name) > 0);
+ CU_TEST(stat(inst_dir, &st) != 0);
+
+ CU_TEST(tracefs_instance_exists(name) == false);
+ instance = tracefs_instance_create(name);
+ CU_TEST(instance != NULL);
+ CU_TEST(tracefs_instance_is_new(instance));
+ second = tracefs_instance_create(name);
+ CU_TEST(second != NULL);
+ CU_TEST(!tracefs_instance_is_new(second));
+ tracefs_instance_free(second);
+ CU_TEST(tracefs_instance_exists(name) == true);
+ CU_TEST(stat(inst_dir, &st) == 0);
+ CU_TEST(S_ISDIR(st.st_mode));
+ inst_name = tracefs_instance_get_name(instance);
+ CU_TEST(inst_name != NULL);
+ CU_TEST(strcmp(inst_name, name) == 0);
+
+ /* Directory getters: top level (NULL instance) and the new instance */
+ fname = tracefs_instance_get_dir(NULL);
+ CU_TEST(fname != NULL);
+ CU_TEST(strcmp(fname, tdir) == 0);
+ free(fname);
+
+ fname = tracefs_instance_get_dir(instance);
+ CU_TEST(fname != NULL);
+ CU_TEST(strcmp(fname, inst_dir) == 0);
+ free(fname);
+
+ /* File path getters must agree with paths built by hand */
+ CU_TEST(asprintf(&fname, "%s/"ALL_TRACERS, tdir) > 0);
+ CU_TEST(fname != NULL);
+ inst_file = tracefs_instance_get_file(NULL, ALL_TRACERS);
+ CU_TEST(inst_file != NULL);
+ CU_TEST(strcmp(fname, inst_file) == 0);
+ tracefs_put_tracing_file(inst_file);
+ free(fname);
+
+ CU_TEST(asprintf(&fname, "%s/instances/%s/"ALL_TRACERS, tdir, name) > 0);
+ CU_TEST(fname != NULL);
+ CU_TEST(stat(fname, &st) == 0);
+ inst_file = tracefs_instance_get_file(instance, ALL_TRACERS);
+ CU_TEST(inst_file != NULL);
+ CU_TEST(strcmp(fname, inst_file) == 0);
+
+ test_instance_file_read(NULL, ALL_TRACERS);
+ test_instance_file_read(instance, ALL_TRACERS);
+
+ /* Write the first available tracer as current tracer and read it back */
+ file1 = tracefs_instance_file_read(instance, ALL_TRACERS, NULL);
+ CU_TEST(file1 != NULL);
+ tracer = strtok(file1, " ");
+ CU_TEST(tracer != NULL);
+ ret = tracefs_instance_file_write(instance, CUR_TRACER, tracer);
+ CU_TEST(ret == strlen(tracer));
+ file2 = tracefs_instance_file_read(instance, CUR_TRACER, &size);
+ CU_TEST(file2 != NULL);
+ CU_TEST(size >= strlen(tracer));
+ CU_TEST(strncmp(file2, tracer, strlen(tracer)) == 0);
+ free(file1);
+ free(file2);
+
+ /* inst_file and fname still belong to the instance ALL_TRACERS check above */
+ tracefs_put_tracing_file(inst_file);
+ free(fname);
+
+ /* Existence checks: a random name, a regular file, a directory */
+ CU_TEST(tracefs_file_exists(NULL, (char *)name) == false);
+ CU_TEST(tracefs_dir_exists(NULL, (char *)name) == false);
+ CU_TEST(tracefs_file_exists(instance, (char *)name) == false);
+ CU_TEST(tracefs_dir_exists(instance, (char *)name) == false);
+
+ CU_TEST(tracefs_file_exists(NULL, CUR_TRACER) == true);
+ CU_TEST(tracefs_dir_exists(NULL, CUR_TRACER) == false);
+ CU_TEST(tracefs_file_exists(instance, CUR_TRACER) == true);
+ CU_TEST(tracefs_dir_exists(instance, CUR_TRACER) == false);
+
+ CU_TEST(tracefs_file_exists(NULL, PER_CPU) == false);
+ CU_TEST(tracefs_dir_exists(NULL, PER_CPU) == true);
+ CU_TEST(tracefs_file_exists(instance, PER_CPU) == false);
+ CU_TEST(tracefs_dir_exists(instance, PER_CPU) == true);
+
+ /* Destroying NULL or an already destroyed instance must fail */
+ CU_TEST(tracefs_instance_destroy(NULL) != 0);
+ CU_TEST(tracefs_instance_destroy(instance) == 0);
+ CU_TEST(tracefs_instance_destroy(instance) != 0);
+ tracefs_instance_free(instance);
+ CU_TEST(stat(inst_dir, &st) != 0);
+ free(inst_dir);
+}
+
+/*
+ * Mark the first entry of the NULL-terminated @strings array that equals
+ * @name as seen by replacing it with a freshly allocated "/".
+ * Nothing happens if @name is not in the array.
+ */
+static void exclude_string(char **strings, char *name)
+{
+	char **entry;
+
+	for (entry = strings; *entry; entry++) {
+		if (strcmp(*entry, name) != 0)
+			continue;
+		free(*entry);
+		*entry = strdup("/");
+		return;
+	}
+}
+
+/*
+ * Check that every name in the NULL-terminated @files array exists as a
+ * directory entry of @fdir. Entries that are found get replaced by "/"
+ * (see exclude_string()), so @files is modified.
+ */
+static void test_check_files(const char *fdir, char **files)
+{
+	struct dirent *dent;
+	DIR *dir;
+	int i;
+
+	dir = opendir(fdir);
+	CU_TEST(dir != NULL);
+	/* CU_TEST() does not abort the test; readdir(NULL) must be avoided */
+	if (!dir)
+		return;
+
+	while ((dent = readdir(dir)))
+		exclude_string(files, dent->d_name);
+
+	closedir(dir);
+
+	for (i = 0; files[i]; i++)
+		CU_TEST(files[i][0] == '/');
+}
+
+/*
+ * Check tracefs_event_systems() and tracefs_system_events() against the
+ * directory contents of <tracing dir>/events and of the first system's
+ * directory.
+ *
+ * CU_TEST() only records a failure, so results that later code
+ * dereferences are checked explicitly as well.
+ */
+static void test_system_event(void)
+{
+	const char *tdir;
+	char **systems;
+	char **events;
+	char *sdir = NULL;
+
+	tdir = tracefs_tracing_dir();
+	CU_TEST(tdir != NULL);
+
+	systems = tracefs_event_systems(tdir);
+	CU_TEST(systems != NULL);
+	if (!systems)
+		return;
+
+	events = tracefs_system_events(tdir, systems[0]);
+	CU_TEST(events != NULL);
+
+	/* asprintf() leaves the pointer undefined on failure */
+	if (asprintf(&sdir, "%s/events/%s", tdir, systems[0]) < 0)
+		sdir = NULL;
+	CU_TEST(sdir != NULL);
+	if (sdir && events)
+		test_check_files(sdir, events);
+	free(sdir);
+	sdir = NULL;
+
+	if (asprintf(&sdir, "%s/events", tdir) < 0)
+		sdir = NULL;
+	CU_TEST(sdir != NULL);
+	if (sdir)
+		test_check_files(sdir, systems);
+
+	tracefs_list_free(systems);
+	tracefs_list_free(events);
+
+	free(sdir);
+}
+
+/*
+ * Check that every tracer returned by tracefs_tracers() also appears in
+ * the space separated available_tracers file.
+ */
+static void test_tracers(void)
+{
+	const char *tdir;
+	char **tracers;
+	char *tfile;
+	char *tracer;
+	int i;
+
+	tdir = tracefs_tracing_dir();
+	CU_TEST(tdir != NULL);
+
+	tracers = tracefs_tracers(tdir);
+	CU_TEST(tracers != NULL);
+
+	tfile = tracefs_instance_file_read(NULL, ALL_TRACERS, NULL);
+	CU_TEST(tfile != NULL);
+
+	/* CU_TEST() does not abort; neither pointer may be used when NULL */
+	if (tracers && tfile) {
+		tracer = strtok(tfile, " ");
+		while (tracer) {
+			exclude_string(tracers, tracer);
+			tracer = strtok(NULL, " ");
+		}
+
+		/* exclude_string() turned every tracer it found into "/" */
+		for (i = 0; tracers[i]; i++)
+			CU_TEST(tracers[i][0] == '/');
+	}
+
+	if (tracers)
+		tracefs_list_free(tracers);
+	free(tfile);
+}
+
+/*
+ * For every event directory of @system that has a readable "id" file,
+ * check that tep_find_event() on @tep does (@exist == true) or does not
+ * (@exist == false) know an event with that id.
+ */
+static void test_check_events(struct tep_handle *tep, char *system, bool exist)
+{
+	struct dirent *dent;
+	char file[PATH_MAX];
+	char buf[1024];
+	char *edir = NULL;
+	const char *tdir;
+	ssize_t len;
+	DIR *dir;
+	int fd;
+
+	tdir = tracefs_tracing_dir();
+	CU_TEST(tdir != NULL);
+
+	/* asprintf() leaves the pointer undefined on failure */
+	if (asprintf(&edir, "%s/events/%s", tdir, system) < 0)
+		edir = NULL;
+	CU_TEST(edir != NULL);
+	if (!edir)
+		return;
+
+	dir = opendir(edir);
+	CU_TEST(dir != NULL);
+	if (!dir) {
+		free(edir);
+		return;
+	}
+
+	while ((dent = readdir(dir))) {
+		if (dent->d_name[0] == '.')
+			continue;
+		snprintf(file, sizeof(file), "%s/%s/id", edir, dent->d_name);
+		fd = open(file, O_RDONLY);
+		if (fd < 0)
+			continue;
+		/* Leave room for the terminator atoi() relies on */
+		len = read(fd, buf, sizeof(buf) - 1);
+		close(fd);
+		CU_TEST(len > 0);
+		if (len <= 0)
+			continue;
+		buf[len] = '\0';
+
+		if (exist) {
+			CU_TEST(tep_find_event(tep, atoi(buf)) != NULL);
+		} else {
+			CU_TEST(tep_find_event(tep, atoi(buf)) == NULL);
+		}
+	}
+
+	closedir(dir);
+	free(edir);
+}
+
+/*
+ * Check the three ways of loading local event descriptions:
+ * tracefs_local_events() (all systems), tracefs_local_events_system()
+ * (a selected list) and tracefs_fill_local_events() (into an existing
+ * tep handle).
+ */
+static void test_local_events(void)
+{
+	struct tep_handle *tep;
+	const char *tdir;
+	char **systems;
+	char *lsystems[3];
+	int i;
+
+	tdir = tracefs_tracing_dir();
+	CU_TEST(tdir != NULL);
+
+	tep = tracefs_local_events(tdir);
+	CU_TEST(tep != NULL);
+
+	systems = tracefs_event_systems(tdir);
+	CU_TEST(systems != NULL);
+
+	for (i = 0; systems[i]; i++)
+		test_check_events(tep, systems[i], true);
+	tep_free(tep);
+
+	/*
+	 * Pick the first two systems into slots 0 and 2. Slot 1 stays NULL,
+	 * so the array passed below presumably ends after lsystems[0]; the
+	 * checks expect the events of lsystems[2] to be absent.
+	 */
+	memset(lsystems, 0, sizeof(lsystems));
+	for (i = 0; systems[i]; i++) {
+		if (!lsystems[0])
+			lsystems[0] = systems[i];
+		else if (!lsystems[2])
+			lsystems[2] = systems[i];
+		else
+			break;
+	}
+
+	if (lsystems[0] && lsystems[2]) {
+		tep = tracefs_local_events_system(tdir,
+						  (const char * const *)lsystems);
+		CU_TEST(tep != NULL);
+		test_check_events(tep, lsystems[0], true);
+		test_check_events(tep, lsystems[2], false);
+		/*
+		 * Freed here only: outside this branch tep still holds the
+		 * handle that was already released above.
+		 */
+		tep_free(tep);
+	}
+
+	tep = tep_alloc();
+	CU_TEST(tep != NULL);
+	CU_TEST(tracefs_fill_local_events(tdir, tep, NULL) == 0);
+	for (i = 0; systems[i]; i++)
+		test_check_events(tep, systems[i], true);
+
+	tep_free(tep);
+
+	tracefs_list_free(systems);
+}
+
+/* One instance created by test_instances_walk() and whether the walk reported it. */
+struct test_walk_instance {
+ struct tracefs_instance *instance;
+ bool found; /* set by test_instances_walk_cb() when the name is seen */
+};
+/* Number of instances created for the walk test */
+#define WALK_COUNT 10
+/*
+ * tracefs_instances_walk() callback: @data is the array of WALK_COUNT
+ * test_walk_instance entries. The first entry whose instance is named
+ * @name gets its found flag set. Always returns 0.
+ */
+int test_instances_walk_cb(const char *name, void *data)
+{
+	struct test_walk_instance *instances = (struct test_walk_instance *)data;
+	const char *inst_name;
+	int i;
+
+	CU_TEST(instances != NULL);
+	CU_TEST(name != NULL);
+
+	for (i = 0; i < WALK_COUNT; i++) {
+		inst_name = tracefs_instance_get_name(instances[i].instance);
+		if (strcmp(name, inst_name) != 0)
+			continue;
+		instances[i].found = true;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Create WALK_COUNT randomly named instances and check that
+ * tracefs_instances_walk() reports each of them; then destroy them and
+ * check that a second walk reports none.
+ */
+static void test_instances_walk(void)
+{
+	struct test_walk_instance instances[WALK_COUNT];
+	struct test_walk_instance *cur;
+	int i;
+
+	memset(instances, 0, WALK_COUNT * sizeof(struct test_walk_instance));
+	for (i = 0; i < WALK_COUNT; i++) {
+		cur = &instances[i];
+		cur->instance = tracefs_instance_create(get_rand_str());
+		CU_TEST(cur->instance != NULL);
+	}
+
+	CU_TEST(tracefs_instances_walk(test_instances_walk_cb, instances) == 0);
+	for (i = 0; i < WALK_COUNT; i++) {
+		cur = &instances[i];
+		CU_TEST(cur->found);
+		tracefs_instance_destroy(cur->instance);
+		/* reset for the second walk */
+		cur->found = false;
+	}
+
+	CU_TEST(tracefs_instances_walk(test_instances_walk_cb, instances) == 0);
+	for (i = 0; i < WALK_COUNT; i++) {
+		cur = &instances[i];
+		CU_TEST(!cur->found);
+		tracefs_instance_free(cur->instance);
+	}
+}
+
+/*
+ * Check that @clock is the entry enclosed in '[' and ']' in the test
+ * instance's trace_clock file.
+ */
+static void current_clock_check(const char *clock)
+{
+	int size = 0;
+	char *clocks;
+	char *str;
+
+	clocks = tracefs_instance_file_read(test_instance, "trace_clock", &size);
+	CU_TEST(clocks != NULL);
+	/* CU_TEST() does not abort; strstr(NULL, ...) must be avoided */
+	if (!clocks)
+		return;
+	CU_TEST(size > strlen(clock));
+	str = strstr(clocks, clock);
+	CU_TEST(str != NULL);
+	CU_TEST(str != clocks);
+	/* Only look at the neighbours when they lie inside the buffer */
+	if (str && str != clocks) {
+		CU_TEST(*(str - 1) == '[');
+		CU_TEST(*(str + strlen(clock)) == ']');
+	}
+	free(clocks);
+}
+
+/*
+ * Check that the clock reported by tracefs_get_clock() for the test
+ * instance is the one marked as current in its trace_clock file.
+ */
+static void test_get_clock(void)
+{
+	const char *clock;
+
+	clock = tracefs_get_clock(test_instance);
+	CU_TEST(clock != NULL);
+	/* CU_TEST() does not abort; the check below calls strlen(clock) */
+	if (!clock)
+		return;
+	current_clock_check(clock);
+	free((char *)clock);
+}
+
+/*
+ * Suite cleanup callback: destroy and free the test instance and free
+ * the tep handle created by test_suite_init(). Always returns 0.
+ */
+static int test_suite_destroy(void)
+{
+ tracefs_instance_destroy(test_instance);
+ tracefs_instance_free(test_instance);
+ tep_free(test_tep);
+ return 0;
+}
+
+/*
+ * Suite init callback: load the "ftrace" event descriptions into
+ * test_tep and create the TEST_INSTANCE_NAME instance.
+ * Returns 0 on success and 1 if either step fails.
+ */
+static int test_suite_init(void)
+{
+	const char *systems[] = {"ftrace", NULL};
+
+	test_tep = tracefs_local_events_system(NULL, systems);
+	if (test_tep == NULL)
+		return 1;
+	test_instance = tracefs_instance_create(TEST_INSTANCE_NAME);
+	if (!test_instance) {
+		/* Release the handle here instead of relying on the cleanup callback */
+		tep_free(test_tep);
+		test_tep = NULL;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Register the tracefs suite and all of its tests with CUnit.
+ * If the suite cannot be added, an error is printed to stderr and no
+ * test is registered.
+ */
+void test_tracefs_lib(void)
+{
+	CU_pSuite suite = NULL;
+
+	suite = CU_add_suite(TRACEFS_SUITE, test_suite_init, test_suite_destroy);
+	if (suite == NULL) {
+		fprintf(stderr, "Suite \"%s\" cannot be created\n", TRACEFS_SUITE);
+		return;
+	}
+	CU_add_test(suite, "tracing file / directory APIs",
+		    test_trace_file);
+	CU_add_test(suite, "instance file / directory APIs",
+		    test_instance_file);
+	CU_add_test(suite, "systems and events APIs",
+		    test_system_event);
+	CU_add_test(suite, "tracefs_iterate_raw_events API",
+		    test_iter_raw_events);
+	CU_add_test(suite, "tracefs_tracers API",
+		    test_tracers);
+	CU_add_test(suite, "tracefs_local events API",
+		    test_local_events);
+	CU_add_test(suite, "tracefs_instances_walk API",
+		    test_instances_walk);
+	CU_add_test(suite, "tracefs_get_clock API",
+		    test_get_clock);
+}